You can use Galtea to log and evaluate real user interactions from your production environment. This helps you monitor your product’s performance over time.
Single-Turn Production Monitoring
For simple, single-turn interactions, you can send the details to Galtea using create_single_turn with is_production=True.
from galtea import Galtea
import os
# SDK client, authenticated with the key read from the GALTEA_API_KEY
# environment variable.
galtea = Galtea(api_key=os.environ.get("GALTEA_API_KEY"))

# Placeholder: replace with the ID of the product version being monitored.
VERSION_ID = "YOUR_ACTIVE_VERSION_ID"
# In your application's request handler...
def handle_user_query(user_query, retrieval_context=None):
    """Answer a user query and log the interaction to Galtea for evaluation.

    Args:
        user_query: The query forwarded to ``your_product_function`` and
            logged as the interaction's input.
        retrieval_context: Optional context passed to
            ``your_product_function`` and logged alongside the interaction.

    Returns:
        The response produced by ``your_product_function``.
    """
    # Your logic to get a response from your model.
    answer = your_product_function(user_query, retrieval_context)

    # Everything Galtea needs to log and evaluate this single-turn interaction.
    evaluation_request = {
        "version_id": VERSION_ID,
        "is_production": True,
        "metrics": ["coherence", "answer-relevancy", "faithfulness"],
        "input": user_query,
        "actual_output": answer,
        "retrieval_context": retrieval_context,
    }
    galtea.evaluation_tasks.create_single_turn(**evaluation_request)

    return answer
Multi-Turn Production Monitoring (Conversations)
For multi-turn conversations, use the session-based workflow to log the entire interaction first and then evaluate it.
from galtea import Galtea
import os
# SDK client, authenticated with the key read from the GALTEA_API_KEY
# environment variable.
galtea = Galtea(api_key=os.environ.get("GALTEA_API_KEY"))

VERSION_ID = "YOUR_ACTIVE_VERSION_ID"
METRICS_TO_EVALUATE = ["conversation-relevancy", "knowledge-retention"]

# Step 1: open a session when the conversation starts.
# Pass is_production=True for real user interactions.
session = galtea.sessions.create(version_id=VERSION_ID, is_production=True)

# Step 2: wait for user interactions and save each turn, or upload turns as
# they happen. A real application produces these dynamically; this example
# simulates a conversation instead.
conversation_turns = [
    {
        "input": "What's the weather like in Paris?",
        "output": "It's sunny and 22°C in Paris.",
    },
    {
        "input": "Is it expected to rain later?",
        "output": "No, the forecast shows clear skies for the rest of the day.",
    },
    {
        "input": "Great, thanks!",
        "output": "You're welcome!",
    },
]

# Step 3a: log every turn as an inference result.
# Rather than looping afterwards, each turn could be logged right after the
# AI model responds.
for turn in conversation_turns:
    user_message = turn["input"]
    model_reply = turn["output"]
    galtea.inference_results.create(
        session_id=session.id,
        input=user_message,
        output=model_reply,
    )

# Step 3b: if all turns are already available, they can be created in a
# single batch instead:
# galtea.inference_results.create_batch(
#     session_id=session.id,
#     conversation_turns=conversation_turns,
# )

# Step 4: evaluate the whole session once the conversation has finished.
galtea.evaluation_tasks.create(session_id=session.id, metrics=METRICS_TO_EVALUATE)

print(f"Logged and evaluated production session {session.id}")