Create single-turn evaluations

# POST a sample single-turn evaluation payload to the Galtea API.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
curl --request POST \
  --url https://api.galtea.ai/evaluations/singleTurn \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "metrics": [
    {
      "id": "id_123",
      "name": "Example Name",
      "score": 0.95
    }
  ],
  "versionId": "ver_123",
  "actualOutput": "Model response text",
  "testCaseId": "tc_123",
  "input": "What is the capital of France?",
  "retrievalContext": "Retrieved context document",
  "isProduction": true
}
'

import requests

# Endpoint that creates single-turn evaluations.
url = "https://api.galtea.ai/evaluations/singleTurn"

# Sample request body.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
payload = {
    "metrics": [
        {
            "id": "id_123",
            "name": "Example Name",
            "score": 0.95
        }
    ],
    "versionId": "ver_123",
    "actualOutput": "Model response text",
    "testCaseId": "tc_123",
    "input": "What is the capital of France?",
    "retrievalContext": "Retrieved context document",
    "isProduction": True
}

# Replace <token> with a real API key before running.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Bound the wait: requests applies no timeout by default, so a stalled
# connection would otherwise block this script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings for the single-turn evaluation call.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    metrics: [
      {
        id: 'id_123',
        name: 'Example Name',
        score: 0.95
      }
    ],
    versionId: 'ver_123',
    actualOutput: 'Model response text',
    testCaseId: 'tc_123',
    input: 'What is the capital of France?',
    retrievalContext: 'Retrieved context document',
    isProduction: true
  })
};

// Send the request, log the parsed JSON response, and log any failure
// (network error or unparsable body) to stderr.
(async () => {
  try {
    const res = await fetch('https://api.galtea.ai/evaluations/singleTurn', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// JSON body for the sample single-turn evaluation.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
$payload = json_encode([
  'metrics' => [
    ['id' => 'id_123', 'name' => 'Example Name', 'score' => 0.95],
  ],
  'versionId' => 'ver_123',
  'actualOutput' => 'Model response text',
  'testCaseId' => 'tc_123',
  'input' => 'What is the capital of France?',
  'retrievalContext' => 'Retrieved context document',
  'isProduction' => true,
]);

// Replace <token> with a real API key before running.
$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json",
];

$curl = curl_init();

// Configure the transfer: return the body as a string, cap redirects at 10
// and the total time at 30 seconds.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.galtea.ai/evaluations/singleTurn",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample single-turn evaluation to the Galtea API and prints
// the raw response body to stdout. If any step fails it prints the error
// and returns instead of continuing with unusable values.
func main() {
	url := "https://api.galtea.ai/evaluations/singleTurn"

	// NOTE(review): the field reference on this page says testCaseId must be
	// omitted when isProduction is true, yet this sample sends both - confirm.
	payload := strings.NewReader("{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// Replace <token> with a real API key before running.
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Posts the sample single-turn evaluation with Unirest and reads the
// response body as a String.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
HttpResponse<String> response = Unirest.post("https://api.galtea.ai/evaluations/singleTurn")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint that creates single-turn evaluations.
url = URI("https://api.galtea.ai/evaluations/singleTurn")

# Build the POST request first: headers, then the JSON body.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
request = Net::HTTP::Post.new(url)
{
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
}.each { |header, value| request[header] = value }
request.body = "{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}"

# Open a TLS connection to the API host.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Send the request and print the raw response body.
response = http.request(request)
puts(response.read_body)

[
  {
    "id": "eval_123",
    "metricId": "metric_123",
    "sessionId": "session_123",
    "userId": "user_123",
    "status": "SUCCESS",
    "testCaseId": "tc_123",
    "inferenceResultId": "ir_123",
    "score": 0.95,
    "reason": "High quality response",
    "error": "<string>",
    "canRetry": false,
    "creditsUsed": 1,
    "conversationSimulatorVersion": "1.0.0",
    "humanEvaluatorId": "<string>",
    "humanEvaluatorStartedAt": "2023-11-07T05:31:56Z",
    "humanScore": 123,
    "humanReason": "<string>",
    "humanEvaluatorFinishedAt": "2023-11-07T05:31:56Z",
    "failedTurns": [
      "<string>"
    ],
    "deletedAt": "2023-11-07T05:31:56Z",
    "evaluatedAt": "2023-11-07T05:31:56Z",
    "metricLegacyAt": "2023-11-07T05:31:56Z",
    "metricDisabledAt": "2023-11-07T05:31:56Z",
    "testCaseLegacyAt": "2023-11-07T05:31:56Z",
    "createdAt": "2023-11-07T05:31:56Z"
  }
]

{
  "error": "Error type",
  "message": "Error message description"
}

POST

evaluations

singleTurn

Create single-turn evaluations

# POST a sample single-turn evaluation payload to the Galtea API.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
curl --request POST \
  --url https://api.galtea.ai/evaluations/singleTurn \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "metrics": [
    {
      "id": "id_123",
      "name": "Example Name",
      "score": 0.95
    }
  ],
  "versionId": "ver_123",
  "actualOutput": "Model response text",
  "testCaseId": "tc_123",
  "input": "What is the capital of France?",
  "retrievalContext": "Retrieved context document",
  "isProduction": true
}
'

import requests

# Endpoint that creates single-turn evaluations.
url = "https://api.galtea.ai/evaluations/singleTurn"

# Sample request body.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
payload = {
    "metrics": [
        {
            "id": "id_123",
            "name": "Example Name",
            "score": 0.95
        }
    ],
    "versionId": "ver_123",
    "actualOutput": "Model response text",
    "testCaseId": "tc_123",
    "input": "What is the capital of France?",
    "retrievalContext": "Retrieved context document",
    "isProduction": True
}

# Replace <token> with a real API key before running.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Bound the wait: requests applies no timeout by default, so a stalled
# connection would otherwise block this script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings for the single-turn evaluation call.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    metrics: [
      {
        id: 'id_123',
        name: 'Example Name',
        score: 0.95
      }
    ],
    versionId: 'ver_123',
    actualOutput: 'Model response text',
    testCaseId: 'tc_123',
    input: 'What is the capital of France?',
    retrievalContext: 'Retrieved context document',
    isProduction: true
  })
};

// Send the request, log the parsed JSON response, and log any failure
// (network error or unparsable body) to stderr.
(async () => {
  try {
    const res = await fetch('https://api.galtea.ai/evaluations/singleTurn', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// JSON body for the sample single-turn evaluation.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
$payload = json_encode([
  'metrics' => [
    ['id' => 'id_123', 'name' => 'Example Name', 'score' => 0.95],
  ],
  'versionId' => 'ver_123',
  'actualOutput' => 'Model response text',
  'testCaseId' => 'tc_123',
  'input' => 'What is the capital of France?',
  'retrievalContext' => 'Retrieved context document',
  'isProduction' => true,
]);

// Replace <token> with a real API key before running.
$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json",
];

$curl = curl_init();

// Configure the transfer: return the body as a string, cap redirects at 10
// and the total time at 30 seconds.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.galtea.ai/evaluations/singleTurn",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample single-turn evaluation to the Galtea API and prints
// the raw response body to stdout. If any step fails it prints the error
// and returns instead of continuing with unusable values.
func main() {
	url := "https://api.galtea.ai/evaluations/singleTurn"

	// NOTE(review): the field reference on this page says testCaseId must be
	// omitted when isProduction is true, yet this sample sends both - confirm.
	payload := strings.NewReader("{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// Replace <token> with a real API key before running.
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Posts the sample single-turn evaluation with Unirest and reads the
// response body as a String.
// NOTE(review): the field reference on this page says testCaseId must be
// omitted when isProduction is true, yet this sample sends both - confirm.
HttpResponse<String> response = Unirest.post("https://api.galtea.ai/evaluations/singleTurn")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint that creates single-turn evaluations.
url = URI("https://api.galtea.ai/evaluations/singleTurn")

# Build the POST request first: headers, then the JSON body.
# NOTE(review): the field reference on this page says testCaseId must be
# omitted when isProduction is true, yet this sample sends both - confirm.
request = Net::HTTP::Post.new(url)
{
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
}.each { |header, value| request[header] = value }
request.body = "{\n  \"metrics\": [\n    {\n      \"id\": \"id_123\",\n      \"name\": \"Example Name\",\n      \"score\": 0.95\n    }\n  ],\n  \"versionId\": \"ver_123\",\n  \"actualOutput\": \"Model response text\",\n  \"testCaseId\": \"tc_123\",\n  \"input\": \"What is the capital of France?\",\n  \"retrievalContext\": \"Retrieved context document\",\n  \"isProduction\": true\n}"

# Open a TLS connection to the API host.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Send the request and print the raw response body.
response = http.request(request)
puts(response.read_body)

[
  {
    "id": "eval_123",
    "metricId": "metric_123",
    "sessionId": "session_123",
    "userId": "user_123",
    "status": "SUCCESS",
    "testCaseId": "tc_123",
    "inferenceResultId": "ir_123",
    "score": 0.95,
    "reason": "High quality response",
    "error": "<string>",
    "canRetry": false,
    "creditsUsed": 1,
    "conversationSimulatorVersion": "1.0.0",
    "humanEvaluatorId": "<string>",
    "humanEvaluatorStartedAt": "2023-11-07T05:31:56Z",
    "humanScore": 123,
    "humanReason": "<string>",
    "humanEvaluatorFinishedAt": "2023-11-07T05:31:56Z",
    "failedTurns": [
      "<string>"
    ],
    "deletedAt": "2023-11-07T05:31:56Z",
    "evaluatedAt": "2023-11-07T05:31:56Z",
    "metricLegacyAt": "2023-11-07T05:31:56Z",
    "metricDisabledAt": "2023-11-07T05:31:56Z",
    "testCaseLegacyAt": "2023-11-07T05:31:56Z",
    "createdAt": "2023-11-07T05:31:56Z"
  }
]

{
  "error": "Error type",
  "message": "Error message description"
}

Authorizations

Authorization

string

header

required

API key authorization. Pass your API key in the Authorization header as a Bearer token. Both new (gsk_*) and legacy (gsk-) API keys are accepted, e.g. Authorization: Bearer gsk_... or Authorization: Bearer gsk-....

Body

application/json

metrics

object[]

required

Show child attributes

versionId

string

required

Example:

"ver_123"

actualOutput

string

required

Example:

"Model response text"

testCaseId

string

Required when isProduction is false. Must be omitted when isProduction is true.

Example:

"tc_123"

input

string

User input/prompt. Required when isProduction is true. Must be omitted when isProduction is false.

Example:

"What is the capital of France?"

retrievalContext

string

RAG retrieval context used to generate the actual output

Example:

"Retrieved context document"

isProduction

boolean

When true, creates a production evaluation (input required, testCaseId must be omitted). When false (default), testCaseId is required and input must be omitted.

Response

Evaluations created successfully

id

string

required

Example:

"eval_123"

metricId

string

required

Example:

"metric_123"

sessionId

string

required

Example:

"session_123"

userId

string | null

required

Example:

"user_123"

status

enum<string>

required

Available options:

PENDING,

PENDING_HUMAN,

SUCCESS,

FAILED,

SKIPPED,

CANCELLED

Example:

"SUCCESS"

testCaseId

string | null

required

Example:

"tc_123"

inferenceResultId

string | null

required

Example:

"ir_123"

score

number | null

required

Example:

0.95

reason

string | null

required

Example:

"High quality response"

error

string | null

required

canRetry

boolean | null

required

Example:

false

creditsUsed

integer | null

required

Example:

1

conversationSimulatorVersion

string | null

required

Example:

"1.0.0"

humanEvaluatorId

string | null

required

User ID of the human evaluator

humanEvaluatorStartedAt

string<date-time> | null

required

humanScore

number | null

required

Human-provided annotation score

humanReason

string | null

required

Human-provided annotation reason

humanEvaluatorFinishedAt

string<date-time> | null

required

Timestamp when human evaluation was submitted

failedTurns

string[]

required

Conversation turns that failed

deletedAt

string<date-time> | null

required

evaluatedAt

string<date-time> | null

required

metricLegacyAt

string<date-time> | null

required

metricDisabledAt

string<date-time> | null

required

testCaseLegacyAt

string<date-time> | null

required

createdAt

string<date-time>

Create evaluations for a single inference result

Retry failed evaluations