Get evaluations

# List evaluations; the API key is sent as a Bearer token in the Authorization header.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  https://api.galtea.ai/evaluations

import requests

# Endpoint that lists evaluations.
url = "https://api.galtea.ai/evaluations"

# The API key is sent as a Bearer token in the Authorization header.
headers = {"Authorization": "Bearer <token>"}

# requests applies no timeout unless one is given, so without it a stalled
# connection would block this script indefinitely.
response = requests.get(url, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: a GET carrying the API key as a Bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Fetch the evaluations, decode the JSON body and log it; any failure along
// the way is reported through console.error.
(async () => {
  try {
    const response = await fetch('https://api.galtea.ai/evaluations', options);
    const payload = await response.json();
    console.log(payload);
  } catch (error) {
    console.error(error);
  }
})();

<?php

// Request settings for a GET to the evaluations endpoint; the API key is sent
// as a Bearer token in the Authorization header.
$options = [
  CURLOPT_URL => "https://api.galtea.ai/evaluations",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => ["Authorization: Bearer <token>"],
];

$handle = curl_init();
curl_setopt_array($handle, $options);

// Run the request, then capture any transport error before releasing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the evaluations endpoint and
// prints the raw response body to standard output.
//
// Every step's error is checked instead of being discarded: when the request
// fails, http.DefaultClient.Do returns a nil response, so touching res.Body
// without the check would panic.
func main() {
	url := "https://api.galtea.ai/evaluations"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// The API key is sent as a Bearer token in the Authorization header.
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	// Close the body only after confirming the response is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Send a GET request to the evaluations endpoint with the API key as a Bearer
// token in the Authorization header, and read the response body as a String.
HttpResponse<String> response = Unirest.get("https://api.galtea.ai/evaluations")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'net/http'
require 'uri'

# Endpoint that lists evaluations.
endpoint = URI.parse("https://api.galtea.ai/evaluations")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# GET request carrying the API key as a Bearer token.
get = Net::HTTP::Get.new(endpoint)
get["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
reply = client.request(get)
puts reply.read_body

[
  {
    "id": "eval_123",
    "metricId": "metric_123",
    "sessionId": "session_123",
    "userId": "user_123",
    "status": "SUCCESS",
    "testCaseId": "tc_123",
    "inferenceResultId": "ir_123",
    "score": 0.95,
    "reason": "High quality response",
    "error": "<string>",
    "canRetry": false,
    "creditsUsed": 1,
    "conversationSimulatorVersion": "1.0.0",
    "humanEvaluatorId": "<string>",
    "humanEvaluatorStartedAt": "2023-11-07T05:31:56Z",
    "humanScore": 123,
    "humanReason": "<string>",
    "humanEvaluatorFinishedAt": "2023-11-07T05:31:56Z",
    "failedTurns": [
      "<string>"
    ],
    "deletedAt": "2023-11-07T05:31:56Z",
    "evaluatedAt": "2023-11-07T05:31:56Z",
    "metricLegacyAt": "2023-11-07T05:31:56Z",
    "metricDisabledAt": "2023-11-07T05:31:56Z",
    "testCaseLegacyAt": "2023-11-07T05:31:56Z",
    "createdAt": "2023-11-07T05:31:56Z"
  }
]

{
  "error": "Error type",
  "message": "Error message description"
}

{
  "error": "Error type",
  "message": "Error message description"
}

GET

evaluations

Get evaluations

# List evaluations; the API key is sent as a Bearer token in the Authorization header.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  https://api.galtea.ai/evaluations

import requests

# Endpoint that lists evaluations.
url = "https://api.galtea.ai/evaluations"

# The API key is sent as a Bearer token in the Authorization header.
headers = {"Authorization": "Bearer <token>"}

# requests applies no timeout unless one is given, so without it a stalled
# connection would block this script indefinitely.
response = requests.get(url, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: a GET carrying the API key as a Bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Fetch the evaluations, decode the JSON body and log it; any failure along
// the way is reported through console.error.
(async () => {
  try {
    const response = await fetch('https://api.galtea.ai/evaluations', options);
    const payload = await response.json();
    console.log(payload);
  } catch (error) {
    console.error(error);
  }
})();

<?php

// Request settings for a GET to the evaluations endpoint; the API key is sent
// as a Bearer token in the Authorization header.
$options = [
  CURLOPT_URL => "https://api.galtea.ai/evaluations",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => ["Authorization: Bearer <token>"],
];

$handle = curl_init();
curl_setopt_array($handle, $options);

// Run the request, then capture any transport error before releasing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends an authenticated GET request to the evaluations endpoint and
// prints the raw response body to standard output.
//
// Every step's error is checked instead of being discarded: when the request
// fails, http.DefaultClient.Do returns a nil response, so touching res.Body
// without the check would panic.
func main() {
	url := "https://api.galtea.ai/evaluations"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// The API key is sent as a Bearer token in the Authorization header.
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	// Close the body only after confirming the response is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Send a GET request to the evaluations endpoint with the API key as a Bearer
// token in the Authorization header, and read the response body as a String.
HttpResponse<String> response = Unirest.get("https://api.galtea.ai/evaluations")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'net/http'
require 'uri'

# Endpoint that lists evaluations.
endpoint = URI.parse("https://api.galtea.ai/evaluations")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# GET request carrying the API key as a Bearer token.
get = Net::HTTP::Get.new(endpoint)
get["Authorization"] = 'Bearer <token>'

# Send the request and print the response body.
reply = client.request(get)
puts reply.read_body

[
  {
    "id": "eval_123",
    "metricId": "metric_123",
    "sessionId": "session_123",
    "userId": "user_123",
    "status": "SUCCESS",
    "testCaseId": "tc_123",
    "inferenceResultId": "ir_123",
    "score": 0.95,
    "reason": "High quality response",
    "error": "<string>",
    "canRetry": false,
    "creditsUsed": 1,
    "conversationSimulatorVersion": "1.0.0",
    "humanEvaluatorId": "<string>",
    "humanEvaluatorStartedAt": "2023-11-07T05:31:56Z",
    "humanScore": 123,
    "humanReason": "<string>",
    "humanEvaluatorFinishedAt": "2023-11-07T05:31:56Z",
    "failedTurns": [
      "<string>"
    ],
    "deletedAt": "2023-11-07T05:31:56Z",
    "evaluatedAt": "2023-11-07T05:31:56Z",
    "metricLegacyAt": "2023-11-07T05:31:56Z",
    "metricDisabledAt": "2023-11-07T05:31:56Z",
    "testCaseLegacyAt": "2023-11-07T05:31:56Z",
    "createdAt": "2023-11-07T05:31:56Z"
  }
]

{
  "error": "Error type",
  "message": "Error message description"
}

{
  "error": "Error type",
  "message": "Error message description"
}

Authorizations

Authorization

string

header

required

API key authorization. Pass your API key in the Authorization header as a Bearer token. Both new (gsk_*) and legacy (gsk-*) API keys are accepted, e.g. `Authorization: Bearer gsk_...` or `Authorization: Bearer gsk-...`.

Query Parameters

ids

string[]

Filter by evaluation IDs

productIds

string[]

Filter by product IDs

sessionIds

string[]

Filter by session IDs

inferenceResultIds

string[]

Filter by inference result IDs (for single-turn evaluations)

metricIds

string[]

Filter by metric IDs

metricGroupIds

string[]

Filter by metric group IDs (matches all revisions of a metric)

testCaseIds

string[]

Filter by test case IDs

testIds

string[]

Filter by test IDs (include only). Use an empty string ("") to select evaluations without a test

excludeTestIds

string[]

Omit evaluations linked to the specified test IDs. Use an empty string ("") to omit evaluations without a test

versionIds

string[]

Filter by version IDs

specificationIds

string[]

Filter by specification IDs (returns evaluations whose metric is linked to any of the given specifications)

statuses

enum<string>[]

Filter by evaluation statuses

Available options:

PENDING,

PENDING_HUMAN,

SUCCESS,

FAILED,

SKIPPED,

CANCELLED

evaluationTypes

string[]

Filter by evaluation types

metricSources

string[]

Filter by metric sources

sort

string[]

Sort instructions (field and direction pairs)

canRetry

boolean

Filter evaluations that can be retried

augmentedTestCase

boolean

Filter evaluations by whether their test case is augmented

isProduction

boolean

Filter evaluations by the joined Session.isProduction flag. Prefer this over the legacy testIds/excludeTestIds empty-string trick.

humanEvaluatorId

string

Filter by human evaluator user ID

fromCreatedAt

string<date-time>

Filter evaluations created at or after this timestamp (ISO 8601 format)

toCreatedAt

string<date-time>

Filter evaluations created at or before this timestamp (ISO 8601 format)

limit

integer

default:10000

Maximum number of results

Required range: 0 <= x <= 9007199254740991

offset

integer

default:0

Number of results to skip

Required range: x >= 0

Response

Evaluations retrieved successfully

id

string

required

Example:

"eval_123"

metricId

string

required

Example:

"metric_123"

sessionId

string

required

Example:

"session_123"

userId

string | null

required

Example:

"user_123"

status

enum<string>

required

Available options:

PENDING,

PENDING_HUMAN,

SUCCESS,

FAILED,

SKIPPED,

CANCELLED

Example:

"SUCCESS"

testCaseId

string | null

required

Example:

"tc_123"

inferenceResultId

string | null

required

Example:

"ir_123"

score

number | null

required

Example:

0.95

reason

string | null

required

Example:

"High quality response"

error

string | null

required

canRetry

boolean | null

required

Example:

false

creditsUsed

integer | null

required

Example:

1

conversationSimulatorVersion

string | null

required

Example:

"1.0.0"

humanEvaluatorId

string | null

required

User ID of the human evaluator

humanEvaluatorStartedAt

string<date-time> | null

required

humanScore

number | null

required

Human-provided annotation score

humanReason

string | null

required

Human-provided annotation reason

humanEvaluatorFinishedAt

string<date-time> | null

required

Timestamp when human evaluation was submitted

failedTurns

string[]

required

Conversation turns that failed

deletedAt

string<date-time> | null

required

evaluatedAt

string<date-time> | null

required

metricLegacyAt

string<date-time> | null

required

metricDisabledAt

string<date-time> | null

required

testCaseLegacyAt

string<date-time> | null

required

createdAt

string<date-time>

Delete evaluation Get analytics data