Documentation Index
Fetch the complete documentation index at: https://www.truefoundry.com/llms.txt
Use this file to discover all available pages before exploring further.
Query Examples
Distribution Queries
Count by model name
Count by model name
Get request counts grouped by model:
# Request counts grouped by model.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "count", "column": "modelName"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Count by team
Count by team
Get request counts grouped by team:
# Request counts grouped by team.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "count", "column": "team"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["team"]
    }
)
Sum tokens by model
Sum tokens by model
Get total input and output tokens grouped by model:
# Total input and output tokens grouped by model.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "sum", "column": "inputTokens"},
            {"type": "sum", "column": "outputTokens"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Latency percentiles by model
Latency percentiles by model
Get p50, p90, and p99 latency percentiles grouped by model:
# p50, p90, and p99 latency percentiles grouped by model.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "p50", "column": "latencyMs"},
            {"type": "p90", "column": "latencyMs"},
            {"type": "p99", "column": "latencyMs"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Multi-dimensional grouping
Multi-dimensional grouping
Group by multiple dimensions:
# Group by several dimensions at once (every entry in "groupBy").
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["modelName", "userEmail", "virtualaccount"]
    }
)
Group by metadata
Group by metadata
Group by a custom metadata key:
# Group by a custom metadata key, addressed here as "metadata.environment".
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            }
        ],
        "groupBy": ["modelName", "metadata.environment"]
    }
)
Filter by model name
Filter by model name
Filter results to specific models:
# Restrict results to specific models with an IN filter on modelName.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "modelName",
                "operator": "IN",
                "value": ["gpt-4", "gpt-3.5-turbo", "claude-2"]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter high latency requests
Filter high latency requests
Find requests with latency above a threshold:
# Requests whose latencyMs is above a threshold (1000 in this example).
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "latencyMs",
                "operator": "GREATER_THAN",
                "value": 1000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter by latency range
Filter by latency range
Find requests within a latency range:
# Requests whose latencyMs falls within a range (BETWEEN takes a two-item list).
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "latencyMs",
                "operator": "BETWEEN",
                "value": [500, 5000]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter by token counts
Filter by token counts
Filter by input and output token thresholds:
# Filter on input and output token thresholds.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "inputTokens",
                "operator": "GREATER_THAN",
                "value": 100
            },
            {
                "fieldName": "outputTokens",
                "operator": "LESS_THAN_EQUAL",
                "value": 1000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter by team
Filter by team
Filter to specific teams using array operators:
# Restrict results to specific teams using the ARRAY_HAS_ANY operator.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "team",
                "operator": "ARRAY_HAS_ANY",
                "value": ["team-alpha", "team-beta"]
            }
        ],
        "groupBy": ["team", "modelName"]
    }
)
Filter by metadata
Filter by metadata
Filter by custom metadata values:
# Filter by a custom metadata value; note the filter uses "metadataKey"
# instead of "fieldName".
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "metadataKey": "environment",
                "operator": "IN",
                "value": ["production"]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Complex filter combination
Complex filter combination
Combine multiple filter types:
# Combine several filter types (IS_NULL, IN, BETWEEN, numeric comparisons)
# in a single query.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean (not JSON's lowercase true); requests
                # serializes it as JSON true in the request body.
                "value": True
            },
            {
                "fieldName": "modelName",
                "operator": "IN",
                "value": ["gpt-4", "gpt-3.5-turbo"]
            },
            {
                "fieldName": "latencyMs",
                "operator": "BETWEEN",
                "value": [100, 10000]
            },
            {
                "fieldName": "inputTokens",
                "operator": "GREATER_THAN",
                "value": 50
            },
            {
                "fieldName": "outputTokens",
                "operator": "LESS_THAN",
                "value": 2000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Virtual model metrics only
Virtual model metrics only
Query metrics for virtual models only. Use
IS_NULL with value false to return only requests routed through a virtual model related metrics:response = requests.post(
"https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
headers={
"Authorization": "Bearer <your_api_key>",
"Content-Type": "application/json"
},
json={
"startTs": "2025-01-21T00:00:00.000Z",
"endTs": "2025-01-22T00:00:00.000Z",
"datasource": "modelMetrics",
"type": "distribution",
"aggregations": [
{"type": "count", "column": "virtualModelName"},
{"type": "sum", "column": "inputTokens"},
{"type": "sum", "column": "outputTokens"}
],
"filters": [
{
"fieldName": "virtualModelName",
"operator": "IS_NULL",
"value": false
}
],
"groupBy": ["virtualModelName"]
}
)