Skip to main content

Documentation Index

Fetch the complete documentation index at: https://www.truefoundry.com/llms.txt

Use this file to discover all available pages before exploring further.

Query Examples

Distribution Queries

Get request counts grouped by model:
# Request counts per model for the startTs/endTs window.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "count", "column": "modelName"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Get request counts grouped by team:
# Request counts per team for the startTs/endTs window.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "count", "column": "team"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        "groupBy": ["team"]
    }
)
Get total input and output tokens grouped by model:
# Summed input and output tokens per model for the startTs/endTs window.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        # One "sum" aggregation per token column.
        "aggregations": [
            {"type": "sum", "column": "inputTokens"},
            {"type": "sum", "column": "outputTokens"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Get p50, p90, and p99 latency percentiles grouped by model:
# p50 / p90 / p99 of latencyMs per model for the startTs/endTs window.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        # Each percentile is requested as its own aggregation on the same column.
        "aggregations": [
            {"type": "p50", "column": "latencyMs"},
            {"type": "p90", "column": "latencyMs"},
            {"type": "p99", "column": "latencyMs"}
        ],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        "groupBy": ["modelName"]
    }
)
Group by multiple dimensions:
# Distribution query grouped by several dimensions at once.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        # Multiple groupBy entries produce one group per combination of values.
        "groupBy": ["modelName", "userEmail", "virtualaccount"]
    }
)
Group by a custom metadata key:
# Distribution query grouped by model and by a custom metadata key.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            }
        ],
        # Custom metadata keys are addressed with the "metadata." prefix in groupBy.
        "groupBy": ["modelName", "metadata.environment"]
    }
)
Filter results to specific models:
# Distribution query restricted to an explicit list of model names.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # IN takes a list of accepted values.
                "fieldName": "modelName",
                "operator": "IN",
                "value": ["gpt-4", "gpt-3.5-turbo", "claude-2"]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Find requests with latency above a threshold:
# Distribution query restricted to requests whose latencyMs exceeds 1000.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # Numeric comparison against a single threshold value.
                "fieldName": "latencyMs",
                "operator": "GREATER_THAN",
                "value": 1000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Find requests within a latency range:
# Distribution query restricted to requests whose latencyMs lies in a range.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # BETWEEN takes a two-element [lower, upper] list.
                # NOTE(review): bound inclusivity is not shown here; confirm in the API reference.
                "fieldName": "latencyMs",
                "operator": "BETWEEN",
                "value": [500, 5000]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter by input and output token thresholds:
# Distribution query with separate thresholds on inputTokens and outputTokens.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # Lower bound on input tokens (strictly greater than 100).
                "fieldName": "inputTokens",
                "operator": "GREATER_THAN",
                "value": 100
            },
            {
                # Upper bound on output tokens (at most 1000).
                "fieldName": "outputTokens",
                "operator": "LESS_THAN_EQUAL",
                "value": 1000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Filter to specific teams using array operators:
# Distribution query restricted to specific teams, grouped by team and model.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # Array operator: the value is a list of team names to match against.
                "fieldName": "team",
                "operator": "ARRAY_HAS_ANY",
                "value": ["team-alpha", "team-beta"]
            }
        ],
        "groupBy": ["team", "modelName"]
    }
)
Filter by custom metadata values:
# Distribution query filtered on a custom metadata value.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # Metadata filters use "metadataKey" in place of "fieldName".
                "metadataKey": "environment",
                "operator": "IN",
                "value": ["production"]
            }
        ],
        "groupBy": ["modelName"]
    }
)
Combine multiple filter types:
# Distribution query combining several filter types in one "filters" list.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [],
        "filters": [
            {
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON true.
                "value": True
            },
            {
                # Membership filter on model name.
                "fieldName": "modelName",
                "operator": "IN",
                "value": ["gpt-4", "gpt-3.5-turbo"]
            },
            {
                # Range filter on latency, given as a [lower, upper] list.
                "fieldName": "latencyMs",
                "operator": "BETWEEN",
                "value": [100, 10000]
            },
            {
                # Lower bound on input tokens.
                "fieldName": "inputTokens",
                "operator": "GREATER_THAN",
                "value": 50
            },
            {
                # Upper bound on output tokens.
                "fieldName": "outputTokens",
                "operator": "LESS_THAN",
                "value": 2000
            }
        ],
        "groupBy": ["modelName"]
    }
)
Query metrics for virtual models only. Use the IS_NULL operator with value false to return only metrics for requests that were routed through a virtual model:
# Request count and summed input/output tokens per virtual model.
# Replace {your_control_plane_url} and <your_api_key> with real values before running.
response = requests.post(
    "https://{your_control_plane_url}/api/svc/v1/llm-gateway/metrics/query",
    headers={
        "Authorization": "Bearer <your_api_key>",
        "Content-Type": "application/json"
    },
    json={
        "startTs": "2025-01-21T00:00:00.000Z",
        "endTs": "2025-01-22T00:00:00.000Z",
        "datasource": "modelMetrics",
        "type": "distribution",
        "aggregations": [
            {"type": "count", "column": "virtualModelName"},
            {"type": "sum", "column": "inputTokens"},
            {"type": "sum", "column": "outputTokens"}
        ],
        "filters": [
            {
                # IS_NULL with a false value keeps only rows where virtualModelName is set.
                "fieldName": "virtualModelName",
                "operator": "IS_NULL",
                # Python boolean literal; requests serializes it to JSON false.
                "value": False
            }
        ],
        "groupBy": ["virtualModelName"]
    }
)