diff --git a/apps/webapp/app/components/navigation/SideMenu.tsx b/apps/webapp/app/components/navigation/SideMenu.tsx index 18a4387c996..90f25fde788 100644 --- a/apps/webapp/app/components/navigation/SideMenu.tsx +++ b/apps/webapp/app/components/navigation/SideMenu.tsx @@ -10,6 +10,7 @@ import { ClockIcon, Cog8ToothIcon, CogIcon, + CubeIcon, ExclamationTriangleIcon, FolderIcon, FolderOpenIcon, @@ -77,11 +78,12 @@ import { v3EnvironmentVariablesPath, v3ErrorsPath, v3LogsPath, + v3PromptsPath, + v3ModelsPath, v3ProjectAlertsPath, v3ProjectPath, v3ProjectSettingsGeneralPath, v3ProjectSettingsIntegrationsPath, - v3PromptsPath, v3QueuesPath, v3RunsPath, v3SchedulesPath, @@ -456,34 +458,50 @@ export function SideMenu({ /> - - - - + {(user.admin || user.isImpersonating || featureFlags.hasAiAccess) && ( + + + {(user.admin || user.isImpersonating || featureFlags.hasAiModelsAccess) && ( + + )} + + + )} {(user.admin || user.isImpersonating || featureFlags.hasQueryAccess) && ( ; + }>; +}; + +export type ModelMetricsPoint = { + minute: string; + callCount: number; + totalInputTokens: number; + totalOutputTokens: number; + totalCost: number; + ttfcP50: number; + ttfcP90: number; + ttfcP95: number; + ttfcP99: number; + tpsP50: number; + tpsP90: number; + tpsP95: number; + tpsP99: number; + durationP50: number; + durationP90: number; + durationP95: number; + durationP99: number; +}; + +export type UserModelMetrics = { + totalCalls: number; + totalCost: number; + totalInputTokens: number; + totalOutputTokens: number; + avgTtfc: number; + avgTps: number; + taskBreakdown: Array<{ + taskIdentifier: string; + calls: number; + cost: number; + }>; +}; + +export type ModelComparisonItem = { + responseModel: string; + genAiSystem: string; + callCount: number; + totalInputTokens: number; + totalOutputTokens: number; + totalCost: number; + ttfcP50: number; + ttfcP90: number; + tpsP50: number; + tpsP90: number; +}; + +export type PopularModel = { + responseModel: string; + genAiSystem: string; 
+ callCount: number; + totalCost: number; + ttfcP50: number; +}; + +// --- ClickHouse schemas for user metrics --- + +const UserMetricsSummaryRow = z.object({ + total_calls: z.coerce.number(), + total_cost: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + avg_ttfc: z.coerce.number(), + avg_tps: z.coerce.number(), +}); + +const UserTaskBreakdownRow = z.object({ + task_identifier: z.string(), + calls: z.coerce.number(), + cost: z.coerce.number(), +}); + +// --- Presenter --- + +export class ModelRegistryPresenter extends BasePresenter { + private readonly clickhouse: ClickHouse; + + constructor(clickhouse: ClickHouse, replica?: PrismaClientOrTransaction) { + super(undefined, replica); + this.clickhouse = clickhouse; + } + + /** List all visible global models with pricing, grouped by provider. */ + async getModelCatalog(): Promise { + const models = await this._replica.llmModel.findMany({ + where: { + projectId: null, + isHidden: false, + }, + include: { + pricingTiers: { + where: { isDefault: true }, + include: { prices: true }, + take: 1, + }, + }, + orderBy: { modelName: "asc" }, + }); + + type CatalogItemWithBase = ModelCatalogItem & { _baseModelName: string | null }; + const items: CatalogItemWithBase[] = models.map((m) => { + const defaultTier = m.pricingTiers[0]; + const prices = defaultTier?.prices ?? []; + const inputPrice = prices.find((p) => p.usageType === "input"); + const outputPrice = prices.find((p) => p.usageType === "output"); + const provider = m.provider ?? inferProvider(m.modelName); + const catalogEntry = modelCatalog[m.modelName]; + + return { + friendlyId: m.friendlyId, + modelName: m.modelName, + provider, + displayId: formatModelId(provider, m.modelName), + description: m.description, + contextWindow: m.contextWindow, + maxOutputTokens: m.maxOutputTokens, + capabilities: m.capabilities, + inputPrice: inputPrice ? Number(inputPrice.price) : null, + outputPrice: outputPrice ? 
Number(outputPrice.price) : null, + releaseDate: m.startDate ? m.startDate.toISOString().split("T")[0] : null, + supportsStructuredOutput: catalogEntry?.supportsStructuredOutput ?? false, + supportsParallelToolCalls: catalogEntry?.supportsParallelToolCalls ?? false, + supportsStreamingToolCalls: catalogEntry?.supportsStreamingToolCalls ?? false, + variants: [], + _baseModelName: m.baseModelName, + }; + }); + + // Normalize version dots for grouping: "3.5" → "3-5", "4.1" → "4-1" + const normalizeForGrouping = (name: string) => name.replace(/(\d)\.(\d)/g, "$1-$2"); + + // Group variants by their normalized base model name + const variantGroups = new Map(); + + for (const item of items) { + const groupKey = normalizeForGrouping(item._baseModelName ?? item.modelName); + const group = variantGroups.get(groupKey) ?? []; + group.push(item); + variantGroups.set(groupKey, group); + } + + // For each group, pick the best representative as the "card" model + // and nest the rest as variants + const baseModels: ModelCatalogItem[] = []; + + for (const [groupKey, group] of variantGroups) { + if (group.length === 1) { + // Standalone model, no variants + baseModels.push(group[0]); + continue; + } + + // Pick representative: prefer the actual base model (no _baseModelName), + // then "-latest" variant, then the newest by release date + let representative = group.find((m) => !m._baseModelName) + ?? group.find((m) => m.modelName.endsWith("-latest")) + ?? 
group.sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return 0; + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + })[0]; + + // Nest the others as variants, sorted newest first + const others = group + .filter((m) => m !== representative) + .sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return a.modelName.localeCompare(b.modelName); + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + }); + + representative.variants = others.map((m) => ({ + friendlyId: m.friendlyId, + modelName: m.modelName, + displayId: m.displayId, + releaseDate: m.releaseDate, + })); + + baseModels.push(representative); + } + + // Group by provider, sort models within each group by release date (newest first) + const groups = new Map(); + for (const item of baseModels) { + const group = groups.get(item.provider) ?? []; + group.push(item); + groups.set(item.provider, group); + } + + return Array.from(groups.entries()) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([provider, models]) => ({ + provider, + models: models.sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return a.modelName.localeCompare(b.modelName); + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + }), + })); + } + + /** Get a single model with full pricing details. */ + async getModelDetail(friendlyId: string): Promise { + const model = await this._replica.llmModel.findUnique({ + where: { friendlyId }, + include: { + pricingTiers: { + include: { prices: true }, + orderBy: { priority: "asc" }, + }, + }, + }); + + if (!model) return null; + + const defaultTier = model.pricingTiers.find((t) => t.isDefault) ?? model.pricingTiers[0]; + const defaultPrices = defaultTier?.prices ?? 
[]; + const inputPrice = defaultPrices.find((p) => p.usageType === "input"); + const outputPrice = defaultPrices.find((p) => p.usageType === "output"); + const provider = model.provider ?? inferProvider(model.modelName); + const catalogEntry = modelCatalog[model.modelName]; + + return { + friendlyId: model.friendlyId, + modelName: model.modelName, + provider, + displayId: formatModelId(provider, model.modelName), + description: model.description, + contextWindow: model.contextWindow, + maxOutputTokens: model.maxOutputTokens, + capabilities: model.capabilities, + inputPrice: inputPrice ? Number(inputPrice.price) : null, + outputPrice: outputPrice ? Number(outputPrice.price) : null, + releaseDate: model.startDate ? model.startDate.toISOString().split("T")[0] : null, + supportsStructuredOutput: catalogEntry?.supportsStructuredOutput ?? false, + supportsParallelToolCalls: catalogEntry?.supportsParallelToolCalls ?? false, + supportsStreamingToolCalls: catalogEntry?.supportsStreamingToolCalls ?? false, + variants: [], + matchPattern: model.matchPattern, + source: model.source, + pricingTiers: model.pricingTiers.map((t) => ({ + name: t.name, + isDefault: t.isDefault, + prices: Object.fromEntries(t.prices.map((p) => [p.usageType, Number(p.price)])), + })), + }; + } + + /** Get global aggregate metrics for a model (no tenant info). 
*/ + async getGlobalMetrics( + responseModel: string, + startTime: Date, + endTime: Date + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.globalMetrics + .setParams({ + responseModel, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + minute: r.minute, + callCount: r.call_count, + totalInputTokens: r.total_input_tokens, + totalOutputTokens: r.total_output_tokens, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + ttfcP90: r.ttfc_p90, + ttfcP95: r.ttfc_p95, + ttfcP99: r.ttfc_p99, + tpsP50: r.tps_p50, + tpsP90: r.tps_p90, + tpsP95: 0, + tpsP99: 0, + durationP50: r.duration_p50, + durationP90: r.duration_p90, + durationP95: 0, + durationP99: 0, + })); + } + + /** Get per-project usage metrics for a model. */ + async getUserMetrics( + responseModel: string, + projectId: string, + environmentId: string, + startTime: Date, + endTime: Date + ): Promise { + const summaryQuery = this.clickhouse.reader.query({ + name: "modelRegistryUserSummary", + query: ` + SELECT + count() AS total_calls, + sum(total_cost) AS total_cost, + sum(input_tokens) AS total_input_tokens, + sum(output_tokens) AS total_output_tokens, + round(avg(ms_to_first_chunk), 1) AS avg_ttfc, + round(avg(tokens_per_second), 1) AS avg_tps + FROM trigger_dev.llm_metrics_v1 + WHERE response_model = {responseModel: String} + AND project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + `, + params: z.object({ + responseModel: z.string(), + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: UserMetricsSummaryRow, + }); + + const taskQuery = this.clickhouse.reader.query({ + name: "modelRegistryUserTasks", + query: ` + SELECT + task_identifier, + count() AS calls, + sum(total_cost) AS cost + FROM 
trigger_dev.llm_metrics_v1 + WHERE response_model = {responseModel: String} + AND project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + GROUP BY task_identifier + ORDER BY cost DESC + LIMIT 20 + `, + params: z.object({ + responseModel: z.string(), + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: UserTaskBreakdownRow, + }); + + const queryParams = { + responseModel, + projectId, + environmentId, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }; + + const [summaryResult, taskResult] = await Promise.all([ + summaryQuery(queryParams), + taskQuery(queryParams), + ]); + + const [summaryError, summaryRows] = summaryResult; + const [taskError, taskRows] = taskResult; + + const defaultSummary = { + total_calls: 0, + total_cost: 0, + total_input_tokens: 0, + total_output_tokens: 0, + avg_ttfc: 0, + avg_tps: 0, + }; + + const summary = !summaryError && summaryRows?.[0] ? summaryRows[0] : defaultSummary; + + return { + totalCalls: summary.total_calls, + totalCost: summary.total_cost, + totalInputTokens: summary.total_input_tokens, + totalOutputTokens: summary.total_output_tokens, + avgTtfc: summary.avg_ttfc, + avgTps: summary.avg_tps, + taskBreakdown: !taskError && taskRows + ? taskRows.map((r) => ({ + taskIdentifier: r.task_identifier, + calls: r.calls, + cost: r.cost, + })) + : [], + }; + } + + /** Get comparison data for 2-4 models. 
*/ + async getModelComparison( + responseModels: string[], + startTime: Date, + endTime: Date + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.comparison + .setParams({ + responseModels, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + callCount: r.call_count, + totalInputTokens: r.total_input_tokens, + totalOutputTokens: r.total_output_tokens, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + ttfcP90: r.ttfc_p90, + tpsP50: r.tps_p50, + tpsP90: r.tps_p90, + })); + } + + /** Get the most popular models by call count. */ + async getPopularModels( + startTime: Date, + endTime: Date, + limit: number = 20 + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.popular + .setParams({ + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + limit, + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + callCount: r.call_count, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + })); + } +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx new file mode 100644 index 00000000000..5b8fc9170db --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx @@ -0,0 +1,595 @@ +import { ArrowsRightLeftIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useState } from "react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; 
+import { z } from "zod"; +import { PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { Callout } from "~/components/primitives/Callout"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import * as Property from "~/components/primitives/PropertyTable"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { TabButton, TabContainer } from "~/components/primitives/Tabs"; +import { InlineCode } from "~/components/code/InlineCode"; +import { MetricWidget } from "~/routes/resources.metric"; +import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { ModelRegistryPresenter } from "~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { + EnvironmentParamSchema, + v3ModelComparePath, + v3ModelsPath, +} from "~/utils/pathBuilder"; +import { + formatModelPrice, + formatTokenCount, + formatModelCost, + formatCapability, + formatProviderName, +} from "~/utils/modelFormatters"; + +const ParamSchema = EnvironmentParamSchema.extend({ + modelId: z.string(), +}); + +export const meta: MetaFunction = () => { + return [{ title: "Model Detail | Trigger.dev" }]; +}; + +export const loader = 
async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam, modelId } = ParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response("Project not found", { status: 404 }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response("Environment not found", { status: 404 }); + } + + const presenter = new ModelRegistryPresenter(clickhouseClient); + const model = await presenter.getModelDetail(modelId); + + if (!model) { + throw new Response("Model not found", { status: 404 }); + } + + const now = new Date(); + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); + + const userMetrics = await presenter.getUserMetrics( + model.modelName, + project.id, + environment.id, + sevenDaysAgo, + now + ); + + return typedjson({ + model, + userMetrics, + organizationId: project.organizationId, + projectId: project.id, + environmentId: environment.id, + }); +}; + +/** Escape a value for safe interpolation into a TSQL single-quoted string. */ +function escapeTSQL(value: string): string { + return value.replace(/'/g, "''"); +} + +function bignumberConfig(column: string, opts?: { aggregation?: "sum" | "avg" | "first"; suffix?: string; abbreviate?: boolean }): QueryWidgetConfig { + return { type: "bignumber", column, aggregation: opts?.aggregation ?? "sum", abbreviate: opts?.abbreviate ?? false, suffix: opts?.suffix }; +} + +function chartConfig(opts: { chartType: "bar" | "line"; xAxisColumn: string; yAxisColumns: string[]; aggregation?: "sum" | "avg" }): QueryWidgetConfig { + return { type: "chart", chartType: opts.chartType, xAxisColumn: opts.xAxisColumn, yAxisColumns: opts.yAxisColumns, groupByColumn: null, stacked: false, sortByColumn: null, sortDirection: "asc", aggregation: opts.aggregation ?? 
"sum" }; +} + +type Tab = "overview" | "global" | "usage"; + +const TAB_CONFIG: { id: Tab; label: string }[] = [ + { id: "overview", label: "Overview" }, + { id: "global", label: "Global Metrics" }, + { id: "usage", label: "Your Usage" }, +]; + +export default function ModelDetailPage() { + const { model, userMetrics, organizationId, projectId, environmentId } = + useTypedLoaderData(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const [activeTab, setActiveTab] = useState("overview"); + + return ( + + + + + + Compare with... + + + + + + {TAB_CONFIG.map((tab) => ( + setActiveTab(tab.id)} + > + {tab.label} + + ))} + + +
+ {activeTab === "overview" && } + {activeTab === "global" && ( + + )} + {activeTab === "usage" && ( + + )} +
+
+
+ ); +} + +// --- Cost Estimator --- + +function CostEstimator({ + inputPrice, + outputPrice, + defaultInputTokens, + defaultOutputTokens, +}: { + inputPrice: number | null; + outputPrice: number | null; + defaultInputTokens?: number; + defaultOutputTokens?: number; +}) { + const [inputTokens, setInputTokens] = useState(defaultInputTokens ?? 1000); + const [outputTokens, setOutputTokens] = useState(defaultOutputTokens ?? 500); + const [numCalls, setNumCalls] = useState(1000); + + if (inputPrice === null && outputPrice === null) return null; + + const inputCost = inputTokens * (inputPrice ?? 0) * numCalls; + const outputCost = outputTokens * (outputPrice ?? 0) * numCalls; + const totalCost = inputCost + outputCost; + + return ( +
+ Cost Estimator +
+
+
+ + setInputTokens(parseInt(e.target.value) || 0)} + /> +
+
+ + setOutputTokens(parseInt(e.target.value) || 0)} + /> +
+
+ + setNumCalls(parseInt(e.target.value) || 0)} + /> +
+
+ +
+ {formatModelCost(totalCost)} +
+
+
+ Input: {formatModelCost(inputCost)} ({formatTokenCount(inputTokens * numCalls)}{" "} + tokens x {formatModelPrice(inputPrice)}/1M) +
+
+ Output: {formatModelCost(outputCost)} ({formatTokenCount(outputTokens * numCalls)}{" "} + tokens x {formatModelPrice(outputPrice)}/1M) +
+
+
+
+
+ ); +} + +// --- Overview Tab --- + +function OverviewTab({ + model, + userMetrics, +}: { + model: ReturnType>["model"]; + userMetrics: ReturnType>["userMetrics"]; +}) { + return ( +
+
+ {/* Model Info */} +
+ Model Info + + + Provider + {formatProviderName(model.provider)} + + + Model Name + + {model.modelName} + + + {model.description && ( + + Description + {model.description} + + )} + {model.contextWindow && ( + + Context Window + + {formatTokenCount(model.contextWindow)} tokens + + + )} + {model.maxOutputTokens && ( + + Max Output + + {formatTokenCount(model.maxOutputTokens)} tokens + + + )} + {model.capabilities.length > 0 && ( + + Capabilities + +
+ {model.capabilities.map((cap) => ( + + {formatCapability(cap)} + + ))} +
+
+
+ )} + + Match Pattern + + {model.matchPattern} + + +
+
+ + {/* Pricing */} +
+ Pricing + + + Input + + {formatModelPrice(model.inputPrice)} / 1M tokens + + + + Output + + {formatModelPrice(model.outputPrice)} / 1M tokens + + + + {model.pricingTiers.length > 1 && ( +
+

All pricing tiers

+ {model.pricingTiers.map((tier) => ( +
+ {tier.name} + {tier.isDefault && ( + + default + + )} +
+ {Object.entries(tier.prices).map(([usage, price]) => ( +
+ {usage}: ${(price * 1_000_000).toFixed(4)} / 1M +
+ ))} +
+
+ ))} +
+ )} +
+
+ + {/* Cost Estimator */} + 0 + ? Math.round(userMetrics.totalInputTokens / userMetrics.totalCalls) + : undefined + } + defaultOutputTokens={ + userMetrics.totalCalls > 0 + ? Math.round(userMetrics.totalOutputTokens / userMetrics.totalCalls) + : undefined + } + /> +
+ ); +} + +// --- Global Metrics Tab --- + +function GlobalMetricsTab({ + modelName, + organizationId, + projectId, + environmentId, +}: { + modelName: string; + organizationId: string; + projectId: string; + environmentId: string; +}) { + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: "7d", + from: null, + to: null, + }; + + return ( +
+ {/* Big numbers */} +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + {/* Charts */} +
+
+ +
+
+ +
+
+ + + Aggregated across all Trigger.dev users. No tenant-specific data is exposed. + +
+ ); +} + +// --- Your Usage Tab --- + +function YourUsageTab({ + modelName, + organizationId, + projectId, + environmentId, +}: { + modelName: string; + organizationId: string; + projectId: string; + environmentId: string; +}) { + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: "7d", + from: null, + to: null, + }; + + return ( +
+ {/* Big numbers */} +
+
+ +
+
+ +
+
+ 0`} + config={bignumberConfig("avg_ttfc", { aggregation: "avg", suffix: "ms" })} + {...widgetProps} + /> +
+
+ 0`} + config={bignumberConfig("avg_tps", { aggregation: "avg" })} + {...widgetProps} + /> +
+
+ + {/* Charts */} +
+
+ +
+
+ +
+
+ + {/* Task breakdown */} +
+ +
+
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx new file mode 100644 index 00000000000..e207e290882 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -0,0 +1,597 @@ +import { + AdjustmentsHorizontalIcon, + ListBulletIcon, + MagnifyingGlassIcon, + Squares2X2Icon, +} from "@heroicons/react/20/solid"; +import { Link, type MetaFunction, useNavigate } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useMemo, useState } from "react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { Badge } from "~/components/primitives/Badge"; +import { Button } from "~/components/primitives/Buttons"; +import { Checkbox } from "~/components/primitives/Checkbox"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { + SelectProvider, + SelectTrigger, + SelectPopover, + SelectList, + SelectItem, +} from "~/components/primitives/Select"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { appliedSummary } from "~/components/runs/v3/SharedFilters"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ModelCatalogItem, + type PopularModel, + ModelRegistryPresenter, +} from 
"~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { + EnvironmentParamSchema, + v3ModelComparePath, + v3ModelDetailPath, +} from "~/utils/pathBuilder"; +import { + formatModelPrice, + formatTokenCount, + formatCapability, + formatProviderName, +} from "~/utils/modelFormatters"; +import { formatNumberCompact } from "~/utils/numberFormatter"; + +export const meta: MetaFunction = () => { + return [{ title: "Models | Trigger.dev" }]; +}; + +export const loader = async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response("Project not found", { status: 404 }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response("Environment not found", { status: 404 }); + } + + const presenter = new ModelRegistryPresenter(clickhouseClient); + const catalog = await presenter.getModelCatalog(); + + const now = new Date(); + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); + const popularModels = await presenter.getPopularModels(sevenDaysAgo, now, 50); + + const allProviders = catalog.map((g) => g.provider); + const allCapabilities = Array.from( + new Set(catalog.flatMap((g) => g.models.flatMap((m) => m.capabilities))) + ).sort(); + + return typedjson({ catalog, popularModels, allProviders, allCapabilities }); +}; + +// --- Helpers --- + +const FEATURE_OPTIONS = [ + { value: "structuredOutput", label: "Structured Output" }, + { value: 
"parallelToolCalls", label: "Parallel Tool Calls" }, + { value: "streamingToolCalls", label: "Streaming Tool Calls" }, +] as const; + +type FeatureKey = (typeof FEATURE_OPTIONS)[number]["value"]; + +function modelMatchesFeature(model: ModelCatalogItem, feature: FeatureKey): boolean { + switch (feature) { + case "structuredOutput": + return model.supportsStructuredOutput; + case "parallelToolCalls": + return model.supportsParallelToolCalls; + case "streamingToolCalls": + return model.supportsStreamingToolCalls; + } +} + +// --- Filter Components --- + +function ProviderFilter({ providers }: { providers: string[] }) { + const { values, replace, del } = useSearchParams(); + const selected = values("providers"); + + return ( + <> + replace({ providers: v })}> + + {selected.length === 0 ? ( + + + Provider + + ) : null} + + + + {providers.map((p) => ( + + {formatProviderName(p)} + + ))} + + + + {selected.length > 0 && ( + del("providers")} + /> + )} + + ); +} + +function CapabilityFilter({ capabilities }: { capabilities: string[] }) { + const { values, replace, del } = useSearchParams(); + const selected = values("capabilities"); + + return ( + <> + replace({ capabilities: v })}> + + {selected.length === 0 ? ( + + + Capability + + ) : null} + + + + {capabilities.map((c) => ( + + {formatCapability(c)} + + ))} + + + + {selected.length > 0 && ( + del("capabilities")} + /> + )} + + ); +} + +function FeaturesFilter() { + const { values, replace, del } = useSearchParams(); + const selected = values("features"); + + return ( + <> + replace({ features: v })}> + + {selected.length === 0 ? ( + + + Features + + ) : null} + + + + {FEATURE_OPTIONS.map((f) => ( + + {f.label} + + ))} + + + + {selected.length > 0 && ( + FEATURE_OPTIONS.find((f) => f.value === s)?.label ?? s) + )! 
+ } + onRemove={() => del("features")} + /> + )} + + ); +} + +// --- Model Card --- + +function ModelCard({ + model, + popular, + onToggleCompare, + isSelected, +}: { + model: ModelCatalogItem; + popular?: PopularModel; + onToggleCompare: (modelName: string) => void; + isSelected: boolean; +}) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const detailPath = v3ModelDetailPath(organization, project, environment, model.friendlyId); + + return ( +
+
e.stopPropagation()}> + onToggleCompare(model.modelName)} + title="Select for comparison" + /> +
+ + + {model.displayId} + + + {model.description && ( +

{model.description}

+ )} + +
+ + {formatModelPrice(model.inputPrice)}/1M in + + + {formatModelPrice(model.outputPrice)}/1M out + + {model.contextWindow && ( + {formatTokenCount(model.contextWindow)} ctx + )} +
+ + {model.capabilities.length > 0 && ( +
+ {model.capabilities.map((cap) => ( + + {formatCapability(cap)} + + ))} +
+ )} + +
+ {popular && popular.callCount > 0 && ( + {formatNumberCompact(popular.callCount)} calls (7d) + )} + {popular && popular.ttfcP50 > 0 && ( + {popular.ttfcP50.toFixed(0)}ms TTFC + )} +
+ + {model.variants.length > 0 && } +
+ ); +} + +function VariantsDropdown({ variants }: { variants: ModelCatalogItem["variants"] }) { + const [isOpen, setIsOpen] = useState(false); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( +
+ + {isOpen && ( +
+ {variants.map((v) => ( + + {v.modelName} + {v.releaseDate && ( + {v.releaseDate} + )} + + ))} +
+ )} +
+ ); +} + +// --- Models Table --- + +function ModelsTable({ + models, + popularMap, + compareSet, + onToggleCompare, +}: { + models: ModelCatalogItem[]; + popularMap: Map; + compareSet: Set; + onToggleCompare: (modelName: string) => void; +}) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( + + + + + Model + Provider + Input $/1M + Output $/1M + Context + p50 TTFC + Calls (7d) + + + + {models.map((model) => { + const path = v3ModelDetailPath(organization, project, environment, model.friendlyId); + const popular = popularMap.get(model.modelName); + return ( + + + onToggleCompare(model.modelName)} + /> + + + {model.displayId} + + {formatProviderName(model.provider)} + + {formatModelPrice(model.inputPrice)} + + + {formatModelPrice(model.outputPrice)} + + + {formatTokenCount(model.contextWindow)} + + + {popular && popular.ttfcP50 > 0 ? `${popular.ttfcP50.toFixed(0)}ms` : "—"} + + + {popular && popular.callCount > 0 + ? formatNumberCompact(popular.callCount) + : "—"} + + + ); + })} + +
+ ); +} + +// --- Main Page --- + +export default function ModelsPage() { + const { catalog, popularModels, allProviders, allCapabilities } = + useTypedLoaderData(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const navigate = useNavigate(); + const searchParams = useSearchParams(); + + const view = searchParams.value("view") ?? "cards"; + const search = searchParams.value("search") ?? ""; + const selectedProviders = searchParams.values("providers"); + const selectedCapabilities = searchParams.values("capabilities"); + const selectedFeatures = searchParams.values("features") as FeatureKey[]; + const [compareSet, setCompareSet] = useState>(new Set()); + + const popularMap = useMemo(() => { + const map = new Map(); + for (const m of popularModels) { + // Index by raw response_model + map.set(m.responseModel, m); + // Also index by model name without provider prefix (e.g. "openai/gpt-4o" → "gpt-4o") + if (m.responseModel.includes("/")) { + map.set(m.responseModel.split("/").slice(1).join("/"), m); + } + } + return map; + }, [popularModels]); + + const filteredCatalog = useMemo(() => { + return catalog + .map((group) => ({ + ...group, + models: group.models.filter((m) => { + if (search && !m.displayId.toLowerCase().includes(search.toLowerCase())) return false; + if (selectedProviders.length > 0 && !selectedProviders.includes(m.provider)) return false; + if ( + selectedCapabilities.length > 0 && + !selectedCapabilities.every((c) => m.capabilities.includes(c)) + ) + return false; + if ( + selectedFeatures.length > 0 && + !selectedFeatures.every((f) => modelMatchesFeature(m, f)) + ) + return false; + return true; + }), + })) + .filter((group) => group.models.length > 0); + }, [catalog, search, selectedProviders, selectedCapabilities, selectedFeatures]); + + const allFilteredModels = useMemo( + () => filteredCatalog.flatMap((g) => g.models), + [filteredCatalog] + ); + + const toggleCompare = 
(modelName: string) => { + setCompareSet((prev) => { + const next = new Set(prev); + if (next.has(modelName)) { + next.delete(modelName); + } else if (next.size < 4) { + next.add(modelName); + } + return next; + }); + }; + + const hasActiveFilters = + selectedProviders.length > 0 || + selectedCapabilities.length > 0 || + selectedFeatures.length > 0; + + return ( + + + + +
+
+ + searchParams.replace({ search: e.target.value || undefined })} + variant="small" + className="pl-8" + fullWidth={false} + /> +
+ +
+ + +
+
+
+
+ + {/* Filter bar */} +
+ + + + {hasActiveFilters && ( + + )} +
+ + {/* Compare bar */} + {compareSet.size >= 2 && ( +
+ {compareSet.size} models selected +
+ + +
+
+ )} + + {view === "cards" ? ( +
+ {filteredCatalog.map((group) => ( +
+ {formatProviderName(group.provider)} +
+ {group.models.map((model) => ( + + ))} +
+
+ ))} + {filteredCatalog.length === 0 && ( +

+ No models match your filters. +

+ )} +
+ ) : ( + + )} +
+
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx new file mode 100644 index 00000000000..661fb294268 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx @@ -0,0 +1,221 @@ +import { ArrowsRightLeftIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { MainCenteredContainer, PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { InfoPanel } from "~/components/primitives/InfoPanel"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ModelComparisonItem, + ModelRegistryPresenter, +} from "~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { EnvironmentParamSchema, v3ModelsPath } from "~/utils/pathBuilder"; +import { formatModelCost } from "~/utils/modelFormatters"; +import { formatNumberCompact } from "~/utils/numberFormatter"; + +export const meta: MetaFunction = () => { + return [{ title: "Compare Models | Trigger.dev" 
}];
};

/**
 * Loads up to four models (from the `?models=a,b,c` query param) and their
 * 7-day performance aggregates for the side-by-side comparison page.
 *
 * Fewer than two distinct models yields an empty comparison so the page can
 * render its "select 2-4 models" empty state instead of querying ClickHouse.
 */
export const loader = async ({ request, params }: LoaderFunctionArgs) => {
  const userId = await requireUserId(request);
  const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params);

  const project = await findProjectBySlug(organizationSlug, projectParam, userId);
  if (!project) {
    throw new Response("Project not found", { status: 404 });
  }

  const environment = await findEnvironmentBySlug(project.id, envParam, userId);
  if (!environment) {
    throw new Response("Environment not found", { status: 404 });
  }

  const url = new URL(request.url);
  const modelsParam = url.searchParams.get("models") ?? "";
  // Dedupe before capping: a repeated slug would render duplicate columns and
  // silently consume one of the four comparison slots.
  const responseModels = [...new Set(modelsParam.split(",").filter(Boolean))].slice(0, 4);

  if (responseModels.length < 2) {
    return typedjson({ comparison: [] as ModelComparisonItem[], models: responseModels });
  }

  const presenter = new ModelRegistryPresenter(clickhouseClient);
  const now = new Date();
  const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);

  const comparison = await presenter.getModelComparison(responseModels, sevenDaysAgo, now);

  return typedjson({ comparison, models: responseModels });
};

// One metric row of the comparison table: a label, one formatted value per
// model column, and (optionally) which column is "best" for highlighting.
type ComparisonRow = {
  label: string;
  values: string[];
  bestIndex?: number;
};

/**
 * Builds the comparison-table rows: one row per metric, one value per selected
 * model. `bestIndex` marks the winning column — lowest for latency/cost,
 * highest for call volume/throughput. Models absent from `comparison` (no
 * traffic in the window) render as "—" and are excluded from "best" selection.
 */
function buildRows(models: string[], comparison: ModelComparisonItem[]): ComparisonRow[] {
  const dataMap = new Map<string, ModelComparisonItem>();
  for (const item of comparison) {
    dataMap.set(item.responseModel, item);
  }

  // Numeric value of `key` for each model, in column order; 0 when no data.
  const metric = (key: keyof ModelComparisonItem): number[] =>
    models.map((m) => {
      const d = dataMap.get(m);
      return d ? Number(d[key]) : 0;
    });

  // Index of the best strictly-positive value, or undefined when every model
  // is zero (no data ⇒ nothing to highlight).
  const findBest = (values: number[], lowerIsBetter: boolean): number | undefined => {
    const candidates = values.map((v, i) => ({ v, i })).filter(({ v }) => v > 0);
    if (candidates.length === 0) return undefined;
    return candidates.reduce((a, b) => ((lowerIsBetter ? a.v < b.v : a.v > b.v) ? a : b)).i;
  };

  const callValues = metric("callCount");
  const ttfcP50Values = metric("ttfcP50");
  const ttfcP90Values = metric("ttfcP90");
  const tpsP50Values = metric("tpsP50");
  const tpsP90Values = metric("tpsP90");
  const costValues = metric("totalCost");

  return [
    {
      label: "Provider",
      values: models.map((m) => dataMap.get(m)?.genAiSystem ?? "—"),
    },
    {
      label: "Total Calls (7d)",
      values: callValues.map((v) => formatNumberCompact(v)),
      bestIndex: findBest(callValues, false),
    },
    {
      label: "p50 TTFC",
      values: ttfcP50Values.map((v) => (v > 0 ? `${v.toFixed(0)}ms` : "—")),
      bestIndex: findBest(ttfcP50Values, true),
    },
    {
      label: "p90 TTFC",
      values: ttfcP90Values.map((v) => (v > 0 ? `${v.toFixed(0)}ms` : "—")),
      bestIndex: findBest(ttfcP90Values, true),
    },
    {
      label: "Tokens/sec (p50)",
      values: tpsP50Values.map((v) => (v > 0 ? v.toFixed(0) : "—")),
      bestIndex: findBest(tpsP50Values, false),
    },
    {
      label: "Tokens/sec (p90)",
      values: tpsP90Values.map((v) => (v > 0 ? v.toFixed(0) : "—")),
      bestIndex: findBest(tpsP90Values, false),
    },
    {
      label: "Total Cost (7d)",
      values: costValues.map((v) => (v > 0 ? formatModelCost(v) : "—")),
      bestIndex: findBest(costValues, true),
    },
  ];
}

+ Select 2-4 models from the catalog to compare their pricing, capabilities, and + performance metrics side by side. +

+ + Browse models + +
+
+ ) : ( +
+ + + + Metric + {models.map((model) => ( + + {model} + + ))} + + + + {rows.map((row) => ( + + + {row.label} + + {row.values.map((value, i) => ( + + + {value} + + + ))} + + ))} + +
+
+ )} +
+
+ ); +} diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts index 4e8357c886c..2556dc8267f 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts @@ -41,6 +41,12 @@ const UpdateModelSchema = z.object({ modelName: z.string().min(1).optional(), matchPattern: z.string().min(1).optional(), startDate: z.string().nullable().optional(), + provider: z.string().nullable().optional(), + description: z.string().nullable().optional(), + contextWindow: z.number().int().nullable().optional(), + maxOutputTokens: z.number().int().nullable().optional(), + capabilities: z.array(z.string()).optional(), + isHidden: z.boolean().optional(), pricingTiers: z .array( z.object({ @@ -94,7 +100,7 @@ export async function action({ request, params }: ActionFunctionArgs) { return json({ error: "Invalid request body", details: parsed.error.issues }, { status: 400 }); } - const { modelName, matchPattern, startDate, pricingTiers } = parsed.data; + const { modelName, matchPattern, startDate, pricingTiers, provider, description, contextWindow, maxOutputTokens, capabilities, isHidden } = parsed.data; // Validate regex if provided — strip (?i) POSIX flag since our registry handles it if (matchPattern) { @@ -114,6 +120,12 @@ export async function action({ request, params }: ActionFunctionArgs) { ...(modelName !== undefined && { modelName }), ...(matchPattern !== undefined && { matchPattern }), ...(startDate !== undefined && { startDate: startDate ? 
new Date(startDate) : null }), + ...(provider !== undefined && { provider }), + ...(description !== undefined && { description }), + ...(contextWindow !== undefined && { contextWindow }), + ...(maxOutputTokens !== undefined && { maxOutputTokens }), + ...(capabilities !== undefined && { capabilities }), + ...(isHidden !== undefined && { isHidden }), }, }); diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts index 805f97ad233..32e780d9fb9 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts @@ -1,5 +1,5 @@ import { type ActionFunctionArgs, json } from "@remix-run/server-runtime"; -import { seedLlmPricing } from "@internal/llm-pricing"; +import { seedLlmPricing, syncLlmCatalog } from "@internal/llm-model-catalog"; import { prisma } from "~/db.server"; import { authenticateApiRequestWithPersonalAccessToken } from "~/services/personalAccessToken.server"; import { llmPricingRegistry } from "~/v3/llmPricingRegistry.server"; @@ -15,9 +15,26 @@ export async function action({ request }: ActionFunctionArgs) { return json({ error: "You must be an admin to perform this action" }, { status: 403 }); } + const url = new URL(request.url); + const action = url.searchParams.get("action") ?? 
"seed"; + + if (action === "sync") { + const result = await syncLlmCatalog(prisma); + + if (llmPricingRegistry) { + await llmPricingRegistry.reload(); + } + + return json({ + success: true, + ...result, + message: `Synced ${result.modelsUpdated} models, skipped ${result.modelsSkipped}`, + }); + } + + // Default: seed (creates new + syncs existing) const result = await seedLlmPricing(prisma); - // Reload the in-memory registry after seeding (if enabled) if (llmPricingRegistry) { await llmPricingRegistry.reload(); } @@ -25,6 +42,6 @@ export async function action({ request }: ActionFunctionArgs) { return json({ success: true, ...result, - message: `Seeded ${result.modelsCreated} models, skipped ${result.modelsSkipped} existing`, + message: `Seeded ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`, }); } diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.ts index 6305869c605..4e3cc39f47a 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.ts @@ -49,6 +49,12 @@ const CreateModelSchema = z.object({ matchPattern: z.string().min(1), startDate: z.string().optional(), source: z.enum(["default", "admin"]).optional().default("admin"), + provider: z.string().optional(), + description: z.string().optional(), + contextWindow: z.number().int().optional(), + maxOutputTokens: z.number().int().optional(), + capabilities: z.array(z.string()).optional(), + isHidden: z.boolean().optional(), pricingTiers: z.array( z.object({ name: z.string().min(1), @@ -88,7 +94,7 @@ export async function action({ request }: ActionFunctionArgs) { return json({ error: "Invalid request body", details: parsed.error.issues }, { status: 400 }); } - const { modelName, matchPattern, startDate, source, pricingTiers } = parsed.data; + const { modelName, matchPattern, startDate, source, pricingTiers, provider, description, contextWindow, 
maxOutputTokens, capabilities, isHidden } = parsed.data; // Validate regex pattern — strip (?i) POSIX flag since our registry handles it try { @@ -107,6 +113,12 @@ export async function action({ request }: ActionFunctionArgs) { matchPattern, startDate: startDate ? new Date(startDate) : null, source, + provider: provider ?? null, + description: description ?? null, + contextWindow: contextWindow ?? null, + maxOutputTokens: maxOutputTokens ?? null, + capabilities: capabilities ?? [], + isHidden: isHidden ?? false, }, }); diff --git a/apps/webapp/app/routes/admin.llm-models.$modelId.tsx b/apps/webapp/app/routes/admin.llm-models.$modelId.tsx index e37491a1b4f..60c182d2274 100644 --- a/apps/webapp/app/routes/admin.llm-models.$modelId.tsx +++ b/apps/webapp/app/routes/admin.llm-models.$modelId.tsx @@ -41,6 +41,12 @@ const SaveSchema = z.object({ modelName: z.string().min(1), matchPattern: z.string().min(1), pricingTiersJson: z.string(), + provider: z.string().optional(), + description: z.string().optional(), + contextWindow: z.string().optional(), + maxOutputTokens: z.string().optional(), + capabilities: z.string().optional(), + isHidden: z.string().optional(), }); export async function action({ request, params }: ActionFunctionArgs) { @@ -95,9 +101,19 @@ export async function action({ request, params }: ActionFunctionArgs) { } // Update model + const { provider, description, contextWindow, maxOutputTokens, capabilities, isHidden } = parsed.data; await prisma.llmModel.update({ where: { id: modelId }, - data: { modelName, matchPattern }, + data: { + modelName, + matchPattern, + provider: provider || null, + description: description || null, + contextWindow: contextWindow ? parseInt(contextWindow) || null : null, + maxOutputTokens: maxOutputTokens ? parseInt(maxOutputTokens) || null : null, + capabilities: capabilities ? 
capabilities.split(",").map((s) => s.trim()).filter(Boolean) : [], + isHidden: isHidden === "on", + }, }); // Replace tiers @@ -135,6 +151,12 @@ export default function AdminLlmModelDetailRoute() { const [modelName, setModelName] = useState(model.modelName); const [matchPattern, setMatchPattern] = useState(model.matchPattern); + const [provider, setProvider] = useState(model.provider ?? ""); + const [description, setDescription] = useState(model.description ?? ""); + const [contextWindow, setContextWindow] = useState(model.contextWindow?.toString() ?? ""); + const [maxOutputTokens, setMaxOutputTokens] = useState(model.maxOutputTokens?.toString() ?? ""); + const [capabilities, setCapabilities] = useState(model.capabilities?.join(", ") ?? ""); + const [isHidden, setIsHidden] = useState(model.isHidden ?? false); const [testInput, setTestInput] = useState(""); const [tiers, setTiers] = useState(() => model.pricingTiers.map((t) => ({ @@ -236,6 +258,83 @@ export default function AdminLlmModelDetailRoute() { + {/* Catalog metadata */} +
+ + +
+
+ + setProvider(e.target.value)} + variant="medium" + fullWidth + placeholder="openai, anthropic, google" + /> +
+
+ + setContextWindow(e.target.value)} + variant="medium" + fullWidth + placeholder="128000" + /> +
+
+ +
+ + setDescription(e.target.value)} + variant="medium" + fullWidth + placeholder="Brief model description" + /> +
+ +
+
+ + setMaxOutputTokens(e.target.value)} + variant="medium" + fullWidth + placeholder="16384" + /> +
+
+ + setCapabilities(e.target.value)} + variant="medium" + fullWidth + placeholder="vision, tool_use, streaming, json_mode" + /> +
+
+ + +
+ {/* Pricing tiers */}
diff --git a/apps/webapp/app/routes/admin.llm-models._index.tsx b/apps/webapp/app/routes/admin.llm-models._index.tsx index fb2f6fdc491..ea2eff72541 100644 --- a/apps/webapp/app/routes/admin.llm-models._index.tsx +++ b/apps/webapp/app/routes/admin.llm-models._index.tsx @@ -20,7 +20,7 @@ import { import { prisma } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { createSearchParams } from "~/utils/searchParams"; -import { seedLlmPricing } from "@internal/llm-pricing"; +import { seedLlmPricing, syncLlmCatalog } from "@internal/llm-model-catalog"; import { llmPricingRegistry } from "~/v3/llmPricingRegistry.server"; const PAGE_SIZE = 50; @@ -87,12 +87,24 @@ export async function action({ request }: ActionFunctionArgs) { if (_action === "seed") { console.log("[admin] seed action started"); const result = await seedLlmPricing(prisma); - console.log(`[admin] seed complete: ${result.modelsCreated} created, ${result.modelsSkipped} skipped`); + console.log(`[admin] seed complete: ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`); await llmPricingRegistry?.reload(); console.log("[admin] registry reloaded after seed"); return typedjson({ success: true, - message: `Seeded: ${result.modelsCreated} created, ${result.modelsSkipped} skipped`, + message: `Seeded: ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`, + }); + } + + if (_action === "sync") { + console.log("[admin] sync catalog action started"); + const result = await syncLlmCatalog(prisma); + console.log(`[admin] sync complete: ${result.modelsUpdated} updated, ${result.modelsSkipped} skipped`); + await llmPricingRegistry?.reload(); + console.log("[admin] registry reloaded after sync"); + return typedjson({ + success: true, + message: `Synced: ${result.modelsUpdated} updated, ${result.modelsSkipped} skipped`, }); } @@ -138,6 +150,7 @@ export default function AdminLlmModelsRoute() { const { 
models, filters, page, pageCount, total } = useTypedLoaderData(); const seedFetcher = useFetcher(); + const syncFetcher = useFetcher(); const reloadFetcher = useFetcher(); const testFetcher = useFetcher<{ testResult?: { @@ -179,6 +192,17 @@ export default function AdminLlmModelsRoute() { + + + + +
+ {/* Catalog metadata */} +
+ + +
+
+ + setProvider(e.target.value)} + variant="medium" + fullWidth + placeholder="openai, anthropic, google" + /> +
+
+ + setContextWindow(e.target.value)} + variant="medium" + fullWidth + placeholder="128000" + /> +
+
+ +
+ + setDescription(e.target.value)} + variant="medium" + fullWidth + placeholder="Brief model description" + /> +
+ +
+
+ + setMaxOutputTokens(e.target.value)} + variant="medium" + fullWidth + placeholder="16384" + /> +
+
+ + setCapabilities(e.target.value)} + variant="medium" + fullWidth + placeholder="vision, tool_use, streaming, json_mode" + /> +
+
+ + +
+ {/* Pricing tiers */}
diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index d6397764eb6..1f3bdbba18a 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -213,11 +213,20 @@ export async function executeQuery( const periodClipped = requestedFromDate !== null && requestedFromDate < maxQueryPeriodDate; // Force tenant isolation and time period limits + // Global tables (no tenantColumns) skip tenant isolation — they contain anonymized cross-tenant data + const isGlobalTable = matchedSchema != null && !matchedSchema.tenantColumns; const enforcedWhereClause = { - organization_id: { op: "eq", value: organizationId }, - project_id: - scope === "project" || scope === "environment" ? { op: "eq", value: projectId } : undefined, - environment_id: scope === "environment" ? { op: "eq", value: environmentId } : undefined, + ...(isGlobalTable + ? {} + : { + organization_id: { op: "eq", value: organizationId }, + project_id: + scope === "project" || scope === "environment" + ? { op: "eq", value: projectId } + : undefined, + environment_id: + scope === "environment" ? { op: "eq", value: environmentId } : undefined, + }), [timeColumn]: { op: "gte", value: maxQueryPeriodDate }, // Optional filters for tasks and queues task_identifier: diff --git a/apps/webapp/app/utils/modelFormatters.ts b/apps/webapp/app/utils/modelFormatters.ts new file mode 100644 index 00000000000..0070c31dad2 --- /dev/null +++ b/apps/webapp/app/utils/modelFormatters.ts @@ -0,0 +1,53 @@ +import { formatNumberCompact } from "./numberFormatter"; + +/** Format a per-token price as $/1M tokens. 
*/ +export function formatModelPrice(pricePerToken: number | null): string { + if (pricePerToken === null) return "—"; + const perMillion = pricePerToken * 1_000_000; + if (perMillion < 0.01) return `$${perMillion.toFixed(4)}`; + if (perMillion < 1) return `$${perMillion.toFixed(3)}`; + return `$${perMillion.toFixed(2)}`; +} + +/** Format a token count (context window, max output). */ +export function formatTokenCount(tokens: number | null): string { + if (tokens === null) return "—"; + return formatNumberCompact(tokens); +} + +/** Format a dollar cost value. */ +export function formatModelCost(dollars: number): string { + if (dollars === 0) return "$0"; + if (dollars < 0.01) return `$${dollars.toFixed(4)}`; + if (dollars < 1) return `$${dollars.toFixed(3)}`; + return `$${dollars.toFixed(2)}`; +} + +/** Format a capability slug from SCREAMING_CASE to Title Case. */ +export function formatCapability(cap: string): string { + return cap + .toLowerCase() + .split("_") + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(" "); +} + +/** Capitalize a provider name. */ +export function formatProviderName(provider: string): string { + const names: Record = { + openai: "OpenAI", + anthropic: "Anthropic", + google: "Google", + meta: "Meta", + mistral: "Mistral", + cohere: "Cohere", + ai21: "AI21", + amazon: "Amazon", + xai: "xAI", + deepseek: "DeepSeek", + qwen: "Qwen", + perplexity: "Perplexity", + nous: "Nous", + }; + return names[provider.toLowerCase()] ?? provider.charAt(0).toUpperCase() + provider.slice(1); +} diff --git a/apps/webapp/app/utils/pathBuilder.ts b/apps/webapp/app/utils/pathBuilder.ts index 038ee8597ea..cc3d0fba0fe 100644 --- a/apps/webapp/app/utils/pathBuilder.ts +++ b/apps/webapp/app/utils/pathBuilder.ts @@ -551,6 +551,31 @@ export function v3PromptPath( return version != null ? 
`${base}?version=${version}` : base;
}

/** Path to the model catalog page for an environment. */
export function v3ModelsPath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath
) {
  return `${v3EnvironmentPath(organization, project, environment)}/models`;
}

/** Path to a single model's detail page within the catalog. */
export function v3ModelDetailPath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath,
  modelId: string
) {
  return `${v3ModelsPath(organization, project, environment)}/${modelId}`;
}

/** Path to the side-by-side model comparison page. */
export function v3ModelComparePath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath
) {
  return `${v3ModelsPath(organization, project, environment)}/compare`;
}

type AiAccessOptions = {
  userId: string;
  isAdmin: boolean;
  isImpersonating: boolean;
  organizationSlug: string;
};

/**
 * Shared access gate for AI features. Checks, in order:
 *   1. a global env-var kill switch (caller-supplied, already evaluated),
 *   2. admin / impersonation (always allowed),
 *   3. the org-level (or global) feature flag.
 * Membership is part of the org lookup so a non-member cannot read another
 * organization's flag overrides.
 */
async function hasAiFeatureAccess(
  envEnabled: boolean,
  flagKey: (typeof FEATURE_FLAG)[keyof typeof FEATURE_FLAG],
  options: AiAccessOptions
): Promise<boolean> {
  const { userId, isAdmin, isImpersonating, organizationSlug } = options;

  // 1. Env var globally enables the feature.
  if (envEnabled) {
    return true;
  }

  // 2. Admins always have access (including while impersonating).
  if (isAdmin || isImpersonating) {
    return true;
  }

  // 3. Check the org/global feature flag.
  const org = await prisma.organization.findFirst({
    where: {
      slug: organizationSlug,
      members: { some: { userId } },
    },
    select: {
      featureFlags: true,
    },
  });

  const flag = makeFlag();
  const flagResult = await flag({
    key: flagKey,
    defaultValue: false,
    overrides: (org?.featureFlags as Record<string, unknown>) ?? {},
  });

  // 4. Not enabled anywhere unless the flag says so.
  return Boolean(flagResult);
}

/** Whether this user may access AI features in the given organization. */
export async function canAccessAi(options: AiAccessOptions): Promise<boolean> {
  return hasAiFeatureAccess(env.AI_FEATURES_ENABLED === "1", FEATURE_FLAG.hasAiAccess, options);
}

/** Whether this user may access the AI model registry in the given organization. */
export async function canAccessAiModels(options: AiAccessOptions): Promise<boolean> {
  return hasAiFeatureAccess(env.AI_MODELS_ENABLED === "1", FEATURE_FLAG.hasAiModelsAccess, options);
}
Not enabled anywhere + return false; +} diff --git a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts index 72f8f3baa34..e598a17fdcc 100644 --- a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts +++ b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts @@ -266,6 +266,7 @@ export class ClickhouseEventRepository implements IEventRepository { gen_ai_system: llmMetrics.genAiSystem, request_model: llmMetrics.requestModel, response_model: llmMetrics.responseModel, + base_response_model: llmMetrics.baseResponseModel, matched_model_id: llmMetrics.matchedModelId, operation_id: llmMetrics.operationId, finish_reason: llmMetrics.finishReason, diff --git a/apps/webapp/app/v3/eventRepository/eventRepository.types.ts b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts index bd87c370a03..0b45e536490 100644 --- a/apps/webapp/app/v3/eventRepository/eventRepository.types.ts +++ b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts @@ -25,6 +25,7 @@ export type LlmMetricsData = { genAiSystem: string; requestModel: string; responseModel: string; + baseResponseModel: string; matchedModelId: string; operationId: string; finishReason: string; diff --git a/apps/webapp/app/v3/featureFlags.server.ts b/apps/webapp/app/v3/featureFlags.server.ts index e889b2123d2..f32f34c64b8 100644 --- a/apps/webapp/app/v3/featureFlags.server.ts +++ b/apps/webapp/app/v3/featureFlags.server.ts @@ -7,6 +7,8 @@ export const FEATURE_FLAG = { taskEventRepository: "taskEventRepository", hasQueryAccess: "hasQueryAccess", hasLogsPageAccess: "hasLogsPageAccess", + hasAiAccess: "hasAiAccess", + hasAiModelsAccess: "hasAiModelsAccess", } as const; const FeatureFlagCatalog = { @@ -15,6 +17,8 @@ const FeatureFlagCatalog = { [FEATURE_FLAG.taskEventRepository]: z.enum(["clickhouse", "clickhouse_v2", "postgres"]), [FEATURE_FLAG.hasQueryAccess]: z.coerce.boolean(), 
[FEATURE_FLAG.hasLogsPageAccess]: z.coerce.boolean(), + [FEATURE_FLAG.hasAiAccess]: z.coerce.boolean(), + [FEATURE_FLAG.hasAiModelsAccess]: z.coerce.boolean(), }; type FeatureFlagKey = keyof typeof FeatureFlagCatalog; diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 627609bb1d8..2212c41779d 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -1,4 +1,4 @@ -import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-pricing"; +import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog"; import { prisma, $replica } from "~/db.server"; import { env } from "~/env.server"; import { signalsEmitter } from "~/services/signals.server"; diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts index e380459f717..1d2c5467742 100644 --- a/apps/webapp/app/v3/querySchemas.ts +++ b/apps/webapp/app/v3/querySchemas.ts @@ -859,7 +859,111 @@ export const llmMetricsSchema: TableSchema = { }, }; -export const querySchemas: TableSchema[] = [runsSchema, metricsSchema, llmMetricsSchema]; +/** + * Schema definition for the llm_models table (trigger_dev.llm_model_aggregates_v1) + * Global table — no tenant columns. Contains anonymized cross-tenant model performance data. + */ +export const llmModelsSchema: TableSchema = { + name: "llm_models", + clickhouseName: "trigger_dev.llm_model_aggregates_v1", + description: + "Cross-tenant model performance aggregates: calls, cost, latency, and throughput per model per minute. 
No tenant-specific data.", + timeConstraint: "minute", + // No tenantColumns — this is a global table with anonymized data + columns: { + response_model: { + name: "response_model", + ...column("String", { + description: "The model name as returned by the provider", + example: "gpt-4o-2024-08-06", + coreColumn: true, + }), + }, + base_response_model: { + name: "base_response_model", + ...column("String", { + description: "The base model name with dated variants grouped", + example: "gpt-4o", + coreColumn: true, + }), + }, + gen_ai_system: { + name: "gen_ai_system", + ...column("String", { + description: "The AI provider system identifier", + example: "openai.responses", + coreColumn: true, + }), + }, + minute: { + name: "minute", + ...column("DateTime", { + description: "Aggregation time bucket (per minute)", + coreColumn: true, + }), + }, + call_count: { + name: "call_count", + ...column("UInt64", { + description: "Number of LLM calls in this time bucket", + coreColumn: true, + }), + }, + total_input_tokens: { + name: "total_input_tokens", + ...column("UInt64", { + description: "Total input tokens consumed", + }), + }, + total_output_tokens: { + name: "total_output_tokens", + ...column("UInt64", { + description: "Total output tokens generated", + }), + }, + total_cost: { + name: "total_cost", + ...column("Float64", { + description: "Total cost in USD", + customRenderType: "costInDollars", + coreColumn: true, + }), + }, + // Aggregate state columns — use quantilesMerge() in queries to extract values + // Example: quantilesMerge(0.5)(ttfc_quantiles)[1] AS ttfc_p50 + ttfc_quantiles: { + name: "ttfc_quantiles", + ...column("String", { + description: + "Time to first chunk quantile state. Use quantilesMerge(0.5)(ttfc_quantiles)[1] AS ttfc_p50 in queries.", + example: "quantilesMerge(0.5)(ttfc_quantiles)[1]", + }), + }, + tps_quantiles: { + name: "tps_quantiles", + ...column("String", { + description: + "Tokens per second quantile state. 
Use quantilesMerge(0.5)(tps_quantiles)[1] AS tps_p50 in queries.", + example: "quantilesMerge(0.5)(tps_quantiles)[1]", + }), + }, + duration_quantiles: { + name: "duration_quantiles", + ...column("String", { + description: + "Duration quantile state. Use quantilesMerge(0.5)(duration_quantiles)[1] AS duration_p50 in queries.", + example: "quantilesMerge(0.5)(duration_quantiles)[1]", + }), + }, + }, +}; + +export const querySchemas: TableSchema[] = [ + runsSchema, + metricsSchema, + llmMetricsSchema, + llmModelsSchema, +]; /** * Default query for the query editor diff --git a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts index 6b52e93469f..64382010496 100644 --- a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts +++ b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts @@ -1,6 +1,7 @@ +import { modelCatalog } from "@internal/llm-model-catalog"; import type { CreateEventInput, LlmMetricsData } from "../eventRepository/eventRepository.types"; -// Registry interface — matches ModelPricingRegistry from @internal/llm-pricing +// Registry interface — matches ModelPricingRegistry from @internal/llm-model-catalog type CostRegistry = { isLoaded: boolean; calculateCost( @@ -183,6 +184,7 @@ function enrichLlmMetrics(event: CreateEventInput): void { genAiSystem: typeof props["gen_ai.system"] === "string" ? props["gen_ai.system"] : "unknown", requestModel: typeof props["gen_ai.request.model"] === "string" ? props["gen_ai.request.model"] : responseModel, responseModel, + baseResponseModel: modelCatalog[responseModel]?.baseModelName ?? responseModel, matchedModelId: cost?.matchedModelId ?? 
"", operationId, finishReason, diff --git a/apps/webapp/package.json b/apps/webapp/package.json index a34b0cf7f90..3eafd1467fc 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -57,7 +57,7 @@ "@heroicons/react": "^2.0.12", "@jsonhero/schema-infer": "^0.1.5", "@internal/cache": "workspace:*", - "@internal/llm-pricing": "workspace:*", + "@internal/llm-model-catalog": "workspace:*", "@internal/redis": "workspace:*", "@internal/run-engine": "workspace:*", "@internal/schedule-engine": "workspace:*", diff --git a/apps/webapp/seed-ai-spans.mts b/apps/webapp/seed-ai-spans.mts index 35ec3d5851e..6ada9fb3285 100644 --- a/apps/webapp/seed-ai-spans.mts +++ b/apps/webapp/seed-ai-spans.mts @@ -11,7 +11,7 @@ import { enrichCreatableEvents, setLlmPricingRegistry, } from "./app/v3/utils/enrichCreatableEvents.server"; -import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-pricing"; +import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog"; import { nanoid } from "nanoid"; import { unflattenAttributes } from "@trigger.dev/core/v3/utils/flattenAttributes"; import type { Attributes } from "@opentelemetry/api"; @@ -138,6 +138,7 @@ function eventToLlmMetricsRow(event: CreateEventInput): LlmMetricsV1Input { gen_ai_system: llm.genAiSystem, request_model: llm.requestModel, response_model: llm.responseModel, + base_response_model: llm.baseResponseModel, matched_model_id: llm.matchedModelId, operation_id: llm.operationId, finish_reason: llm.finishReason, @@ -156,6 +157,8 @@ function eventToLlmMetricsRow(event: CreateEventInput): LlmMetricsV1Input { ms_to_first_chunk: llm.msToFirstChunk, tokens_per_second: llm.tokensPerSecond, metadata: llm.metadata, + prompt_slug: llm.promptSlug, + prompt_version: llm.promptVersion, start_time: formatStartTime(BigInt(event.startTime)), duration: formatDuration(event.duration ?? 
0), }; diff --git a/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql b/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql new file mode 100644 index 00000000000..3d23295b026 --- /dev/null +++ b/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql @@ -0,0 +1,7 @@ +-- +goose Up +ALTER TABLE trigger_dev.llm_metrics_v1 + ADD COLUMN base_response_model String DEFAULT '' CODEC(ZSTD(1)); + +-- +goose Down +ALTER TABLE trigger_dev.llm_metrics_v1 + DROP COLUMN base_response_model; diff --git a/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql b/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql new file mode 100644 index 00000000000..3797f744bc2 --- /dev/null +++ b/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql @@ -0,0 +1,57 @@ +-- +goose Up + +-- Pre-aggregated model performance metrics with no tenant information. +-- Used for cross-tenant model comparisons in the Model Registry. +-- Aggregated per minute for high-resolution model performance tracking. 
+CREATE TABLE IF NOT EXISTS trigger_dev.llm_model_aggregates_v1 +( + response_model String, + base_response_model String DEFAULT '', + gen_ai_system LowCardinality(String), + minute DateTime, + + -- Counts & totals (SimpleAggregateFunction for sum) + call_count SimpleAggregateFunction(sum, UInt64), + total_input_tokens SimpleAggregateFunction(sum, UInt64), + total_output_tokens SimpleAggregateFunction(sum, UInt64), + total_cost SimpleAggregateFunction(sum, Float64), + + -- Performance quantiles (AggregateFunction for merge across parts) + ttfc_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), Float64), + tps_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), Float64), + duration_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), UInt64), + + -- Finish reason distribution + finish_reason_counts SimpleAggregateFunction(sumMap, Map(String, UInt64)) +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(minute) +ORDER BY (response_model, base_response_model, gen_ai_system, minute) +TTL toDate(minute) + INTERVAL 365 DAY +SETTINGS ttl_only_drop_parts = 1; + +-- Materialized view that feeds the aggregate table from llm_metrics_v1. +-- Strips all tenant-specific columns (org, project, env, run, span, trace). +-- base_response_model comes from the source table (populated during event enrichment). 
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.llm_model_aggregates_mv_v1 +TO trigger_dev.llm_model_aggregates_v1 +AS SELECT + response_model, + base_response_model, + gen_ai_system, + toStartOfMinute(start_time) AS minute, + count() AS call_count, + sum(input_tokens) AS total_input_tokens, + sum(output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesStateIf(0.5, 0.9, 0.95, 0.99)(ms_to_first_chunk, ms_to_first_chunk > 0) AS ttfc_quantiles, + quantilesStateIf(0.5, 0.9, 0.95, 0.99)(tokens_per_second, tokens_per_second > 0) AS tps_quantiles, + quantilesState(0.5, 0.9, 0.95, 0.99)(duration) AS duration_quantiles, + sumMap(map(finish_reason, toUInt64(1))) AS finish_reason_counts +FROM trigger_dev.llm_metrics_v1 +WHERE response_model != '' +GROUP BY response_model, base_response_model, gen_ai_system, minute; + +-- +goose Down +DROP TABLE IF EXISTS trigger_dev.llm_model_aggregates_mv_v1; +DROP TABLE IF EXISTS trigger_dev.llm_model_aggregates_v1; diff --git a/internal-packages/clickhouse/src/client/queryBuilder.ts b/internal-packages/clickhouse/src/client/queryBuilder.ts index dc0fb297cc9..fb0430fd0db 100644 --- a/internal-packages/clickhouse/src/client/queryBuilder.ts +++ b/internal-packages/clickhouse/src/client/queryBuilder.ts @@ -36,6 +36,12 @@ export class ClickhouseQueryBuilder { this.settings = settings; } + /** Set query parameters without adding a WHERE clause. Use for base queries with inline params. 
*/ + setParams(params: QueryParams): this { + Object.assign(this.params, params); + return this; + } + where(clause: string, params?: QueryParams): this { this.whereClauses.push(clause); if (params) { diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index 18e52483627..99d22a5a18e 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -28,6 +28,11 @@ import { } from "./taskEvents.js"; import { insertMetrics } from "./metrics.js"; import { insertLlmMetrics } from "./llmMetrics.js"; +import { + getGlobalModelMetrics, + getGlobalModelComparison, + getPopularModels, +} from "./llmModelAggregates.js"; import { getErrorGroups, getErrorInstances, @@ -46,6 +51,7 @@ export type * from "./taskRuns.js"; export type * from "./taskEvents.js"; export type * from "./metrics.js"; export type * from "./llmMetrics.js"; +export type * from "./llmModelAggregates.js"; export type * from "./errors.js"; export type * from "./client/queryBuilder.js"; @@ -233,6 +239,14 @@ export class ClickHouse { }; } + get llmModelAggregates() { + return { + globalMetrics: getGlobalModelMetrics(this.reader), + comparison: getGlobalModelComparison(this.reader), + popular: getPopularModels(this.reader), + }; + } + get taskEventsV2() { return { insert: insertTaskEventsV2(this.writer), diff --git a/internal-packages/clickhouse/src/llmMetrics.ts b/internal-packages/clickhouse/src/llmMetrics.ts index 200979f7b61..ba064df125f 100644 --- a/internal-packages/clickhouse/src/llmMetrics.ts +++ b/internal-packages/clickhouse/src/llmMetrics.ts @@ -13,6 +13,7 @@ export const LlmMetricsV1Input = z.object({ gen_ai_system: z.string(), request_model: z.string(), response_model: z.string(), + base_response_model: z.string(), matched_model_id: z.string(), operation_id: z.string(), finish_reason: z.string(), diff --git a/internal-packages/clickhouse/src/llmModelAggregates.ts 
b/internal-packages/clickhouse/src/llmModelAggregates.ts new file mode 100644 index 00000000000..4e7bbba7bc9 --- /dev/null +++ b/internal-packages/clickhouse/src/llmModelAggregates.ts @@ -0,0 +1,138 @@ +import { z } from "zod"; +import { ClickhouseQueryBuilder } from "./client/queryBuilder.js"; +import type { ClickhouseReader } from "./client/types.js"; + +// --- Schemas --- + +const ModelMetricsRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + minute: z.string(), + call_count: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), + ttfc_p90: z.coerce.number(), + ttfc_p95: z.coerce.number(), + ttfc_p99: z.coerce.number(), + tps_p50: z.coerce.number(), + tps_p90: z.coerce.number(), + tps_p95: z.coerce.number(), + tps_p99: z.coerce.number(), + duration_p50: z.coerce.number(), + duration_p90: z.coerce.number(), + duration_p95: z.coerce.number(), + duration_p99: z.coerce.number(), +}); + +const ModelSummaryRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + call_count: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), + ttfc_p90: z.coerce.number(), + tps_p50: z.coerce.number(), + tps_p90: z.coerce.number(), +}); + +const PopularModelRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + call_count: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), +}); + +// --- Query builders --- + +/** Get per-minute metrics for a specific model over a date range. 
*/ +export function getGlobalModelMetrics(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getGlobalModelMetrics", + `SELECT + response_model, + gen_ai_system, + minute, + sum(call_count) AS call_count, + sum(total_input_tokens) AS total_input_tokens, + sum(total_output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50, + ttfc_arr[2] AS ttfc_p90, + ttfc_arr[3] AS ttfc_p95, + ttfc_arr[4] AS ttfc_p99, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(tps_quantiles) AS tps_arr, + tps_arr[1] AS tps_p50, + tps_arr[2] AS tps_p90, + tps_arr[3] AS tps_p95, + tps_arr[4] AS tps_p99, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(duration_quantiles) AS dur_arr, + dur_arr[1] AS duration_p50, + dur_arr[2] AS duration_p90, + dur_arr[3] AS duration_p95, + dur_arr[4] AS duration_p99 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE response_model = {responseModel: String} + AND minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system, minute + ORDER BY minute`, + reader, + ModelMetricsRow + ); +} + +/** Get summary metrics for multiple models (for comparison). 
*/ +export function getGlobalModelComparison(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getGlobalModelComparison", + `SELECT + response_model, + gen_ai_system, + sum(call_count) AS call_count, + sum(total_input_tokens) AS total_input_tokens, + sum(total_output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesMerge(0.5, 0.9)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50, + ttfc_arr[2] AS ttfc_p90, + quantilesMerge(0.5, 0.9)(tps_quantiles) AS tps_arr, + tps_arr[1] AS tps_p50, + tps_arr[2] AS tps_p90 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE response_model IN {responseModels: Array(String)} + AND minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system + ORDER BY call_count DESC`, + reader, + ModelSummaryRow + ); +} + +/** Get the most popular models by call count. */ +export function getPopularModels(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getPopularModels", + `SELECT + response_model, + gen_ai_system, + sum(call_count) AS call_count, + sum(total_cost) AS total_cost, + quantilesMerge(0.5)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system + ORDER BY call_count DESC + LIMIT {limit: UInt32}`, + reader, + PopularModelRow + ); +} diff --git a/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql b/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql new file mode 100644 index 00000000000..759d17dc93e --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql @@ -0,0 +1,7 @@ +-- AlterTable +ALTER TABLE "public"."llm_models" ADD COLUMN "capabilities" TEXT[] DEFAULT ARRAY[]::TEXT[], +ADD 
COLUMN "context_window" INTEGER, +ADD COLUMN "description" TEXT, +ADD COLUMN "is_hidden" BOOLEAN NOT NULL DEFAULT false, +ADD COLUMN "max_output_tokens" INTEGER, +ADD COLUMN "provider" TEXT; diff --git a/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql b/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql new file mode 100644 index 00000000000..acdae6e37ba --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."llm_models" ADD COLUMN "base_model_name" TEXT; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 5ebc78508b9..bf3c946a985 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -2670,6 +2670,15 @@ model LlmModel { createdAt DateTime @default(now()) @map("created_at") updatedAt DateTime @updatedAt @map("updated_at") + // Catalog metadata for model registry + provider String? @map("provider") + description String? @map("description") + contextWindow Int? @map("context_window") + maxOutputTokens Int? @map("max_output_tokens") + capabilities String[] @default([]) @map("capabilities") + isHidden Boolean @default(false) @map("is_hidden") + baseModelName String? 
@map("base_model_name") + pricingTiers LlmPricingTier[] prices LlmPrice[] diff --git a/internal-packages/llm-model-catalog/package.json b/internal-packages/llm-model-catalog/package.json new file mode 100644 index 00000000000..be27ce3529d --- /dev/null +++ b/internal-packages/llm-model-catalog/package.json @@ -0,0 +1,20 @@ +{ + "name": "@internal/llm-model-catalog", + "private": true, + "version": "0.0.1", + "main": "./src/index.ts", + "types": "./src/index.ts", + "type": "module", + "dependencies": { + "@trigger.dev/core": "workspace:*", + "@trigger.dev/database": "workspace:*" + }, + "scripts": { + "typecheck": "tsc --noEmit", + "generate": "node scripts/generate.mjs", + "sync-prices": "bash scripts/sync-model-prices.sh && node scripts/generate.mjs", + "sync-prices:check": "bash scripts/sync-model-prices.sh --check", + "generate-catalog": "bash scripts/generate-model-catalog.sh && node scripts/generate.mjs", + "generate-catalog:dry-run": "bash scripts/generate-model-catalog.sh --dry-run" + } +} diff --git a/internal-packages/llm-model-catalog/scripts/.gitignore b/internal-packages/llm-model-catalog/scripts/.gitignore new file mode 100644 index 00000000000..333c1e910a3 --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/.gitignore @@ -0,0 +1 @@ +logs/ diff --git a/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh b/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh new file mode 100755 index 00000000000..c1d945e04b3 --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh @@ -0,0 +1,346 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Generate model-catalog.json by researching each unique base model using Claude Code CLI. +# Usage: ./scripts/generate-model-catalog.sh [options] +# +# Options: +# --dry-run Print models that would be researched without running Claude +# --filter Only research models matching this ERE pattern (e.g. 
"gpt-4o|claude") +# --max Maximum number of models to research (useful for testing) +# --stale-days Re-research models older than N days (default: 7) +# --force Re-research all models regardless of resolvedAt timestamp +# --skip-hidden Skip models already marked as hidden/deprecated (saves time) +# --concurrency Number of models to research in parallel (default: 5) +# +# The script: +# 1. Extracts all modelNames from defaultPrices.ts +# 2. Groups dated variants to their base model +# 3. Runs research-model.sh for each base model (in parallel) +# 4. Writes results incrementally to model-catalog.json +# +# Logs are written to scripts/logs/ for debugging failures. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PACKAGE_DIR="$(dirname "$SCRIPT_DIR")" +DEFAULTS_FILE="$PACKAGE_DIR/src/defaultPrices.ts" +CATALOG_FILE="$PACKAGE_DIR/src/model-catalog.json" +RESEARCH_SCRIPT="$SCRIPT_DIR/research-model.sh" + +LOG_DIR="$SCRIPT_DIR/logs" +mkdir -p "$LOG_DIR" + +DRY_RUN=false +FILTER="" +MAX_MODELS=0 +STALE_DAYS=7 +FORCE=false +SKIP_HIDDEN=false +CONCURRENCY=5 + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=true; shift ;; + --filter) FILTER="$2"; shift 2 ;; + --max) MAX_MODELS="$2"; shift 2 ;; + --stale-days) STALE_DAYS="$2"; shift 2 ;; + --force) FORCE=true; shift ;; + --skip-hidden) SKIP_HIDDEN=true; shift ;; + --concurrency) CONCURRENCY="$2"; shift 2 ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +# Extract all model names from defaultPrices.ts +ALL_MODELS=$(grep -o '"modelName": "[^"]*"' "$DEFAULTS_FILE" | sed 's/"modelName": "//;s/"//' | sort -u) + +# Skip embedding, legacy completion, and fine-tuned models +SKIP_PATTERNS="^text-embedding|^textembedding|^text-ada|^text-babbage|^text-curie|^text-davinci|^text-bison|^text-unicorn|^code-bison|^code-gecko|^codechat-bison|^chat-bison|^babbage-002|^davinci-002|^ft:|^gemini-live" + +FILTERED_MODELS=$(echo "$ALL_MODELS" | grep -vE "$SKIP_PATTERNS") + +if [[ -n "$FILTER" ]]; then + 
FILTERED_MODELS=$(echo "$FILTERED_MODELS" | grep -E "$FILTER" || true) +fi + +# Group dated variants to base models +declare -A BASE_TO_VARIANTS +declare -A MODEL_TO_BASE + +for model in $FILTERED_MODELS; do + base=$(echo "$model" | sed -E 's/-[0-9]{4}-?[0-9]{2}-?[0-9]{2}$//') + base_no_latest=$(echo "$base" | sed -E 's/-latest$//') + if [[ ${#base_no_latest} -lt ${#base} ]]; then + base="$base_no_latest" + fi + + MODEL_TO_BASE["$model"]="$base" + + if [[ -n "${BASE_TO_VARIANTS[$base]:-}" ]]; then + BASE_TO_VARIANTS["$base"]="${BASE_TO_VARIANTS[$base]} $model" + else + BASE_TO_VARIANTS["$base"]="$model" + fi +done + +BASE_MODELS=$(printf '%s\n' "${!BASE_TO_VARIANTS[@]}" | sort -u) +TOTAL=$(echo "$BASE_MODELS" | wc -l | tr -d ' ') + +if [[ "$MAX_MODELS" -gt 0 ]]; then + BASE_MODELS=$(echo "$BASE_MODELS" | head -n "$MAX_MODELS") + TOTAL=$(echo "$BASE_MODELS" | wc -l | tr -d ' ') +fi + +echo "Found $TOTAL unique base models (concurrency: $CONCURRENCY)" + +if $DRY_RUN; then + echo "" + echo "Base models and their variants:" + for base in $BASE_MODELS; do + echo " $base → ${BASE_TO_VARIANTS[$base]}" + done + exit 0 +fi + +# Load existing catalog +if [[ -f "$CATALOG_FILE" ]]; then + EXISTING_CATALOG=$(cat "$CATALOG_FILE") +else + EXISTING_CATALOG="{}" +fi + +# Lock file for thread-safe catalog writes +LOCK_FILE="$LOG_DIR/.catalog.lock" +RESULTS_DIR="$LOG_DIR/results" +mkdir -p "$RESULTS_DIR" + +ERRORS=0 +FAILED_MODELS="" +SKIPPED=0 +RESEARCHED=0 +CHANGED=0 + +# --- Determine which models need research --- + +MODELS_TO_RESEARCH="" +COUNT=0 + +for base in $BASE_MODELS; do + COUNT=$((COUNT + 1)) + + SKIP_REASON=$(echo "$EXISTING_CATALOG" | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const entry = data['$base']; + if (!entry) { process.stdout.write('missing'); return; } + if ($FORCE) { process.stdout.write('force'); return; } + if ($SKIP_HIDDEN && entry.isHidden) { process.stdout.write('hidden'); return; } + const resolvedAt = 
entry.resolvedAt ? new Date(entry.resolvedAt) : null; + if (!resolvedAt) { process.stdout.write('no_timestamp'); return; } + const staleMs = $STALE_DAYS * 24 * 60 * 60 * 1000; + if (Date.now() - resolvedAt.getTime() > staleMs) { process.stdout.write('stale'); return; } + process.stdout.write('fresh'); + " 2>/dev/null || echo "missing") + + case "$SKIP_REASON" in + fresh) + RESOLVED_DATE=$(echo "$EXISTING_CATALOG" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));const r=d['$base']?.resolvedAt;console.log(r?r.split('T')[0]:'?')" 2>/dev/null) + echo "[$COUNT/$TOTAL] Skipping $base (resolved $RESOLVED_DATE)" + SKIPPED=$((SKIPPED + 1)) + ;; + hidden) + echo "[$COUNT/$TOTAL] Skipping $base (hidden/deprecated)" + SKIPPED=$((SKIPPED + 1)) + ;; + *) + MODELS_TO_RESEARCH="$MODELS_TO_RESEARCH $base" + ;; + esac +done + +RESEARCH_COUNT=$(echo "$MODELS_TO_RESEARCH" | wc -w | tr -d ' ') +echo "" +echo "Researching $RESEARCH_COUNT models, skipped $SKIPPED" +echo "" + +if [[ "$RESEARCH_COUNT" -eq 0 ]]; then + echo "Nothing to do." + exit 0 +fi + +# --- Research function (called per model, may run in parallel) --- + +research_model() { + local base="$1" + local idx="$2" + local total="$3" + local model_log="$LOG_DIR/$base.log" + local result_file="$RESULTS_DIR/$base.json" + + echo "[$idx/$total] Researching $base..." + + local raw + raw=$("$RESEARCH_SCRIPT" "$base" 3 2>&1) || { + echo "$raw" > "$model_log" + echo " ERROR: Failed to research $base (after retries). Log: $model_log" >&2 + echo '{"error":true}' > "$result_file" + return 1 + } + + echo "$raw" > "$model_log" + + local entry + entry=$(echo "$raw" | node -e " + try { + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + let text = (typeof d.result === 'string' ? 
d.result : JSON.stringify(d)).trim(); + text = text.replace(/^\`\`\`(?:json)?\s*/i, '').replace(/\s*\`\`\`\s*$/, '').trim(); + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (jsonMatch) text = jsonMatch[0]; + const r = JSON.parse(text); + if (!r.provider) throw new Error('missing provider field'); + process.stdout.write(JSON.stringify({ + provider: r.provider, + description: r.description || '', + contextWindow: r.contextWindow || null, + maxOutputTokens: r.maxOutputTokens || null, + capabilities: r.capabilities || [], + releaseDate: r.releaseDate || null, + isHidden: r.isHidden === true, + supportsStructuredOutput: r.supportsStructuredOutput === true, + supportsParallelToolCalls: r.supportsParallelToolCalls === true, + supportsStreamingToolCalls: r.supportsStreamingToolCalls === true, + deprecationDate: r.deprecationDate || null, + knowledgeCutoff: r.knowledgeCutoff || null, + resolvedAt: new Date().toISOString() + })); + } catch(e) { + process.stderr.write(e.message); + process.exit(1); + } + " 2>"$LOG_DIR/$base.parse-error") || { + local parse_err + parse_err=$(cat "$LOG_DIR/$base.parse-error" 2>/dev/null) + echo " ERROR: Failed to parse response for $base: $parse_err" >&2 + echo " Raw response saved to: $model_log" >&2 + echo '{"error":true}' > "$result_file" + return 1 + } + + echo "$entry" > "$result_file" + echo " OK: $(echo "$entry" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));console.log(d.provider + ' / ' + (d.contextWindow||'?') + ' ctx / ' + d.capabilities.length + ' caps')" 2>/dev/null)" +} + +export -f research_model +export RESEARCH_SCRIPT LOG_DIR RESULTS_DIR + +# --- Run research in parallel --- + +IDX=0 +PIDS=() +MODEL_LIST=($MODELS_TO_RESEARCH) + +for base in "${MODEL_LIST[@]}"; do + IDX=$((IDX + 1)) + + research_model "$base" "$IDX" "$RESEARCH_COUNT" & + PIDS+=($!) 
+ + # Throttle concurrency + if [[ ${#PIDS[@]} -ge $CONCURRENCY ]]; then + wait "${PIDS[0]}" 2>/dev/null || true + PIDS=("${PIDS[@]:1}") + fi +done + +# Wait for remaining +for pid in "${PIDS[@]}"; do + wait "$pid" 2>/dev/null || true +done + +echo "" +echo "Research complete. Merging results..." + +# --- Merge results into catalog --- + +CATALOG="$EXISTING_CATALOG" + +for base in "${MODEL_LIST[@]}"; do + RESULT_FILE="$RESULTS_DIR/$base.json" + + if [[ ! -f "$RESULT_FILE" ]]; then + ERRORS=$((ERRORS + 1)) + FAILED_MODELS="$FAILED_MODELS $base" + continue + fi + + ENTRY=$(cat "$RESULT_FILE") + + # Check for error marker + if echo "$ENTRY" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));process.exit(d.error?0:1)" 2>/dev/null; then + ERRORS=$((ERRORS + 1)) + FAILED_MODELS="$FAILED_MODELS $base" + continue + fi + + RESEARCHED=$((RESEARCHED + 1)) + + # Diff detection: compare with existing entry + OLD_ENTRY=$(echo "$EXISTING_CATALOG" | node -e " + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const e = d['$base']; + if (e) { delete e.resolvedAt; process.stdout.write(JSON.stringify(e)); } + else process.stdout.write('null'); + " 2>/dev/null) + + NEW_FOR_DIFF=$(echo "$ENTRY" | node -e " + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + delete d.resolvedAt; + process.stdout.write(JSON.stringify(d)); + " 2>/dev/null) + + if [[ "$OLD_ENTRY" != "null" && "$OLD_ENTRY" != "$NEW_FOR_DIFF" ]]; then + CHANGED=$((CHANGED + 1)) + # Log what changed + node -e " + const old = JSON.parse('$OLD_ENTRY'); + const cur = JSON.parse('$NEW_FOR_DIFF'); + const changes = []; + for (const k of new Set([...Object.keys(old), ...Object.keys(cur)])) { + const o = JSON.stringify(old[k]); const n = JSON.stringify(cur[k]); + if (o !== n) changes.push(k + ': ' + o + ' → ' + n); + } + if (changes.length) console.log(' CHANGED: ' + changes.join(', ')); + " 2>/dev/null || true + fi + + # Apply to all variants of this base 
model + for variant in ${BASE_TO_VARIANTS[$base]}; do + CATALOG=$(echo "$CATALOG" | node -e " + const catalog = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + catalog['$variant'] = $ENTRY; + process.stdout.write(JSON.stringify(catalog)); + ") + done +done + +# Write final catalog +echo "$CATALOG" | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const sorted = Object.keys(data).sort().reduce((acc, k) => { acc[k] = data[k]; return acc; }, {}); + process.stdout.write(JSON.stringify(sorted, null, 2) + '\n'); +" > "$CATALOG_FILE" + +# Cleanup results +rm -rf "$RESULTS_DIR" + +FINAL_COUNT=$(node -e "console.log(Object.keys(JSON.parse(require('fs').readFileSync('$CATALOG_FILE','utf-8'))).length)") +echo "" +echo "Done! $FINAL_COUNT entries in catalog" +echo " Researched: $RESEARCHED | Changed: $CHANGED | Skipped: $SKIPPED | Errors: $ERRORS" + +if [[ "$ERRORS" -gt 0 ]]; then + echo "" + echo "Failed models:$FAILED_MODELS" + RETRY_PATTERN=$(echo "$FAILED_MODELS" | tr ' ' '\n' | grep -v '^$' | sed 's/\./\\./g; s/^/^/; s/$/$/' | paste -sd '|' -) + echo "Retry with: $0 --filter \"$RETRY_PATTERN\"" +fi diff --git a/internal-packages/llm-model-catalog/scripts/generate.mjs b/internal-packages/llm-model-catalog/scripts/generate.mjs new file mode 100644 index 00000000000..115643b1dff --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/generate.mjs @@ -0,0 +1,93 @@ +#!/usr/bin/env node + +// Cross-platform generation script for the llm-model-catalog package. +// Generates TypeScript modules from JSON data files: +// 1. defaultPrices.ts ← default-model-prices.json (synced from Langfuse) +// 2.
modelCatalog.ts ← model-catalog.json (our maintained catalog metadata) +// +// Usage: node scripts/generate.mjs +// +// To update the source JSON files: +// - Pricing: pnpm run sync-prices (fetches from Langfuse, requires curl) +// - Catalog: pnpm run generate-catalog (uses Claude CLI to research models) + +import { readFileSync, writeFileSync, existsSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const srcDir = join(__dirname, "..", "src"); + +// --- 1. Generate defaultPrices.ts from default-model-prices.json --- + +const pricesJsonPath = join(srcDir, "default-model-prices.json"); + +if (existsSync(pricesJsonPath)) { + const raw = JSON.parse(readFileSync(pricesJsonPath, "utf-8")); + const stripped = raw.map((e) => ({ + modelName: e.modelName.trim(), + matchPattern: e.matchPattern, + startDate: e.createdAt, + pricingTiers: e.pricingTiers.map((t) => ({ + name: t.name, + isDefault: t.isDefault, + priority: t.priority, + conditions: t.conditions.map((c) => ({ + usageDetailPattern: c.usageDetailPattern, + operator: c.operator, + value: c.value, + })), + prices: t.prices, + })), + })); + + let out = 'import type { DefaultModelDefinition } from "./types.js";\n\n'; + out += "// Auto-generated from default-model-prices.json — do not edit manually.\n"; + out += "// Run `pnpm run sync-prices` to update the JSON, then `pnpm run generate` to regenerate.\n"; + out += "// Source: https://github.com/langfuse/langfuse\n\n"; + out += "export const defaultModelPrices: DefaultModelDefinition[] = "; + out += JSON.stringify(stripped, null, 2) + ";\n"; + + writeFileSync(join(srcDir, "defaultPrices.ts"), out); + console.log(`Generated defaultPrices.ts (${stripped.length} models)`); +} else { + console.log("Skipping defaultPrices.ts — default-model-prices.json not found"); +} + +// --- 2. 
Generate modelCatalog.ts from model-catalog.json --- + +const catalogJsonPath = join(srcDir, "model-catalog.json"); + +if (existsSync(catalogJsonPath)) { + const data = JSON.parse(readFileSync(catalogJsonPath, "utf-8")); + + // Backfill missing fields for old entries + for (const key of Object.keys(data)) { + if (data[key].releaseDate === undefined) data[key].releaseDate = null; + if (data[key].isHidden === undefined) data[key].isHidden = false; + if (data[key].supportsStructuredOutput === undefined) data[key].supportsStructuredOutput = false; + if (data[key].supportsParallelToolCalls === undefined) data[key].supportsParallelToolCalls = false; + if (data[key].supportsStreamingToolCalls === undefined) data[key].supportsStreamingToolCalls = false; + if (data[key].deprecationDate === undefined) data[key].deprecationDate = null; + if (data[key].knowledgeCutoff === undefined) data[key].knowledgeCutoff = null; + if (data[key].resolvedAt === undefined) data[key].resolvedAt = new Date().toISOString(); + { + // Always recompute base model name (don't trust existing values) + // Strip trailing date (-YYYYMMDD or -YYYY-MM-DD) and -latest suffix + // Keep original naming (dots, etc.) — don't normalize + let base = key.replace(/-\d{4}-?\d{2}-?\d{2}$/, "").replace(/-latest$/, ""); + data[key].baseModelName = base !== key ? 
base : null; + } + } + + let out = 'import type { ModelCatalogEntry } from "./types.js";\n\n'; + out += "// Auto-generated from model-catalog.json — do not edit manually.\n"; + out += "// Run `pnpm run generate-catalog` to update the JSON, then `pnpm run generate` to regenerate.\n\n"; + out += "export const modelCatalog: Record<string, ModelCatalogEntry> = "; + out += JSON.stringify(data, null, 2) + ";\n"; + + writeFileSync(join(srcDir, "modelCatalog.ts"), out); + console.log(`Generated modelCatalog.ts (${Object.keys(data).length} entries)`); +} else { + console.log("Skipping modelCatalog.ts — model-catalog.json not found"); +} diff --git a/internal-packages/llm-model-catalog/scripts/research-model.sh b/internal-packages/llm-model-catalog/scripts/research-model.sh new file mode 100755 index 00000000000..422b7a5689a --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/research-model.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Research a single LLM model using Claude Code CLI and output structured JSON.
+# Usage: ./scripts/research-model.sh +# +# Example: +# ./scripts/research-model.sh gpt-4o +# → {"provider":"openai","description":"...","contextWindow":128000,...} + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +MODEL_NAME="$1" +MAX_RETRIES="${2:-3}" + +PROMPT="Research the LLM model '${MODEL_NAME}' and return ONLY a valid JSON object (no markdown, no explanation, no code fences) with these exact fields: + +{ + \"provider\": \"\", + \"description\": \"<1-2 sentence description of the model>\", + \"contextWindow\": , + \"maxOutputTokens\": , + \"capabilities\": [], + \"releaseDate\": \"\", + \"isHidden\": , + \"supportsStructuredOutput\": , + \"supportsParallelToolCalls\": , + \"supportsStreamingToolCalls\": , + \"deprecationDate\": \"\", + \"knowledgeCutoff\": \"\" +} + +Rules: +- provider must be one of: \"openai\", \"anthropic\", \"google\", \"meta\", \"mistral\", \"cohere\", \"ai21\", \"amazon\", \"xai\", \"deepseek\", \"qwen\", \"perplexity\" or the correct provider lowercase id +- description should be concise and factual (what the model is good at, its position in the provider's lineup) +- contextWindow is the maximum input context in tokens (e.g. 128000 for GPT-4o, 200000 for Claude Sonnet 4) +- maxOutputTokens is the maximum output the model can generate in a single response +- capabilities should be drawn from: \"vision\", \"tool_use\", \"streaming\", \"json_mode\", \"extended_thinking\", \"code_execution\", \"image_generation\", \"audio_input\", \"audio_output\", \"embedding\", \"fine_tunable\" +- Only include capabilities you are confident the model supports +- releaseDate is when the model was first publicly available (API launch date), in YYYY-MM-DD format. Use null if unknown. If the model is a dated variant (e.g. gpt-4o-2024-08-06), the date in the name IS the release date. +- isHidden should be true if the model is deprecated, discontinued, no longer available via API, or superseded by a newer version. 
Examples: gpt-3.5-turbo, claude-1.x, claude-2.x, text-davinci-003, gpt-4-0314 are hidden. Current/active models like gpt-4o, claude-sonnet-4-6, gemini-2.5-flash are NOT hidden. +- supportsStructuredOutput: true if the model reliably follows JSON schemas / structured output mode (e.g. OpenAI's response_format, Anthropic's tool_use for structured output). false for older models that don't support it well. +- supportsParallelToolCalls: true if the model can call multiple tools in a single assistant turn. Most modern models support this. +- supportsStreamingToolCalls: true if the model supports streaming partial tool call arguments as they're generated. +- deprecationDate: the date the provider has announced the model will be sunset/removed from their API, in YYYY-MM-DD format. Use null if no deprecation date has been announced. Only use dates that have been officially published by the provider. +- knowledgeCutoff: the date when the model's training data ends, in YYYY-MM-DD format. Use null if unknown. This is different from releaseDate — it's when the training data was cut off, not when the model launched. +- Output ONLY the JSON object, nothing else" + +for attempt in $(seq 1 "$MAX_RETRIES"); do + RESULT=$(claude -p "$PROMPT" --model opus --output-format json --permission-mode bypassPermissions --tools WebSearch,WebFetch 2>/dev/null) && { + echo "$RESULT" + exit 0 + } + if [[ "$attempt" -lt "$MAX_RETRIES" ]]; then + echo " Retry $attempt/$MAX_RETRIES for $MODEL_NAME..." 
>&2 + sleep 2 + fi +done + +echo " Failed after $MAX_RETRIES attempts for $MODEL_NAME" >&2 +exit 1 diff --git a/internal-packages/llm-pricing/scripts/sync-model-prices.sh b/internal-packages/llm-model-catalog/scripts/sync-model-prices.sh similarity index 53% rename from internal-packages/llm-pricing/scripts/sync-model-prices.sh rename to internal-packages/llm-model-catalog/scripts/sync-model-prices.sh index d72aa6714c6..74ad04f6bb3 100755 --- a/internal-packages/llm-pricing/scripts/sync-model-prices.sh +++ b/internal-packages/llm-model-catalog/scripts/sync-model-prices.sh @@ -8,7 +8,6 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PACKAGE_DIR="$(dirname "$SCRIPT_DIR")" JSON_TARGET="$PACKAGE_DIR/src/default-model-prices.json" -TS_TARGET="$PACKAGE_DIR/src/defaultPrices.ts" SOURCE_URL="https://raw.githubusercontent.com/langfuse/langfuse/main/worker/src/constants/default-model-prices.json" CHECK_MODE=false @@ -37,41 +36,11 @@ if $CHECK_MODE; then echo "Model prices are up to date ($MODEL_COUNT models)" exit 0 else - echo "Model prices are OUTDATED. Run 'pnpm run sync-prices' in @internal/llm-pricing to update." + echo "Model prices are OUTDATED. Run 'pnpm run sync-prices' in @internal/llm-model-catalog to update." exit 1 fi fi cp "$TMPFILE" "$JSON_TARGET" echo "Updated default-model-prices.json ($MODEL_COUNT models)" - -# Generate the TypeScript module from the JSON -echo "Generating defaultPrices.ts..." 
-node -e " -const data = JSON.parse(require('fs').readFileSync('$JSON_TARGET', 'utf-8')); -const stripped = data.map(e => ({ - modelName: e.modelName.trim(), - matchPattern: e.matchPattern, - startDate: e.createdAt, - pricingTiers: e.pricingTiers.map(t => ({ - name: t.name, - isDefault: t.isDefault, - priority: t.priority, - conditions: t.conditions.map(c => ({ - usageDetailPattern: c.usageDetailPattern, - operator: c.operator, - value: c.value, - })), - prices: t.prices, - })), -})); - -let out = 'import type { DefaultModelDefinition } from \"./types.js\";\n\n'; -out += '// Auto-generated from Langfuse default-model-prices.json — do not edit manually.\n'; -out += '// Run \`pnpm run sync-prices\` to update from upstream.\n'; -out += '// Source: https://github.com/langfuse/langfuse\n\n'; -out += 'export const defaultModelPrices: DefaultModelDefinition[] = '; -out += JSON.stringify(stripped, null, 2) + ';\n'; -require('fs').writeFileSync('$TS_TARGET', out); -console.log('Generated defaultPrices.ts with ' + stripped.length + ' models'); -" +echo "Run 'pnpm run generate' to regenerate defaultPrices.ts" diff --git a/internal-packages/llm-pricing/src/default-model-prices.json b/internal-packages/llm-model-catalog/src/default-model-prices.json similarity index 100% rename from internal-packages/llm-pricing/src/default-model-prices.json rename to internal-packages/llm-model-catalog/src/default-model-prices.json diff --git a/internal-packages/llm-pricing/src/defaultPrices.ts b/internal-packages/llm-model-catalog/src/defaultPrices.ts similarity index 99% rename from internal-packages/llm-pricing/src/defaultPrices.ts rename to internal-packages/llm-model-catalog/src/defaultPrices.ts index 2bcc371da10..fb347c2bef6 100644 --- a/internal-packages/llm-pricing/src/defaultPrices.ts +++ b/internal-packages/llm-model-catalog/src/defaultPrices.ts @@ -1,7 +1,7 @@ import type { DefaultModelDefinition } from "./types.js"; -// Auto-generated from Langfuse default-model-prices.json — do 
not edit manually. -// Run `pnpm run sync-prices` to update from upstream. +// Auto-generated from default-model-prices.json — do not edit manually. +// Run `pnpm run sync-prices` to update the JSON, then `pnpm run generate` to regenerate. // Source: https://github.com/langfuse/langfuse export const defaultModelPrices: DefaultModelDefinition[] = [ diff --git a/internal-packages/llm-pricing/src/index.ts b/internal-packages/llm-model-catalog/src/index.ts similarity index 73% rename from internal-packages/llm-pricing/src/index.ts rename to internal-packages/llm-model-catalog/src/index.ts index 3632434c137..8533e66ddd7 100644 --- a/internal-packages/llm-pricing/src/index.ts +++ b/internal-packages/llm-model-catalog/src/index.ts @@ -1,6 +1,8 @@ export { ModelPricingRegistry } from "./registry.js"; export { seedLlmPricing } from "./seed.js"; +export { syncLlmCatalog } from "./sync.js"; export { defaultModelPrices } from "./defaultPrices.js"; +export { modelCatalog } from "./modelCatalog.js"; export type { LlmModelWithPricing, LlmCostResult, @@ -8,4 +10,5 @@ export type { LlmPriceEntry, PricingCondition, DefaultModelDefinition, + ModelCatalogEntry, } from "./types.js"; diff --git a/internal-packages/llm-model-catalog/src/model-catalog.json b/internal-packages/llm-model-catalog/src/model-catalog.json new file mode 100644 index 00000000000..0f602683528 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/model-catalog.json @@ -0,0 +1,2446 @@ +{ + "chatgpt-4o-latest": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model optimized for speed and cost, capable of processing text, images, and audio with strong performance across reasoning, coding, and creative tasks.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + 
"supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T10:55:46.469Z" + }, + "claude-1.1": { + "provider": "anthropic", + "description": "An early-generation Claude model from Anthropic, offering basic conversational and text completion capabilities. It was quickly superseded by Claude 1.2, 1.3, and the Claude 2 family.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:47.906Z" + }, + "claude-1.2": { + "provider": "anthropic", + "description": "An early-generation Anthropic model, part of the original Claude 1.x family. It offered improved performance over Claude 1.0 but was quickly superseded by Claude 1.3 and later model families.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": null, + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.760Z" + }, + "claude-1.3": { + "provider": "anthropic", + "description": "Early-generation Claude model from Anthropic, offering improved performance over Claude 1.0-1.2 in reasoning and instruction-following tasks.", + "contextWindow": 100000, + "maxOutputTokens": null, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.227Z" + }, + "claude-2.0": { + "provider": 
"anthropic", + "description": "Anthropic's second-generation large language model, offering improved performance over Claude 1.x with longer context support. Succeeded by Claude 2.1 and later the Claude 3 family.", + "contextWindow": 100000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-07-11", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-02-01", + "resolvedAt": "2026-03-24T10:55:45.922Z" + }, + "claude-2.1": { + "provider": "anthropic", + "description": "Anthropic's Claude 2.1 model featuring a 200K context window, reduced hallucination rates compared to Claude 2.0, and improved accuracy on long document comprehension.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "tool_use" + ], + "releaseDate": "2023-11-21", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:56:22.743Z" + }, + "claude-3-5-haiku-20241022": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across common tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:56:25.724Z" + }, + "claude-3-5-sonnet-20240620": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.5 Sonnet is a mid-tier model balancing 
intelligence and speed, excelling at coding, analysis, and vision tasks while being faster and cheaper than Opus.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:56:35.401Z" + }, + "claude-3-haiku-20240307": { + "provider": "anthropic", + "description": "Anthropic's fastest and most compact Claude 3 model, optimized for speed and cost-efficiency while maintaining strong performance on everyday tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T10:56:25.288Z" + }, + "claude-3-opus-20240229": { + "provider": "anthropic", + "description": "Anthropic's most capable model in the Claude 3 family, excelling at complex analysis, nuanced content generation, and advanced reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-08-01", + "resolvedAt": "2026-03-24T10:56:26.008Z" + }, + "claude-3-sonnet-20240229": { + "provider": "anthropic", + "description": "Mid-tier model in Anthropic's Claude 3 family, balancing performance and speed for a wide range of tasks including analysis, coding, and 
content generation.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-02-01", + "resolvedAt": "2026-03-24T10:56:59.532Z" + }, + "claude-3.5-haiku-latest": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across a wide range of tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:57:04.392Z" + }, + "claude-3.5-sonnet-20241022": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z" + }, + "claude-3.5-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and 
Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z" + }, + "claude-3.7-sonnet-20250219": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z" + }, + "claude-3.7-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z" + }, + "claude-haiku-4-5-20251001": { + "provider": "anthropic", + "description": "Anthropic's fastest model with near-frontier intelligence, optimized for speed and cost efficiency 
while supporting extended thinking and vision.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-10-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-07-01", + "resolvedAt": "2026-03-24T10:57:29.685Z" + }, + "claude-instant-1": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model optimized for speed and efficiency, positioned as a lighter alternative to Claude 1.x for tasks requiring lower latency.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-06", + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:36.888Z" + }, + "claude-instant-1.2": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model, optimized for speed and efficiency while maintaining strong performance on conversational and text generation tasks.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-08-09", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:41.865Z" + }, + "claude-opus-4-1-20250805": { + "provider": "anthropic", + "description": "Anthropic's hybrid reasoning model with strong software engineering and agentic capabilities, scoring 74.5% on SWE-bench Verified. 
Supports both rapid responses and step-by-step extended thinking.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-08-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:36.876Z" + }, + "claude-opus-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's flagship model from the Claude 4 family, excelling at complex coding tasks, long-running agent workflows, and deep reasoning with extended thinking support.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:47.518Z" + }, + "claude-opus-4-5-20251101": { + "provider": "anthropic", + "description": "Anthropic's flagship intelligence model released in November 2025, excelling at complex reasoning, vision, and extended thinking with the best performance in Anthropic's lineup before Opus 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-11-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:48.961Z" + }, + "claude-opus-4-6": { + "provider": "anthropic", + "description": "Anthropic's most intelligent model, optimized for 
building agents and coding with exceptional reasoning capabilities and extended agentic task horizons.", + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-05-01", + "resolvedAt": "2026-03-24T10:58:42.061Z" + }, + "claude-sonnet-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z" + }, + "claude-sonnet-4-5-20250929": { + "provider": "anthropic", + "description": "Anthropic's high-performance mid-tier model with strong coding, reasoning, and multi-step problem solving capabilities. 
Successor to Claude Sonnet 4, offering improved benchmarks at the same price point.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-09-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T10:59:54.426Z" + }, + "claude-sonnet-4-6": { + "provider": "anthropic", + "description": "Anthropic's best combination of speed and intelligence, excelling at coding, agentic tasks, and computer use, with a 1M token context window and performance rivaling prior Opus-class models.", + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2026-01-01", + "resolvedAt": "2026-03-24T10:59:59.014Z" + }, + "claude-sonnet-4-latest": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. 
Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z" + }, + "gemini-1.0-pro": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z" + }, + "gemini-1.0-pro-001": { + "provider": "google", + "description": "Google's first-generation Pro model optimized for text generation, reasoning, and multi-turn conversation tasks, part of the original Gemini 1.0 lineup.", + "contextWindow": 30720, + "maxOutputTokens": 2048, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:27.391Z" + }, + "gemini-1.0-pro-latest": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. 
Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z" + }, + "gemini-1.5-pro-latest": { + "provider": "google", + "description": "Google's mid-size multimodal model with a massive context window, strong at long-document understanding, code generation, and multi-turn conversation.", + "contextWindow": 2097152, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-24", + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:59:25.463Z" + }, + "gemini-2.0-flash": { + "provider": "google", + "description": "Google's second-generation workhorse model optimized for speed, with native tool use, multimodal input (text, images, audio, video), and a 1M token context window.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:15.429Z" + }, + "gemini-2.0-flash-001": { + "provider": "google", + "description": "Google's fast and efficient multimodal model that outperforms Gemini 1.5 Pro on key benchmarks at twice the speed, supporting text, image, audio, and 
video inputs with native tool use.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "image_generation", + "audio_output", + "code_execution" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T11:01:04.084Z" + }, + "gemini-2.0-flash-lite-preview": { + "provider": "google", + "description": "A lightweight, cost-efficient variant of Gemini 2.0 Flash optimized for low latency and high throughput, supporting multimodal input with text output.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:00:56.775Z" + }, + "gemini-2.0-flash-lite-preview-02-05": { + "provider": "google", + "description": "Google's cost-optimized, low-latency model in the Gemini 2.0 family, designed for high-volume tasks like summarization, multimodal processing, and categorization.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-12-09", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:34.165Z" + }, + "gemini-2.5-flash": { + "provider": "google", + "description": "Google's best price-performance model optimized for low-latency, high-volume tasks requiring 
reasoning, with built-in thinking capabilities and multimodal input support.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-06-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:01:25.200Z" + }, + "gemini-2.5-flash-lite": { + "provider": "google", + "description": "Google's most cost-efficient Gemini model, optimized for low-latency use cases with strong reasoning, multilingual, and long-context capabilities at minimal cost.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-07-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-07-22", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:30.060Z" + }, + "gemini-2.5-pro": { + "provider": "google", + "description": "Google's most advanced reasoning model with deep thinking capabilities, excelling at complex tasks like coding, math, and multimodal understanding across text, images, audio, and video.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-03-25", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-17", + "knowledgeCutoff": "2025-01-31", + "resolvedAt": "2026-03-24T11:02:25.573Z" + }, + 
"gemini-3-flash-preview": { + "provider": "google", + "description": "Google's high-speed thinking model that matches Gemini 2.5 Pro performance at ~3x faster speed and lower cost, designed for agentic workflows, multi-turn chat, and coding assistance with configurable reasoning levels.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:13.388Z" + }, + "gemini-3-pro-preview": { + "provider": "google", + "description": "Google's flagship reasoning and multimodal model with strong coding and agentic capabilities, now deprecated in favor of Gemini 3.1 Pro.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-11-01", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-03-09", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.313Z" + }, + "gemini-3.1-flash-lite-preview": { + "provider": "google", + "description": "Google's most cost-efficient multimodal model in the Gemini 3 series, optimized for high-volume, low-latency tasks like translation, classification, and simple data extraction. 
Offers 2.5x faster time-to-first-token than Gemini 2.5 Flash.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-03-03", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.253Z" + }, + "gemini-3.1-pro-preview": { + "provider": "google", + "description": "Google's most advanced reasoning model in the Gemini 3.1 family, excelling at complex problem-solving across text, audio, images, video, and code with a 1M token context window and extended thinking capabilities.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-02-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:03:33.071Z" + }, + "gemini-pro": { + "provider": "google", + "description": "Google's first-generation Gemini model for text generation, reasoning, and multi-turn conversation. 
Superseded by Gemini 1.5 Pro and later models.", + "contextWindow": 32768, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-04-09", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:03:45.401Z" + }, + "gpt-3.5-turbo": { + "provider": "openai", + "description": "OpenAI's fast and cost-effective model optimized for chat and instruction-following tasks, now superseded by GPT-4o mini.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.412Z" + }, + "gpt-3.5-turbo-0125": { + "provider": "openai", + "description": "A fast and cost-effective GPT-3.5 Turbo snapshot optimized for chat completions, offering improved accuracy for function calling and reduced instances of incomplete responses.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.310Z" + }, + "gpt-3.5-turbo-0301": { + "provider": "openai", + "description": "Early snapshot of GPT-3.5 Turbo, OpenAI's first ChatGPT-optimized model for chat completions. 
Fast and cost-effective for simple tasks but superseded by later revisions.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:12.060Z" + }, + "gpt-3.5-turbo-0613": { + "provider": "openai", + "description": "A snapshot of GPT-3.5 Turbo from June 2023, optimized for chat and instruction-following tasks with function calling support.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:04.463Z" + }, + "gpt-3.5-turbo-1106": { + "provider": "openai", + "description": "A dated snapshot of GPT-3.5 Turbo released in November 2023, offering improved instruction following, JSON mode, and parallel function calling over previous GPT-3.5 variants.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:23.054Z" + }, + "gpt-3.5-turbo-16k": { + "provider": "openai", + "description": "Extended context version of GPT-3.5 Turbo with 16K token context window, offering the same capabilities as the base model but able to process longer inputs.", + "contextWindow": 
16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable", + "tool_use" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:36.307Z" + }, + "gpt-3.5-turbo-16k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-3.5 Turbo with 16K token context, snapshot from June 2023. Optimized for chat completions with longer document processing.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.894Z" + }, + "gpt-3.5-turbo-instruct": { + "provider": "openai", + "description": "OpenAI's GPT-3.5 Turbo Instruct is a completions-only model (not chat) optimized for following explicit instructions, replacing the legacy text-davinci-003 model.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "fine_tunable" + ], + "releaseDate": "2023-09-19", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-27", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.309Z" + }, + "gpt-4": { + "provider": "openai", + "description": "OpenAI's flagship large language model that preceded GPT-4o, known for strong reasoning and instruction-following capabilities across a wide range of tasks.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" 
+ ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:36.773Z" + }, + "gpt-4-0125-preview": { + "provider": "openai", + "description": "An improved GPT-4 Turbo preview model with better task completion, reduced laziness in code generation, and enhanced instruction following.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:54.196Z" + }, + "gpt-4-0314": { + "provider": "openai", + "description": "Original GPT-4 snapshot from March 2023, a large multimodal model (text-only at launch) that was one of OpenAI's first GPT-4 releases. Now deprecated and replaced by newer GPT-4 variants.", + "contextWindow": 8192, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.112Z" + }, + "gpt-4-0613": { + "provider": "openai", + "description": "A snapshot of GPT-4 from June 2023, offering strong reasoning and instruction-following capabilities. 
It was one of the first widely available GPT-4 variants with function calling support.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:13.885Z" + }, + "gpt-4-1106-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:05:12.960Z" + }, + "gpt-4-32k": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token capacity, offering the same capabilities as GPT-4 but able to process longer documents and conversations.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.584Z" + }, + "gpt-4-32k-0314": { + "provider": "openai", + "description": "Extended context (32k token) variant of the original GPT-4 launch snapshot from March 2023, offering the same capabilities as gpt-4-0314 but with 4x the context
window.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:32.044Z" + }, + "gpt-4-32k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token context, based on the June 2023 snapshot. Offers the same capabilities as GPT-4 but with 4x the context length.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:53.070Z" + }, + "gpt-4-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, JSON mode, and parallel function calling. 
A preview release in the GPT-4 Turbo series, now deprecated in favor of newer models.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:54.248Z" + }, + "gpt-4-turbo": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z" + }, + "gpt-4-turbo-2024-04-09": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z" + }, + "gpt-4-turbo-preview": { + "provider": "openai", + "description": "An early preview of GPT-4 Turbo with a 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 
128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:52.346Z" + }, + "gpt-4-turbo-vision": { + "provider": "openai", + "description": "OpenAI's GPT-4 Turbo model with vision capabilities, able to analyze and understand images alongside text. It was a preview model later superseded by GPT-4 Turbo (gpt-4-turbo-2024-04-09) and then GPT-4o.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2024-12-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:38.455Z" + }, + "gpt-4.1": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z" + }, + "gpt-4.1-2025-04-14": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. 
Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z" + }, + "gpt-4.1-mini": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z" + }, + "gpt-4.1-mini-2025-04-14": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z" + }, + "gpt-4.1-nano": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, 
autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z" + }, + "gpt-4.1-nano-2025-04-14": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z" + }, + "gpt-4.5-preview": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z" + }, + "gpt-4.5-preview-2025-02-27": { + "provider": "openai", + "description": "OpenAI's largest 
pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z" + }, + "gpt-4o": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-05-13": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-08-06": { + "provider": "openai", + 
"description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-11-20": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-audio-preview": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": 
"2026-03-24T11:08:09.590Z" + }, + "gpt-4o-audio-preview-2024-10-01": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z" + }, + "gpt-4o-mini": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z" + }, + "gpt-4o-mini-2024-07-18": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. 
Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z" + }, + "gpt-4o-realtime-preview": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z" + }, + "gpt-4o-realtime-preview-2024-10-01": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z" + }, + "gpt-5": { + "provider": "openai", + "description": "OpenAI's flagship reasoning 
model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z" + }, + "gpt-5-2025-08-07": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z" + }, + "gpt-5-chat-latest": { + "provider": "openai", + "description": "Non-reasoning GPT-5 model used in ChatGPT, optimized for conversational tasks. 
Supports text and image inputs with function calling and structured outputs.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:24.834Z" + }, + "gpt-5-mini": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z" + }, + "gpt-5-mini-2025-08-07": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. 
Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z" + }, + "gpt-5-nano": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z" + }, + "gpt-5-nano-2025-08-07": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. 
Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z" + }, + "gpt-5-pro": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z" + }, + "gpt-5-pro-2025-10-06": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z" + }, + "gpt-5.1": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math 
performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z" + }, + "gpt-5.1-2025-11-13": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z" + }, + "gpt-5.2": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. 
Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z" + }, + "gpt-5.2-2025-12-11": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z" + }, + "gpt-5.2-pro": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z" + }, + "gpt-5.2-pro-2025-12-11": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z" + }, + "gpt-5.4": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z" + }, + "gpt-5.4-2026-03-05": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native 
computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z" + }, + "gpt-5.4-mini": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z" + }, + "gpt-5.4-mini-2026-03-17": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. 
Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z" + }, + "gpt-5.4-nano": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z" + }, + "gpt-5.4-nano-2026-03-17": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z" + }, + "gpt-5.4-pro": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. 
Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z" + }, + "gpt-5.4-pro-2026-03-05": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z" + }, + "o1": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z" + }, + "o1-2024-12-17": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding 
problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z" + }, + "o1-mini": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z" + }, + "o1-mini-2024-09-12": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z" + }, + "o1-preview": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. 
Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z" + }, + "o1-preview-2024-09-12": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z" + }, + "o1-pro": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z" + }, + "o1-pro-2025-03-19": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + 
"tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z" + }, + "o3": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z" + }, + "o3-2025-04-16": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z" + }, + "o3-mini": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", 
+ "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z" + }, + "o3-mini-2025-01-31": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z" + }, + "o3-pro": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z" + }, + "o3-pro-2025-06-10": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + 
"extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z" + }, + "o4-mini": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z" + }, + "o4-mini-2025-04-16": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z" + } +} diff --git a/internal-packages/llm-model-catalog/src/modelCatalog.ts b/internal-packages/llm-model-catalog/src/modelCatalog.ts new file mode 100644 index 00000000000..71ae921c3e7 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/modelCatalog.ts @@ -0,0 +1,2572 @@ +import type { ModelCatalogEntry } from "./types.js"; + +// Auto-generated from model-catalog.json — do not edit manually. 
+// Run `pnpm run generate-catalog` to update the JSON, then `pnpm run generate` to regenerate. + +export const modelCatalog: Record = { + "chatgpt-4o-latest": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model optimized for speed and cost, capable of processing text, images, and audio with strong performance across reasoning, coding, and creative tasks.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T10:55:46.469Z", + "baseModelName": "chatgpt-4o" + }, + "claude-1.1": { + "provider": "anthropic", + "description": "An early-generation Claude model from Anthropic, offering basic conversational and text completion capabilities. It was quickly superseded by Claude 1.2, 1.3, and the Claude 2 family.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:47.906Z", + "baseModelName": null + }, + "claude-1.2": { + "provider": "anthropic", + "description": "An early-generation Anthropic model, part of the original Claude 1.x family. 
It offered improved performance over Claude 1.0 but was quickly superseded by Claude 1.3 and later model families.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": null, + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.760Z", + "baseModelName": null + }, + "claude-1.3": { + "provider": "anthropic", + "description": "Early-generation Claude model from Anthropic, offering improved performance over Claude 1.0-1.2 in reasoning and instruction-following tasks.", + "contextWindow": 100000, + "maxOutputTokens": null, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.227Z", + "baseModelName": null + }, + "claude-2.0": { + "provider": "anthropic", + "description": "Anthropic's second-generation large language model, offering improved performance over Claude 1.x with longer context support. 
Succeeded by Claude 2.1 and later the Claude 3 family.", + "contextWindow": 100000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-07-11", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-02-01", + "resolvedAt": "2026-03-24T10:55:45.922Z", + "baseModelName": null + }, + "claude-2.1": { + "provider": "anthropic", + "description": "Anthropic's Claude 2.1 model featuring a 200K context window, reduced hallucination rates compared to Claude 2.0, and improved accuracy on long document comprehension.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "tool_use" + ], + "releaseDate": "2023-11-21", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:56:22.743Z", + "baseModelName": null + }, + "claude-3-5-haiku-20241022": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across common tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:56:25.724Z", + "baseModelName": "claude-3-5-haiku" + }, + "claude-3-5-sonnet-20240620": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.5 Sonnet is a mid-tier model balancing intelligence and speed, excelling at coding, analysis, and vision tasks 
while being faster and cheaper than Opus.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:56:35.401Z", + "baseModelName": "claude-3-5-sonnet" + }, + "claude-3-haiku-20240307": { + "provider": "anthropic", + "description": "Anthropic's fastest and most compact Claude 3 model, optimized for speed and cost-efficiency while maintaining strong performance on everyday tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T10:56:25.288Z", + "baseModelName": "claude-3-haiku" + }, + "claude-3-opus-20240229": { + "provider": "anthropic", + "description": "Anthropic's most capable model in the Claude 3 family, excelling at complex analysis, nuanced content generation, and advanced reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-08-01", + "resolvedAt": "2026-03-24T10:56:26.008Z", + "baseModelName": "claude-3-opus" + }, + "claude-3-sonnet-20240229": { + "provider": "anthropic", + "description": "Mid-tier model in Anthropic's Claude 3 family, balancing performance and speed for a wide range of 
tasks including analysis, coding, and content generation.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-02-01", + "resolvedAt": "2026-03-24T10:56:59.532Z", + "baseModelName": "claude-3-sonnet" + }, + "claude-3.5-haiku-latest": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across a wide range of tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:57:04.392Z", + "baseModelName": "claude-3.5-haiku" + }, + "claude-3.5-sonnet-20241022": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z", + "baseModelName": "claude-3.5-sonnet" + }, + "claude-3.5-sonnet-latest": { + "provider": "anthropic", + "description": 
"Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z", + "baseModelName": "claude-3.5-sonnet" + }, + "claude-3.7-sonnet-20250219": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z", + "baseModelName": "claude-3.7-sonnet" + }, + "claude-3.7-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": 
"2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z", + "baseModelName": "claude-3.7-sonnet" + }, + "claude-haiku-4-5-20251001": { + "provider": "anthropic", + "description": "Anthropic's fastest model with near-frontier intelligence, optimized for speed and cost efficiency while supporting extended thinking and vision.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-10-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-07-01", + "resolvedAt": "2026-03-24T10:57:29.685Z", + "baseModelName": "claude-haiku-4-5" + }, + "claude-instant-1": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model optimized for speed and efficiency, positioned as a lighter alternative to Claude 1.x for tasks requiring lower latency.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-06", + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:36.888Z", + "baseModelName": null + }, + "claude-instant-1.2": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model, optimized for speed and efficiency while maintaining strong performance on conversational and text generation tasks.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-08-09", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + 
"resolvedAt": "2026-03-24T10:57:41.865Z", + "baseModelName": null + }, + "claude-opus-4-1-20250805": { + "provider": "anthropic", + "description": "Anthropic's hybrid reasoning model with strong software engineering and agentic capabilities, scoring 74.5% on SWE-bench Verified. Supports both rapid responses and step-by-step extended thinking.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-08-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:36.876Z", + "baseModelName": "claude-opus-4-1" + }, + "claude-opus-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's flagship model from the Claude 4 family, excelling at complex coding tasks, long-running agent workflows, and deep reasoning with extended thinking support.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:47.518Z", + "baseModelName": "claude-opus-4" + }, + "claude-opus-4-5-20251101": { + "provider": "anthropic", + "description": "Anthropic's flagship intelligence model released in November 2025, excelling at complex reasoning, vision, and extended thinking with the best performance in Anthropic's lineup before Opus 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-11-01", + 
"isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:48.961Z", + "baseModelName": "claude-opus-4-5" + }, + "claude-opus-4-6": { + "provider": "anthropic", + "description": "Anthropic's most intelligent model, optimized for building agents and coding with exceptional reasoning capabilities and extended agentic task horizons.", + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-05-01", + "resolvedAt": "2026-03-24T10:58:42.061Z", + "baseModelName": null + }, + "claude-sonnet-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z", + "baseModelName": "claude-sonnet-4" + }, + "claude-sonnet-4-5-20250929": { + "provider": "anthropic", + "description": "Anthropic's high-performance mid-tier model with strong coding, reasoning, and multi-step problem solving capabilities. 
Successor to Claude Sonnet 4, offering improved benchmarks at the same price point.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-09-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T10:59:54.426Z", + "baseModelName": "claude-sonnet-4-5" + }, + "claude-sonnet-4-6": { + "provider": "anthropic", + "description": "Anthropic's best combination of speed and intelligence, excelling at coding, agentic tasks, and computer use, with a 1M token context window and performance rivaling prior Opus-class models.", + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2026-01-01", + "resolvedAt": "2026-03-24T10:59:59.014Z", + "baseModelName": null + }, + "claude-sonnet-4-latest": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. 
Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z", + "baseModelName": "claude-sonnet-4" + }, + "gemini-1.0-pro": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z", + "baseModelName": null + }, + "gemini-1.0-pro-001": { + "provider": "google", + "description": "Google's first-generation Pro model optimized for text generation, reasoning, and multi-turn conversation tasks, part of the original Gemini 1.0 lineup.", + "contextWindow": 30720, + "maxOutputTokens": 2048, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:27.391Z", + "baseModelName": null + }, + "gemini-1.0-pro-latest": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for 
text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z", + "baseModelName": "gemini-1.0-pro" + }, + "gemini-1.5-pro-latest": { + "provider": "google", + "description": "Google's mid-size multimodal model with a massive context window, strong at long-document understanding, code generation, and multi-turn conversation.", + "contextWindow": 2097152, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-24", + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:59:25.463Z", + "baseModelName": "gemini-1.5-pro" + }, + "gemini-2.0-flash": { + "provider": "google", + "description": "Google's second-generation workhorse model optimized for speed, with native tool use, multimodal input (text, images, audio, video), and a 1M token context window.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:15.429Z", + "baseModelName": null + }, + "gemini-2.0-flash-001": { + "provider": "google", + "description": 
"Google's fast and efficient multimodal model that outperforms Gemini 1.5 Pro on key benchmarks at twice the speed, supporting text, image, audio, and video inputs with native tool use.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "image_generation", + "audio_output", + "code_execution" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T11:01:04.084Z", + "baseModelName": null + }, + "gemini-2.0-flash-lite-preview": { + "provider": "google", + "description": "A lightweight, cost-efficient variant of Gemini 2.0 Flash optimized for low latency and high throughput, supporting multimodal input with text output.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:00:56.775Z", + "baseModelName": null + }, + "gemini-2.0-flash-lite-preview-02-05": { + "provider": "google", + "description": "Google's cost-optimized, low-latency model in the Gemini 2.0 family, designed for high-volume tasks like summarization, multimodal processing, and categorization.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-12-09", + "knowledgeCutoff": "2024-06-01", + 
"resolvedAt": "2026-03-24T11:01:34.165Z", + "baseModelName": null + }, + "gemini-2.5-flash": { + "provider": "google", + "description": "Google's best price-performance model optimized for low-latency, high-volume tasks requiring reasoning, with built-in thinking capabilities and multimodal input support.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-06-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:01:25.200Z", + "baseModelName": null + }, + "gemini-2.5-flash-lite": { + "provider": "google", + "description": "Google's most cost-efficient Gemini model, optimized for low-latency use cases with strong reasoning, multilingual, and long-context capabilities at minimal cost.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-07-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-07-22", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:30.060Z", + "baseModelName": null + }, + "gemini-2.5-pro": { + "provider": "google", + "description": "Google's most advanced reasoning model with deep thinking capabilities, excelling at complex tasks like coding, math, and multimodal understanding across text, images, audio, and video.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + 
"releaseDate": "2025-03-25", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-17", + "knowledgeCutoff": "2025-01-31", + "resolvedAt": "2026-03-24T11:02:25.573Z", + "baseModelName": null + }, + "gemini-3-flash-preview": { + "provider": "google", + "description": "Google's high-speed thinking model that matches Gemini 2.5 Pro performance at ~3x faster speed and lower cost, designed for agentic workflows, multi-turn chat, and coding assistance with configurable reasoning levels.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:13.388Z", + "baseModelName": null + }, + "gemini-3-pro-preview": { + "provider": "google", + "description": "Google's flagship reasoning and multimodal model with strong coding and agentic capabilities, now deprecated in favor of Gemini 3.1 Pro.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-11-01", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-03-09", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.313Z", + "baseModelName": null + }, + "gemini-3.1-flash-lite-preview": { + "provider": "google", + "description": "Google's most cost-efficient multimodal model in the Gemini 3 series, optimized for high-volume, low-latency tasks like 
translation, classification, and simple data extraction. Offers 2.5x faster time-to-first-token than Gemini 2.5 Flash.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-03-03", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.253Z", + "baseModelName": null + }, + "gemini-3.1-pro-preview": { + "provider": "google", + "description": "Google's most advanced reasoning model in the Gemini 3.1 family, excelling at complex problem-solving across text, audio, images, video, and code with a 1M token context window and extended thinking capabilities.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-02-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:03:33.071Z", + "baseModelName": null + }, + "gemini-pro": { + "provider": "google", + "description": "Google's first-generation Gemini model for text generation, reasoning, and multi-turn conversation. 
Superseded by Gemini 1.5 Pro and later models.", + "contextWindow": 32768, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-04-09", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:03:45.401Z", + "baseModelName": null + }, + "gpt-3.5-turbo": { + "provider": "openai", + "description": "OpenAI's fast and cost-effective model optimized for chat and instruction-following tasks, now superseded by GPT-4o mini.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.412Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0125": { + "provider": "openai", + "description": "A fast and cost-effective GPT-3.5 Turbo snapshot optimized for chat completions, offering improved accuracy for function calling and reduced instances of incomplete responses.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.310Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0301": { + "provider": "openai", + "description": "Early snapshot of GPT-3.5 Turbo, OpenAI's first ChatGPT-optimized model for chat completions. 
Fast and cost-effective for simple tasks but superseded by later revisions.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:12.060Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0613": { + "provider": "openai", + "description": "A snapshot of GPT-3.5 Turbo from June 2023, optimized for chat and instruction-following tasks with function calling support.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:04.463Z", + "baseModelName": null + }, + "gpt-3.5-turbo-1106": { + "provider": "openai", + "description": "A dated snapshot of GPT-3.5 Turbo released in November 2023, offering improved instruction following, JSON mode, and parallel function calling over previous GPT-3.5 variants.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:23.054Z", + "baseModelName": null + }, + "gpt-3.5-turbo-16k": { + "provider": "openai", + "description": "Extended context version of GPT-3.5 Turbo with 16K token context window, offering the same capabilities 
as the base model but able to process longer inputs.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable", + "tool_use" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:36.307Z", + "baseModelName": null + }, + "gpt-3.5-turbo-16k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-3.5 Turbo with 16K token context, snapshot from June 2023. Optimized for chat completions with longer document processing.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.894Z", + "baseModelName": null + }, + "gpt-3.5-turbo-instruct": { + "provider": "openai", + "description": "OpenAI's GPT-3.5 Turbo Instruct is a completions-only model (not chat) optimized for following explicit instructions, replacing the legacy text-davinci-003 model.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "fine_tunable" + ], + "releaseDate": "2023-09-19", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-27", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.309Z", + "baseModelName": null + }, + "gpt-4": { + "provider": "openai", + "description": "OpenAI's flagship large language model that preceded GPT-4o, known for strong reasoning and instruction-following capabilities across a 
wide range of tasks.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:36.773Z", + "baseModelName": null + }, + "gpt-4-0125-preview": { + "provider": "openai", + "description": "An improved GPT-4 Turbo preview model with better task completion, reduced laziness in code generation, and enhanced instruction following.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:54.196Z", + "baseModelName": null + }, + "gpt-4-0314": { + "provider": "openai", + "description": "Original GPT-4 snapshot from March 2023, a large multimodal model (text-only at launch) that was one of OpenAI's first GPT-4 releases. Now deprecated and replaced by newer GPT-4 variants.", + "contextWindow": 8192, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.112Z", + "baseModelName": null + }, + "gpt-4-0613": { + "provider": "openai", + "description": "A snapshot of GPT-4 from June 2023, offering strong reasoning and instruction-following capabilities. 
It was one of the first widely available GPT-4 variants with function calling support.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:13.885Z", + "baseModelName": null + }, + "gpt-4-1106-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:05:12.960Z", + "baseModelName": null + }, + "gpt-4-32k": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token capacity, offering the same capabilities as GPT-4 but able to process longer documents and conversations.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.584Z", + "baseModelName": null + }, + "gpt-4-32k-0314": { + "provider": "openai", + "description": "Extended context (32k token) variant of the original GPT-4 launch snapshot from March 2023, 
offering the same capabilities as gpt-4-0314 but with 4x the context window.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:32.044Z", + "baseModelName": null + }, + "gpt-4-32k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token context, based on the June 2023 snapshot. Offers the same capabilities as GPT-4 but with 4x the context length.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:53.070Z", + "baseModelName": null + }, + "gpt-4-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, JSON mode, and parallel function calling. 
A preview release in the GPT-4 Turbo series, now deprecated in favor of newer models.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:54.248Z", + "baseModelName": null + }, + "gpt-4-turbo": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z", + "baseModelName": null + }, + "gpt-4-turbo-2024-04-09": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z", + "baseModelName": "gpt-4-turbo" + }, + "gpt-4-turbo-preview": { + "provider": "openai", + "description": "An early preview of GPT-4 Turbo with a 128K context window, offering improved instruction following 
and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:52.346Z", + "baseModelName": null + }, + "gpt-4-turbo-vision": { + "provider": "openai", + "description": "OpenAI's GPT-4 Turbo model with vision capabilities, able to analyze and understand images alongside text. It was a preview model later superseded by GPT-4 Turbo (gpt-4-turbo-2024-04-09) and then GPT-4o.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2024-12-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:38.455Z", + "baseModelName": null + }, + "gpt-4.1": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. 
Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z", + "baseModelName": null + }, + "gpt-4.1-2025-04-14": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z", + "baseModelName": "gpt-4.1" + }, + "gpt-4.1-mini": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z", + "baseModelName": null + }, + "gpt-4.1-mini-2025-04-14": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that 
matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z", + "baseModelName": "gpt-4.1-mini" + }, + "gpt-4.1-nano": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z", + "baseModelName": null + }, + "gpt-4.1-nano-2025-04-14": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + 
"resolvedAt": "2026-03-24T11:08:04.533Z", + "baseModelName": "gpt-4.1-nano" + }, + "gpt-4.5-preview": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z", + "baseModelName": null + }, + "gpt-4.5-preview-2025-02-27": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z", + "baseModelName": "gpt-4.5-preview" + }, + "gpt-4o": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + 
"supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": null + }, + "gpt-4o-2024-05-13": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-2024-08-06": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-2024-11-20": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + 
"fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-audio-preview": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z", + "baseModelName": null + }, + "gpt-4o-audio-preview-2024-10-01": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z", + "baseModelName": "gpt-4o-audio-preview" + }, + "gpt-4o-mini": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. 
Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z", + "baseModelName": null + }, + "gpt-4o-mini-2024-07-18": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z", + "baseModelName": "gpt-4o-mini" + }, + "gpt-4o-realtime-preview": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z", + "baseModelName": null + }, + 
"gpt-4o-realtime-preview-2024-10-01": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z", + "baseModelName": "gpt-4o-realtime-preview" + }, + "gpt-5": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z", + "baseModelName": null + }, + "gpt-5-2025-08-07": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + 
"deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z", + "baseModelName": "gpt-5" + }, + "gpt-5-chat-latest": { + "provider": "openai", + "description": "Non-reasoning GPT-5 model used in ChatGPT, optimized for conversational tasks. Supports text and image inputs with function calling and structured outputs.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:24.834Z", + "baseModelName": "gpt-5-chat" + }, + "gpt-5-mini": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z", + "baseModelName": null + }, + "gpt-5-mini-2025-08-07": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. 
Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z", + "baseModelName": "gpt-5-mini" + }, + "gpt-5-nano": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z", + "baseModelName": null + }, + "gpt-5-nano-2025-08-07": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. 
Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z", + "baseModelName": "gpt-5-nano" + }, + "gpt-5-pro": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z", + "baseModelName": null + }, + "gpt-5-pro-2025-10-06": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z", + "baseModelName": "gpt-5-pro" + }, + "gpt-5.1": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, 
offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z", + "baseModelName": null + }, + "gpt-5.1-2025-11-13": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z", + "baseModelName": "gpt-5.1" + }, + "gpt-5.2": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. 
Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z", + "baseModelName": null + }, + "gpt-5.2-2025-12-11": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z", + "baseModelName": "gpt-5.2" + }, + "gpt-5.2-pro": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z", + "baseModelName": null + }, + "gpt-5.2-pro-2025-12-11": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z", + "baseModelName": "gpt-5.2-pro" + }, + "gpt-5.4": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z", + "baseModelName": null + }, + "gpt-5.4-2026-03-05": { + "provider": "openai", + "description": "OpenAI's most 
capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z", + "baseModelName": "gpt-5.4" + }, + "gpt-5.4-mini": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z", + "baseModelName": null + }, + "gpt-5.4-mini-2026-03-17": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. 
Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z", + "baseModelName": "gpt-5.4-mini" + }, + "gpt-5.4-nano": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z", + "baseModelName": null + }, + "gpt-5.4-nano-2026-03-17": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z", + "baseModelName": "gpt-5.4-nano" + }, + "gpt-5.4-pro": { + "provider": "openai", + "description": "OpenAI's 
highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z", + "baseModelName": null + }, + "gpt-5.4-pro-2026-03-05": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z", + "baseModelName": "gpt-5.4-pro" + }, + "o1": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z", + "baseModelName": null + }, + "o1-2024-12-17": { + 
"provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z", + "baseModelName": "o1" + }, + "o1-mini": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z", + "baseModelName": null + }, + "o1-mini-2024-09-12": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z", + "baseModelName": "o1-mini" + }, + "o1-preview": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to 
solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z", + "baseModelName": null + }, + "o1-preview-2024-09-12": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z", + "baseModelName": "o1-preview" + }, + "o1-pro": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z", + "baseModelName": null + }, + "o1-pro-2025-03-19": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex 
reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z", + "baseModelName": "o1-pro" + }, + "o3": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z", + "baseModelName": null + }, + "o3-2025-04-16": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z", + "baseModelName": "o3" + }, + "o3-mini": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized 
for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z", + "baseModelName": null + }, + "o3-mini-2025-01-31": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z", + "baseModelName": "o3-mini" + }, + "o3-pro": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z", + "baseModelName": null + }, + "o3-pro-2025-06-10": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning 
model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z", + "baseModelName": "o3-pro" + }, + "o4-mini": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z", + "baseModelName": null + }, + "o4-mini-2025-04-16": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z", + "baseModelName": "o4-mini" + } +}; diff --git a/internal-packages/llm-pricing/src/registry.test.ts 
b/internal-packages/llm-model-catalog/src/registry.test.ts similarity index 100% rename from internal-packages/llm-pricing/src/registry.test.ts rename to internal-packages/llm-model-catalog/src/registry.test.ts diff --git a/internal-packages/llm-pricing/src/registry.ts b/internal-packages/llm-model-catalog/src/registry.ts similarity index 100% rename from internal-packages/llm-pricing/src/registry.ts rename to internal-packages/llm-model-catalog/src/registry.ts diff --git a/internal-packages/llm-pricing/src/seed.ts b/internal-packages/llm-model-catalog/src/seed.ts similarity index 67% rename from internal-packages/llm-pricing/src/seed.ts rename to internal-packages/llm-model-catalog/src/seed.ts index d068c62a66d..72d212a9120 100644 --- a/internal-packages/llm-pricing/src/seed.ts +++ b/internal-packages/llm-model-catalog/src/seed.ts @@ -1,10 +1,13 @@ import type { PrismaClient } from "@trigger.dev/database"; import { generateFriendlyId } from "@trigger.dev/core/v3/isomorphic"; import { defaultModelPrices } from "./defaultPrices.js"; +import { modelCatalog } from "./modelCatalog.js"; +import { syncLlmCatalog } from "./sync.js"; export async function seedLlmPricing(prisma: PrismaClient): Promise<{ modelsCreated: number; modelsSkipped: number; + modelsUpdated: number; }> { let modelsCreated = 0; let modelsSkipped = 0; @@ -23,6 +26,9 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ continue; } + // Look up catalog metadata for this model + const catalog = modelCatalog[modelDef.modelName]; + // Create model + tiers atomically so partial models can't be left behind await prisma.$transaction(async (tx) => { const model = await tx.llmModel.create({ @@ -32,6 +38,14 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ matchPattern: modelDef.matchPattern, startDate: modelDef.startDate ? new Date(modelDef.startDate) : null, source: "default", + // Catalog metadata (from model-catalog.json) + provider: catalog?.provider ?? 
null, + description: catalog?.description ?? null, + contextWindow: catalog?.contextWindow ?? null, + maxOutputTokens: catalog?.maxOutputTokens ?? null, + capabilities: catalog?.capabilities ?? [], + isHidden: catalog?.isHidden ?? false, + baseModelName: catalog?.baseModelName ?? null, }, }); @@ -58,5 +72,8 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ modelsCreated++; } - return { modelsCreated, modelsSkipped }; + // Sync catalog metadata on existing default models + const syncResult = await syncLlmCatalog(prisma); + + return { modelsCreated, modelsSkipped, modelsUpdated: syncResult.modelsUpdated }; } diff --git a/internal-packages/llm-model-catalog/src/sync.ts b/internal-packages/llm-model-catalog/src/sync.ts new file mode 100644 index 00000000000..b600e39a692 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/sync.ts @@ -0,0 +1,55 @@ +import type { PrismaClient } from "@trigger.dev/database"; +import { defaultModelPrices } from "./defaultPrices.js"; +import { modelCatalog } from "./modelCatalog.js"; + +export async function syncLlmCatalog(prisma: PrismaClient): Promise<{ + modelsUpdated: number; + modelsSkipped: number; +}> { + let modelsUpdated = 0; + let modelsSkipped = 0; + + for (const modelDef of defaultModelPrices) { + const existing = await prisma.llmModel.findFirst({ + where: { + projectId: null, + modelName: modelDef.modelName, + }, + }); + + // Skip if model doesn't exist yet (seed handles creation) + if (!existing) { + modelsSkipped++; + continue; + } + + // Don't overwrite admin-edited models + if (existing.source !== "default") { + modelsSkipped++; + continue; + } + + const catalog = modelCatalog[modelDef.modelName]; + + await prisma.llmModel.update({ + where: { id: existing.id }, + data: { + // Update match pattern and start date from Langfuse (may have changed) + matchPattern: modelDef.matchPattern, + startDate: modelDef.startDate ? 
new Date(modelDef.startDate) : null, + // Update catalog metadata + provider: catalog?.provider ?? existing.provider, + description: catalog?.description ?? existing.description, + contextWindow: catalog?.contextWindow ?? existing.contextWindow, + maxOutputTokens: catalog?.maxOutputTokens ?? existing.maxOutputTokens, + capabilities: catalog?.capabilities ?? existing.capabilities, + isHidden: catalog?.isHidden ?? existing.isHidden, + baseModelName: catalog?.baseModelName ?? existing.baseModelName, + }, + }); + + modelsUpdated++; + } + + return { modelsUpdated, modelsSkipped }; +} diff --git a/internal-packages/llm-model-catalog/src/types.ts b/internal-packages/llm-model-catalog/src/types.ts new file mode 100644 index 00000000000..d6c6638d620 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/types.ts @@ -0,0 +1,87 @@ +import type { Decimal } from "@trigger.dev/database"; + +export type PricingCondition = { + usageDetailPattern: string; + operator: "gt" | "gte" | "lt" | "lte" | "eq" | "neq"; + value: number; +}; + +export type LlmPriceEntry = { + usageType: string; + price: number; +}; + +export type LlmPricingTierWithPrices = { + id: string; + name: string; + isDefault: boolean; + priority: number; + conditions: PricingCondition[]; + prices: LlmPriceEntry[]; +}; + +export type LlmModelWithPricing = { + id: string; + friendlyId: string; + modelName: string; + matchPattern: string; + startDate: Date | null; + pricingTiers: LlmPricingTierWithPrices[]; +}; + +export type LlmCostResult = { + matchedModelId: string; + matchedModelName: string; + pricingTierId: string; + pricingTierName: string; + inputCost: number; + outputCost: number; + totalCost: number; + costDetails: Record; +}; + +export type ModelCatalogEntry = { + provider: string; + description: string; + contextWindow: number | null; + maxOutputTokens: number | null; + capabilities: string[]; + /** ISO date string of when the model was publicly released (e.g. "2025-06-15"). 
*/ + releaseDate: string | null; + /** Whether the model is deprecated/legacy and should be hidden from the registry by default. */ + isHidden: boolean; + /** Whether the model supports reliable structured JSON output (schema adherence). */ + supportsStructuredOutput: boolean; + /** Whether the model can call multiple tools in a single turn. */ + supportsParallelToolCalls: boolean; + /** Whether the model supports streaming partial tool call results. */ + supportsStreamingToolCalls: boolean; + /** ISO date string of when the model will be deprecated/sunset, if known. */ + deprecationDate: string | null; + /** ISO date string of the model's training data cutoff (e.g. "2024-10-01"). */ + knowledgeCutoff: string | null; + /** ISO timestamp of when this entry was last researched/resolved. */ + resolvedAt: string; + /** The base model this is a variant of, or null if this IS the base model. */ + baseModelName: string | null; +}; + +export type DefaultModelDefinition = { + modelName: string; + matchPattern: string; + startDate?: string; + // Catalog metadata (merged from model-catalog.json during seed) + provider?: string; + description?: string; + contextWindow?: number | null; + maxOutputTokens?: number | null; + capabilities?: string[]; + isHidden?: boolean; + pricingTiers: Array<{ + name: string; + isDefault: boolean; + priority: number; + conditions: PricingCondition[]; + prices: Record; + }>; +}; diff --git a/internal-packages/llm-pricing/tsconfig.json b/internal-packages/llm-model-catalog/tsconfig.json similarity index 100% rename from internal-packages/llm-pricing/tsconfig.json rename to internal-packages/llm-model-catalog/tsconfig.json diff --git a/internal-packages/llm-pricing/package.json b/internal-packages/llm-pricing/package.json deleted file mode 100644 index 8cf9e366f2c..00000000000 --- a/internal-packages/llm-pricing/package.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "@internal/llm-pricing", - "private": true, - "version": "0.0.1", - "main": 
"./src/index.ts", - "types": "./src/index.ts", - "type": "module", - "dependencies": { - "@trigger.dev/core": "workspace:*", - "@trigger.dev/database": "workspace:*" - }, - "scripts": { - "typecheck": "tsc --noEmit", - "generate": "echo 'defaultPrices.ts is pre-committed — run sync-prices to update'", - "sync-prices": "bash scripts/sync-model-prices.sh", - "sync-prices:check": "bash scripts/sync-model-prices.sh --check" - } -} diff --git a/internal-packages/llm-pricing/src/types.ts b/internal-packages/llm-pricing/src/types.ts deleted file mode 100644 index 2deec6246ed..00000000000 --- a/internal-packages/llm-pricing/src/types.ts +++ /dev/null @@ -1,54 +0,0 @@ -import type { Decimal } from "@trigger.dev/database"; - -export type PricingCondition = { - usageDetailPattern: string; - operator: "gt" | "gte" | "lt" | "lte" | "eq" | "neq"; - value: number; -}; - -export type LlmPriceEntry = { - usageType: string; - price: number; -}; - -export type LlmPricingTierWithPrices = { - id: string; - name: string; - isDefault: boolean; - priority: number; - conditions: PricingCondition[]; - prices: LlmPriceEntry[]; -}; - -export type LlmModelWithPricing = { - id: string; - friendlyId: string; - modelName: string; - matchPattern: string; - startDate: Date | null; - pricingTiers: LlmPricingTierWithPrices[]; -}; - -export type LlmCostResult = { - matchedModelId: string; - matchedModelName: string; - pricingTierId: string; - pricingTierName: string; - inputCost: number; - outputCost: number; - totalCost: number; - costDetails: Record; -}; - -export type DefaultModelDefinition = { - modelName: string; - matchPattern: string; - startDate?: string; - pricingTiers: Array<{ - name: string; - isDefault: boolean; - priority: number; - conditions: PricingCondition[]; - prices: Record; - }>; -}; diff --git a/internal-packages/tsql/src/index.test.ts b/internal-packages/tsql/src/index.test.ts index 7a5182668d9..a93fb0fd4b0 100644 --- a/internal-packages/tsql/src/index.test.ts +++ 
b/internal-packages/tsql/src/index.test.ts @@ -53,7 +53,6 @@ const lookupTableSchema: TableSchema = { /** * Test table schema WITHOUT tenant columns (e.g., global reference data) */ -// @ts-expect-error - tenant columns are required but not set const nonTenantTableSchema: TableSchema = { name: "reference_data", clickhouseName: "trigger_dev.reference_data", diff --git a/internal-packages/tsql/src/query/functions.ts b/internal-packages/tsql/src/query/functions.ts index fcb5dd6e3d0..f184ed8f382 100644 --- a/internal-packages/tsql/src/query/functions.ts +++ b/internal-packages/tsql/src/query/functions.ts @@ -531,6 +531,14 @@ export const TSQL_AGGREGATIONS: Record = { quantile: { clickhouseName: "quantile", minArgs: 1, maxArgs: 1, minParams: 1, maxParams: 1, aggregate: true }, quantileIf: { clickhouseName: "quantileIf", minArgs: 2, maxArgs: 2, minParams: 1, maxParams: 1, aggregate: true }, quantiles: { clickhouseName: "quantiles", minArgs: 1, aggregate: true }, + // -Merge combinators for AggregatingMergeTree tables + quantilesMerge: { clickhouseName: "quantilesMerge", minArgs: 1, maxArgs: 1, minParams: 1, aggregate: true }, + quantileMerge: { clickhouseName: "quantileMerge", minArgs: 1, maxArgs: 1, minParams: 1, maxParams: 1, aggregate: true }, + sumMerge: { clickhouseName: "sumMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + avgMerge: { clickhouseName: "avgMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + countMerge: { clickhouseName: "countMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + minMerge: { clickhouseName: "minMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + maxMerge: { clickhouseName: "maxMerge", minArgs: 1, maxArgs: 1, aggregate: true }, // Statistical functions simpleLinearRegression: { clickhouseName: "simpleLinearRegression", minArgs: 2, maxArgs: 2, aggregate: true }, diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts index 00a28382de5..615d112c2f1 100644 --- 
a/internal-packages/tsql/src/query/schema.ts +++ b/internal-packages/tsql/src/query/schema.ts @@ -367,8 +367,8 @@ export interface TableSchema { clickhouseName: string; /** Column definitions for this table */ columns: Record; - /** Tenant isolation column configuration */ - tenantColumns: TenantColumnConfig; + /** Tenant isolation column configuration. Omit for global tables with no tenant scoping. */ + tenantColumns?: TenantColumnConfig; /** Description of the table for documentation/autocomplete */ description?: string; /** Whether this table can be joined to other tables */ @@ -866,9 +866,11 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s // Map table names tableNameMap.set(table.clickhouseName, table.name); - // Collect tenant column names to strip + // Collect tenant column names to strip (global tables have no tenant columns) const tenantCols = table.tenantColumns; - columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId); + if (tenantCols) { + columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId); + } // Collect required filter columns to strip if (table.requiredFilters) { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bc27428575a..7d96d581f40 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -305,9 +305,9 @@ importers: '@internal/cache': specifier: workspace:* version: link:../../internal-packages/cache - '@internal/llm-pricing': + '@internal/llm-model-catalog': specifier: workspace:* - version: link:../../internal-packages/llm-pricing + version: link:../../internal-packages/llm-model-catalog '@internal/redis': specifier: workspace:* version: link:../../internal-packages/redis @@ -1131,7 +1131,7 @@ importers: specifier: 18.2.69 version: 18.2.69 - internal-packages/llm-pricing: + internal-packages/llm-model-catalog: dependencies: '@trigger.dev/core': specifier: workspace:*