diff --git a/apps/webapp/app/components/navigation/SideMenu.tsx b/apps/webapp/app/components/navigation/SideMenu.tsx index 18a4387c996..90f25fde788 100644 --- a/apps/webapp/app/components/navigation/SideMenu.tsx +++ b/apps/webapp/app/components/navigation/SideMenu.tsx @@ -10,6 +10,7 @@ import { ClockIcon, Cog8ToothIcon, CogIcon, + CubeIcon, ExclamationTriangleIcon, FolderIcon, FolderOpenIcon, @@ -77,11 +78,12 @@ import { v3EnvironmentVariablesPath, v3ErrorsPath, v3LogsPath, + v3PromptsPath, + v3ModelsPath, v3ProjectAlertsPath, v3ProjectPath, v3ProjectSettingsGeneralPath, v3ProjectSettingsIntegrationsPath, - v3PromptsPath, v3QueuesPath, v3RunsPath, v3SchedulesPath, @@ -456,34 +458,50 @@ export function SideMenu({ /> - - - - + {(user.admin || user.isImpersonating || featureFlags.hasAiAccess) && ( + + + {(user.admin || user.isImpersonating || featureFlags.hasAiModelsAccess) && ( + + )} + + + )} {(user.admin || user.isImpersonating || featureFlags.hasQueryAccess) && ( ; + }>; +}; + +export type ModelMetricsPoint = { + minute: string; + callCount: number; + totalInputTokens: number; + totalOutputTokens: number; + totalCost: number; + ttfcP50: number; + ttfcP90: number; + ttfcP95: number; + ttfcP99: number; + tpsP50: number; + tpsP90: number; + tpsP95: number; + tpsP99: number; + durationP50: number; + durationP90: number; + durationP95: number; + durationP99: number; +}; + +export type UserModelMetrics = { + totalCalls: number; + totalCost: number; + totalInputTokens: number; + totalOutputTokens: number; + avgTtfc: number; + avgTps: number; + taskBreakdown: Array<{ + taskIdentifier: string; + calls: number; + cost: number; + }>; +}; + +export type ModelComparisonItem = { + responseModel: string; + genAiSystem: string; + callCount: number; + totalInputTokens: number; + totalOutputTokens: number; + totalCost: number; + ttfcP50: number; + ttfcP90: number; + tpsP50: number; + tpsP90: number; +}; + +export type PopularModel = { + responseModel: string; + genAiSystem: string; 
+ callCount: number; + totalCost: number; + ttfcP50: number; +}; + +// --- ClickHouse schemas for user metrics --- + +const UserMetricsSummaryRow = z.object({ + total_calls: z.coerce.number(), + total_cost: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + avg_ttfc: z.coerce.number(), + avg_tps: z.coerce.number(), +}); + +const UserTaskBreakdownRow = z.object({ + task_identifier: z.string(), + calls: z.coerce.number(), + cost: z.coerce.number(), +}); + +// --- Presenter --- + +export class ModelRegistryPresenter extends BasePresenter { + private readonly clickhouse: ClickHouse; + + constructor(clickhouse: ClickHouse, replica?: PrismaClientOrTransaction) { + super(undefined, replica); + this.clickhouse = clickhouse; + } + + /** List all visible global models with pricing, grouped by provider. */ + async getModelCatalog(): Promise { + const models = await this._replica.llmModel.findMany({ + where: { + projectId: null, + isHidden: false, + }, + include: { + pricingTiers: { + where: { isDefault: true }, + include: { prices: true }, + take: 1, + }, + }, + orderBy: { modelName: "asc" }, + }); + + type CatalogItemWithBase = ModelCatalogItem & { _baseModelName: string | null }; + const items: CatalogItemWithBase[] = models.map((m) => { + const defaultTier = m.pricingTiers[0]; + const prices = defaultTier?.prices ?? []; + const inputPrice = prices.find((p) => p.usageType === "input"); + const outputPrice = prices.find((p) => p.usageType === "output"); + const provider = m.provider ?? inferProvider(m.modelName); + const catalogEntry = modelCatalog[m.modelName]; + + return { + friendlyId: m.friendlyId, + modelName: m.modelName, + provider, + displayId: formatModelId(provider, m.modelName), + description: m.description, + contextWindow: m.contextWindow, + maxOutputTokens: m.maxOutputTokens, + capabilities: m.capabilities, + inputPrice: inputPrice ? Number(inputPrice.price) : null, + outputPrice: outputPrice ? 
Number(outputPrice.price) : null, + releaseDate: m.startDate ? m.startDate.toISOString().split("T")[0] : null, + supportsStructuredOutput: catalogEntry?.supportsStructuredOutput ?? false, + supportsParallelToolCalls: catalogEntry?.supportsParallelToolCalls ?? false, + supportsStreamingToolCalls: catalogEntry?.supportsStreamingToolCalls ?? false, + variants: [], + _baseModelName: m.baseModelName, + }; + }); + + // Normalize version dots for grouping: "3.5" → "3-5", "4.1" → "4-1" + const normalizeForGrouping = (name: string) => name.replace(/(\d)\.(\d)/g, "$1-$2"); + + // Group variants by their normalized base model name + const variantGroups = new Map(); + + for (const item of items) { + const groupKey = normalizeForGrouping(item._baseModelName ?? item.modelName); + const group = variantGroups.get(groupKey) ?? []; + group.push(item); + variantGroups.set(groupKey, group); + } + + // For each group, pick the best representative as the "card" model + // and nest the rest as variants + const baseModels: ModelCatalogItem[] = []; + + for (const [groupKey, group] of variantGroups) { + if (group.length === 1) { + // Standalone model, no variants + baseModels.push(group[0]); + continue; + } + + // Pick representative: prefer the actual base model (no _baseModelName), + // then "-latest" variant, then the newest by release date + let representative = group.find((m) => !m._baseModelName) + ?? group.find((m) => m.modelName.endsWith("-latest")) + ?? 
group.sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return 0; + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + })[0]; + + // Nest the others as variants, sorted newest first + const others = group + .filter((m) => m !== representative) + .sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return a.modelName.localeCompare(b.modelName); + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + }); + + representative.variants = others.map((m) => ({ + friendlyId: m.friendlyId, + modelName: m.modelName, + displayId: m.displayId, + releaseDate: m.releaseDate, + })); + + baseModels.push(representative); + } + + // Group by provider, sort models within each group by release date (newest first) + const groups = new Map(); + for (const item of baseModels) { + const group = groups.get(item.provider) ?? []; + group.push(item); + groups.set(item.provider, group); + } + + return Array.from(groups.entries()) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([provider, models]) => ({ + provider, + models: models.sort((a, b) => { + if (!a.releaseDate && !b.releaseDate) return a.modelName.localeCompare(b.modelName); + if (!a.releaseDate) return 1; + if (!b.releaseDate) return -1; + return b.releaseDate.localeCompare(a.releaseDate); + }), + })); + } + + /** Get a single model with full pricing details. */ + async getModelDetail(friendlyId: string): Promise { + const model = await this._replica.llmModel.findUnique({ + where: { friendlyId }, + include: { + pricingTiers: { + include: { prices: true }, + orderBy: { priority: "asc" }, + }, + }, + }); + + if (!model) return null; + + const defaultTier = model.pricingTiers.find((t) => t.isDefault) ?? model.pricingTiers[0]; + const defaultPrices = defaultTier?.prices ?? 
[]; + const inputPrice = defaultPrices.find((p) => p.usageType === "input"); + const outputPrice = defaultPrices.find((p) => p.usageType === "output"); + const provider = model.provider ?? inferProvider(model.modelName); + const catalogEntry = modelCatalog[model.modelName]; + + return { + friendlyId: model.friendlyId, + modelName: model.modelName, + provider, + displayId: formatModelId(provider, model.modelName), + description: model.description, + contextWindow: model.contextWindow, + maxOutputTokens: model.maxOutputTokens, + capabilities: model.capabilities, + inputPrice: inputPrice ? Number(inputPrice.price) : null, + outputPrice: outputPrice ? Number(outputPrice.price) : null, + releaseDate: model.startDate ? model.startDate.toISOString().split("T")[0] : null, + supportsStructuredOutput: catalogEntry?.supportsStructuredOutput ?? false, + supportsParallelToolCalls: catalogEntry?.supportsParallelToolCalls ?? false, + supportsStreamingToolCalls: catalogEntry?.supportsStreamingToolCalls ?? false, + variants: [], + matchPattern: model.matchPattern, + source: model.source, + pricingTiers: model.pricingTiers.map((t) => ({ + name: t.name, + isDefault: t.isDefault, + prices: Object.fromEntries(t.prices.map((p) => [p.usageType, Number(p.price)])), + })), + }; + } + + /** Get global aggregate metrics for a model (no tenant info). 
*/ + async getGlobalMetrics( + responseModel: string, + startTime: Date, + endTime: Date + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.globalMetrics + .setParams({ + responseModel, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + minute: r.minute, + callCount: r.call_count, + totalInputTokens: r.total_input_tokens, + totalOutputTokens: r.total_output_tokens, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + ttfcP90: r.ttfc_p90, + ttfcP95: r.ttfc_p95, + ttfcP99: r.ttfc_p99, + tpsP50: r.tps_p50, + tpsP90: r.tps_p90, + tpsP95: 0, + tpsP99: 0, + durationP50: r.duration_p50, + durationP90: r.duration_p90, + durationP95: 0, + durationP99: 0, + })); + } + + /** Get per-project usage metrics for a model. */ + async getUserMetrics( + responseModel: string, + projectId: string, + environmentId: string, + startTime: Date, + endTime: Date + ): Promise { + const summaryQuery = this.clickhouse.reader.query({ + name: "modelRegistryUserSummary", + query: ` + SELECT + count() AS total_calls, + sum(total_cost) AS total_cost, + sum(input_tokens) AS total_input_tokens, + sum(output_tokens) AS total_output_tokens, + round(avg(ms_to_first_chunk), 1) AS avg_ttfc, + round(avg(tokens_per_second), 1) AS avg_tps + FROM trigger_dev.llm_metrics_v1 + WHERE response_model = {responseModel: String} + AND project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + `, + params: z.object({ + responseModel: z.string(), + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: UserMetricsSummaryRow, + }); + + const taskQuery = this.clickhouse.reader.query({ + name: "modelRegistryUserTasks", + query: ` + SELECT + task_identifier, + count() AS calls, + sum(total_cost) AS cost + FROM 
trigger_dev.llm_metrics_v1 + WHERE response_model = {responseModel: String} + AND project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + GROUP BY task_identifier + ORDER BY cost DESC + LIMIT 20 + `, + params: z.object({ + responseModel: z.string(), + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: UserTaskBreakdownRow, + }); + + const queryParams = { + responseModel, + projectId, + environmentId, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }; + + const [summaryResult, taskResult] = await Promise.all([ + summaryQuery(queryParams), + taskQuery(queryParams), + ]); + + const [summaryError, summaryRows] = summaryResult; + const [taskError, taskRows] = taskResult; + + const defaultSummary = { + total_calls: 0, + total_cost: 0, + total_input_tokens: 0, + total_output_tokens: 0, + avg_ttfc: 0, + avg_tps: 0, + }; + + const summary = !summaryError && summaryRows?.[0] ? summaryRows[0] : defaultSummary; + + return { + totalCalls: summary.total_calls, + totalCost: summary.total_cost, + totalInputTokens: summary.total_input_tokens, + totalOutputTokens: summary.total_output_tokens, + avgTtfc: summary.avg_ttfc, + avgTps: summary.avg_tps, + taskBreakdown: !taskError && taskRows + ? taskRows.map((r) => ({ + taskIdentifier: r.task_identifier, + calls: r.calls, + cost: r.cost, + })) + : [], + }; + } + + /** Get comparison data for 2-4 models. 
*/ + async getModelComparison( + responseModels: string[], + startTime: Date, + endTime: Date + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.comparison + .setParams({ + responseModels, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + callCount: r.call_count, + totalInputTokens: r.total_input_tokens, + totalOutputTokens: r.total_output_tokens, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + ttfcP90: r.ttfc_p90, + tpsP50: r.tps_p50, + tpsP90: r.tps_p90, + })); + } + + /** Get the most popular models by call count. */ + async getPopularModels( + startTime: Date, + endTime: Date, + limit: number = 20 + ): Promise { + const [error, rows] = await this.clickhouse.llmModelAggregates.popular + .setParams({ + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + limit, + }) + .execute(); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + callCount: r.call_count, + totalCost: r.total_cost, + ttfcP50: r.ttfc_p50, + })); + } +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx new file mode 100644 index 00000000000..5b8fc9170db --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx @@ -0,0 +1,595 @@ +import { ArrowsRightLeftIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useState } from "react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; 
+import { z } from "zod"; +import { PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { Callout } from "~/components/primitives/Callout"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { Label } from "~/components/primitives/Label"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import * as Property from "~/components/primitives/PropertyTable"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { TabButton, TabContainer } from "~/components/primitives/Tabs"; +import { InlineCode } from "~/components/code/InlineCode"; +import { MetricWidget } from "~/routes/resources.metric"; +import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { ModelRegistryPresenter } from "~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { + EnvironmentParamSchema, + v3ModelComparePath, + v3ModelsPath, +} from "~/utils/pathBuilder"; +import { + formatModelPrice, + formatTokenCount, + formatModelCost, + formatCapability, + formatProviderName, +} from "~/utils/modelFormatters"; + +const ParamSchema = EnvironmentParamSchema.extend({ + modelId: z.string(), +}); + +export const meta: MetaFunction = () => { + return [{ title: "Model Detail | Trigger.dev" }]; +}; + +export const loader = 
async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam, modelId } = ParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response("Project not found", { status: 404 }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response("Environment not found", { status: 404 }); + } + + const presenter = new ModelRegistryPresenter(clickhouseClient); + const model = await presenter.getModelDetail(modelId); + + if (!model) { + throw new Response("Model not found", { status: 404 }); + } + + const now = new Date(); + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); + + const userMetrics = await presenter.getUserMetrics( + model.modelName, + project.id, + environment.id, + sevenDaysAgo, + now + ); + + return typedjson({ + model, + userMetrics, + organizationId: project.organizationId, + projectId: project.id, + environmentId: environment.id, + }); +}; + +/** Escape a value for safe interpolation into a TSQL single-quoted string. */ +function escapeTSQL(value: string): string { + return value.replace(/'/g, "''"); +} + +function bignumberConfig(column: string, opts?: { aggregation?: "sum" | "avg" | "first"; suffix?: string; abbreviate?: boolean }): QueryWidgetConfig { + return { type: "bignumber", column, aggregation: opts?.aggregation ?? "sum", abbreviate: opts?.abbreviate ?? false, suffix: opts?.suffix }; +} + +function chartConfig(opts: { chartType: "bar" | "line"; xAxisColumn: string; yAxisColumns: string[]; aggregation?: "sum" | "avg" }): QueryWidgetConfig { + return { type: "chart", chartType: opts.chartType, xAxisColumn: opts.xAxisColumn, yAxisColumns: opts.yAxisColumns, groupByColumn: null, stacked: false, sortByColumn: null, sortDirection: "asc", aggregation: opts.aggregation ?? 
"sum" }; +} + +type Tab = "overview" | "global" | "usage"; + +const TAB_CONFIG: { id: Tab; label: string }[] = [ + { id: "overview", label: "Overview" }, + { id: "global", label: "Global Metrics" }, + { id: "usage", label: "Your Usage" }, +]; + +export default function ModelDetailPage() { + const { model, userMetrics, organizationId, projectId, environmentId } = + useTypedLoaderData(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const [activeTab, setActiveTab] = useState("overview"); + + return ( + + + + + + Compare with... + + + + + + {TAB_CONFIG.map((tab) => ( + setActiveTab(tab.id)} + > + {tab.label} + + ))} + + +
+ {activeTab === "overview" && } + {activeTab === "global" && ( + + )} + {activeTab === "usage" && ( + + )} +
+
+
+ ); +} + +// --- Cost Estimator --- + +function CostEstimator({ + inputPrice, + outputPrice, + defaultInputTokens, + defaultOutputTokens, +}: { + inputPrice: number | null; + outputPrice: number | null; + defaultInputTokens?: number; + defaultOutputTokens?: number; +}) { + const [inputTokens, setInputTokens] = useState(defaultInputTokens ?? 1000); + const [outputTokens, setOutputTokens] = useState(defaultOutputTokens ?? 500); + const [numCalls, setNumCalls] = useState(1000); + + if (inputPrice === null && outputPrice === null) return null; + + const inputCost = inputTokens * (inputPrice ?? 0) * numCalls; + const outputCost = outputTokens * (outputPrice ?? 0) * numCalls; + const totalCost = inputCost + outputCost; + + return ( +
+ Cost Estimator +
+
+
+ + setInputTokens(parseInt(e.target.value) || 0)} + /> +
+
+ + setOutputTokens(parseInt(e.target.value) || 0)} + /> +
+
+ + setNumCalls(parseInt(e.target.value) || 0)} + /> +
+
+ +
+ {formatModelCost(totalCost)} +
+
+
+ Input: {formatModelCost(inputCost)} ({formatTokenCount(inputTokens * numCalls)}{" "} + tokens x {formatModelPrice(inputPrice)}/1M) +
+
+ Output: {formatModelCost(outputCost)} ({formatTokenCount(outputTokens * numCalls)}{" "} + tokens x {formatModelPrice(outputPrice)}/1M) +
+
+
+
+
+ ); +} + +// --- Overview Tab --- + +function OverviewTab({ + model, + userMetrics, +}: { + model: ReturnType>["model"]; + userMetrics: ReturnType>["userMetrics"]; +}) { + return ( +
+
+ {/* Model Info */} +
+ Model Info + + + Provider + {formatProviderName(model.provider)} + + + Model Name + + {model.modelName} + + + {model.description && ( + + Description + {model.description} + + )} + {model.contextWindow && ( + + Context Window + + {formatTokenCount(model.contextWindow)} tokens + + + )} + {model.maxOutputTokens && ( + + Max Output + + {formatTokenCount(model.maxOutputTokens)} tokens + + + )} + {model.capabilities.length > 0 && ( + + Capabilities + +
+ {model.capabilities.map((cap) => ( + + {formatCapability(cap)} + + ))} +
+
+
+ )} + + Match Pattern + + {model.matchPattern} + + +
+
+ + {/* Pricing */} +
+ Pricing + + + Input + + {formatModelPrice(model.inputPrice)} / 1M tokens + + + + Output + + {formatModelPrice(model.outputPrice)} / 1M tokens + + + + {model.pricingTiers.length > 1 && ( +
+

All pricing tiers

+ {model.pricingTiers.map((tier) => ( +
+ {tier.name} + {tier.isDefault && ( + + default + + )} +
+ {Object.entries(tier.prices).map(([usage, price]) => ( +
+ {usage}: ${(price * 1_000_000).toFixed(4)} / 1M +
+ ))} +
+
+ ))} +
+ )} +
+
+ + {/* Cost Estimator */} + 0 + ? Math.round(userMetrics.totalInputTokens / userMetrics.totalCalls) + : undefined + } + defaultOutputTokens={ + userMetrics.totalCalls > 0 + ? Math.round(userMetrics.totalOutputTokens / userMetrics.totalCalls) + : undefined + } + /> +
+ ); +} + +// --- Global Metrics Tab --- + +function GlobalMetricsTab({ + modelName, + organizationId, + projectId, + environmentId, +}: { + modelName: string; + organizationId: string; + projectId: string; + environmentId: string; +}) { + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: "7d", + from: null, + to: null, + }; + + return ( +
+ {/* Big numbers */} +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + {/* Charts */} +
+
+ +
+
+ +
+
+ + + Aggregated across all Trigger.dev users. No tenant-specific data is exposed. + +
+ ); +} + +// --- Your Usage Tab --- + +function YourUsageTab({ + modelName, + organizationId, + projectId, + environmentId, +}: { + modelName: string; + organizationId: string; + projectId: string; + environmentId: string; +}) { + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: "7d", + from: null, + to: null, + }; + + return ( +
+ {/* Big numbers */} +
+
+ +
+
+ +
+
+ 0`} + config={bignumberConfig("avg_ttfc", { aggregation: "avg", suffix: "ms" })} + {...widgetProps} + /> +
+
+ 0`} + config={bignumberConfig("avg_tps", { aggregation: "avg" })} + {...widgetProps} + /> +
+
+ + {/* Charts */} +
+
+ +
+
+ +
+
+ + {/* Task breakdown */} +
+ +
+
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx new file mode 100644 index 00000000000..e207e290882 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -0,0 +1,597 @@ +import { + AdjustmentsHorizontalIcon, + ListBulletIcon, + MagnifyingGlassIcon, + Squares2X2Icon, +} from "@heroicons/react/20/solid"; +import { Link, type MetaFunction, useNavigate } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useMemo, useState } from "react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; +import { Badge } from "~/components/primitives/Badge"; +import { Button } from "~/components/primitives/Buttons"; +import { Checkbox } from "~/components/primitives/Checkbox"; +import { Header2 } from "~/components/primitives/Headers"; +import { Input } from "~/components/primitives/Input"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { + SelectProvider, + SelectTrigger, + SelectPopover, + SelectList, + SelectItem, +} from "~/components/primitives/Select"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { appliedSummary } from "~/components/runs/v3/SharedFilters"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ModelCatalogItem, + type PopularModel, + ModelRegistryPresenter, +} from 
"~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { + EnvironmentParamSchema, + v3ModelComparePath, + v3ModelDetailPath, +} from "~/utils/pathBuilder"; +import { + formatModelPrice, + formatTokenCount, + formatCapability, + formatProviderName, +} from "~/utils/modelFormatters"; +import { formatNumberCompact } from "~/utils/numberFormatter"; + +export const meta: MetaFunction = () => { + return [{ title: "Models | Trigger.dev" }]; +}; + +export const loader = async ({ request, params }: LoaderFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params); + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + throw new Response("Project not found", { status: 404 }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + throw new Response("Environment not found", { status: 404 }); + } + + const presenter = new ModelRegistryPresenter(clickhouseClient); + const catalog = await presenter.getModelCatalog(); + + const now = new Date(); + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); + const popularModels = await presenter.getPopularModels(sevenDaysAgo, now, 50); + + const allProviders = catalog.map((g) => g.provider); + const allCapabilities = Array.from( + new Set(catalog.flatMap((g) => g.models.flatMap((m) => m.capabilities))) + ).sort(); + + return typedjson({ catalog, popularModels, allProviders, allCapabilities }); +}; + +// --- Helpers --- + +const FEATURE_OPTIONS = [ + { value: "structuredOutput", label: "Structured Output" }, + { value: 
"parallelToolCalls", label: "Parallel Tool Calls" }, + { value: "streamingToolCalls", label: "Streaming Tool Calls" }, +] as const; + +type FeatureKey = (typeof FEATURE_OPTIONS)[number]["value"]; + +function modelMatchesFeature(model: ModelCatalogItem, feature: FeatureKey): boolean { + switch (feature) { + case "structuredOutput": + return model.supportsStructuredOutput; + case "parallelToolCalls": + return model.supportsParallelToolCalls; + case "streamingToolCalls": + return model.supportsStreamingToolCalls; + } +} + +// --- Filter Components --- + +function ProviderFilter({ providers }: { providers: string[] }) { + const { values, replace, del } = useSearchParams(); + const selected = values("providers"); + + return ( + <> + replace({ providers: v })}> + + {selected.length === 0 ? ( + + + Provider + + ) : null} + + + + {providers.map((p) => ( + + {formatProviderName(p)} + + ))} + + + + {selected.length > 0 && ( + del("providers")} + /> + )} + + ); +} + +function CapabilityFilter({ capabilities }: { capabilities: string[] }) { + const { values, replace, del } = useSearchParams(); + const selected = values("capabilities"); + + return ( + <> + replace({ capabilities: v })}> + + {selected.length === 0 ? ( + + + Capability + + ) : null} + + + + {capabilities.map((c) => ( + + {formatCapability(c)} + + ))} + + + + {selected.length > 0 && ( + del("capabilities")} + /> + )} + + ); +} + +function FeaturesFilter() { + const { values, replace, del } = useSearchParams(); + const selected = values("features"); + + return ( + <> + replace({ features: v })}> + + {selected.length === 0 ? ( + + + Features + + ) : null} + + + + {FEATURE_OPTIONS.map((f) => ( + + {f.label} + + ))} + + + + {selected.length > 0 && ( + FEATURE_OPTIONS.find((f) => f.value === s)?.label ?? s) + )! 
+ } + onRemove={() => del("features")} + /> + )} + + ); +} + +// --- Model Card --- + +function ModelCard({ + model, + popular, + onToggleCompare, + isSelected, +}: { + model: ModelCatalogItem; + popular?: PopularModel; + onToggleCompare: (modelName: string) => void; + isSelected: boolean; +}) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const detailPath = v3ModelDetailPath(organization, project, environment, model.friendlyId); + + return ( +
+
e.stopPropagation()}> + onToggleCompare(model.modelName)} + title="Select for comparison" + /> +
+ + + {model.displayId} + + + {model.description && ( +

{model.description}

+ )} + +
+ + {formatModelPrice(model.inputPrice)}/1M in + + + {formatModelPrice(model.outputPrice)}/1M out + + {model.contextWindow && ( + {formatTokenCount(model.contextWindow)} ctx + )} +
+ + {model.capabilities.length > 0 && ( +
+ {model.capabilities.map((cap) => ( + + {formatCapability(cap)} + + ))} +
+ )} + +
+ {popular && popular.callCount > 0 && ( + {formatNumberCompact(popular.callCount)} calls (7d) + )} + {popular && popular.ttfcP50 > 0 && ( + {popular.ttfcP50.toFixed(0)}ms TTFC + )} +
+ + {model.variants.length > 0 && } +
+ ); +} + +function VariantsDropdown({ variants }: { variants: ModelCatalogItem["variants"] }) { + const [isOpen, setIsOpen] = useState(false); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( +
+ + {isOpen && ( +
+ {variants.map((v) => ( + + {v.modelName} + {v.releaseDate && ( + {v.releaseDate} + )} + + ))} +
+ )} +
+ ); +} + +// --- Models Table --- + +function ModelsTable({ + models, + popularMap, + compareSet, + onToggleCompare, +}: { + models: ModelCatalogItem[]; + popularMap: Map; + compareSet: Set; + onToggleCompare: (modelName: string) => void; +}) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + return ( + + + + + Model + Provider + Input $/1M + Output $/1M + Context + p50 TTFC + Calls (7d) + + + + {models.map((model) => { + const path = v3ModelDetailPath(organization, project, environment, model.friendlyId); + const popular = popularMap.get(model.modelName); + return ( + + + onToggleCompare(model.modelName)} + /> + + + {model.displayId} + + {formatProviderName(model.provider)} + + {formatModelPrice(model.inputPrice)} + + + {formatModelPrice(model.outputPrice)} + + + {formatTokenCount(model.contextWindow)} + + + {popular && popular.ttfcP50 > 0 ? `${popular.ttfcP50.toFixed(0)}ms` : "—"} + + + {popular && popular.callCount > 0 + ? formatNumberCompact(popular.callCount) + : "—"} + + + ); + })} + +
+ ); +} + +// --- Main Page --- + +export default function ModelsPage() { + const { catalog, popularModels, allProviders, allCapabilities } = + useTypedLoaderData(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const navigate = useNavigate(); + const searchParams = useSearchParams(); + + const view = searchParams.value("view") ?? "cards"; + const search = searchParams.value("search") ?? ""; + const selectedProviders = searchParams.values("providers"); + const selectedCapabilities = searchParams.values("capabilities"); + const selectedFeatures = searchParams.values("features") as FeatureKey[]; + const [compareSet, setCompareSet] = useState>(new Set()); + + const popularMap = useMemo(() => { + const map = new Map(); + for (const m of popularModels) { + // Index by raw response_model + map.set(m.responseModel, m); + // Also index by model name without provider prefix (e.g. "openai/gpt-4o" → "gpt-4o") + if (m.responseModel.includes("/")) { + map.set(m.responseModel.split("/").slice(1).join("/"), m); + } + } + return map; + }, [popularModels]); + + const filteredCatalog = useMemo(() => { + return catalog + .map((group) => ({ + ...group, + models: group.models.filter((m) => { + if (search && !m.displayId.toLowerCase().includes(search.toLowerCase())) return false; + if (selectedProviders.length > 0 && !selectedProviders.includes(m.provider)) return false; + if ( + selectedCapabilities.length > 0 && + !selectedCapabilities.every((c) => m.capabilities.includes(c)) + ) + return false; + if ( + selectedFeatures.length > 0 && + !selectedFeatures.every((f) => modelMatchesFeature(m, f)) + ) + return false; + return true; + }), + })) + .filter((group) => group.models.length > 0); + }, [catalog, search, selectedProviders, selectedCapabilities, selectedFeatures]); + + const allFilteredModels = useMemo( + () => filteredCatalog.flatMap((g) => g.models), + [filteredCatalog] + ); + + const toggleCompare = 
(modelName: string) => { + setCompareSet((prev) => { + const next = new Set(prev); + if (next.has(modelName)) { + next.delete(modelName); + } else if (next.size < 4) { + next.add(modelName); + } + return next; + }); + }; + + const hasActiveFilters = + selectedProviders.length > 0 || + selectedCapabilities.length > 0 || + selectedFeatures.length > 0; + + return ( + + + + +
+
+ + searchParams.replace({ search: e.target.value || undefined })} + variant="small" + className="pl-8" + fullWidth={false} + /> +
+ +
+ + +
+
+
+
+ + {/* Filter bar */} +
+ + + + {hasActiveFilters && ( + + )} +
+ + {/* Compare bar */} + {compareSet.size >= 2 && ( +
+ {compareSet.size} models selected +
+ + +
+
+ )} + + {view === "cards" ? ( +
+ {filteredCatalog.map((group) => ( +
+ {formatProviderName(group.provider)} +
+ {group.models.map((model) => ( + + ))} +
+
+ ))} + {filteredCatalog.length === 0 && ( +

+ No models match your filters. +

+ )} +
+ ) : ( + + )} +
+
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx new file mode 100644 index 00000000000..661fb294268 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx @@ -0,0 +1,221 @@ +import { ArrowsRightLeftIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { MainCenteredContainer, PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { InfoPanel } from "~/components/primitives/InfoPanel"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; +import { + Table, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from "~/components/primitives/Table"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ModelComparisonItem, + ModelRegistryPresenter, +} from "~/presenters/v3/ModelRegistryPresenter.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { requireUserId } from "~/services/session.server"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { EnvironmentParamSchema, v3ModelsPath } from "~/utils/pathBuilder"; +import { formatModelCost } from "~/utils/modelFormatters"; +import { formatNumberCompact } from "~/utils/numberFormatter"; + +export const meta: MetaFunction = () => { + return [{ title: "Compare Models | Trigger.dev" 
}];
};

/**
 * Loads up to four models (from the `?models=a,b,c` query param) and their
 * 7-day performance aggregates for the side-by-side comparison page.
 *
 * Fewer than two distinct models yields an empty comparison so the page can
 * render its "select 2-4 models" empty state instead of querying ClickHouse.
 */
export const loader = async ({ request, params }: LoaderFunctionArgs) => {
  const userId = await requireUserId(request);
  const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params);

  const project = await findProjectBySlug(organizationSlug, projectParam, userId);
  if (!project) {
    throw new Response("Project not found", { status: 404 });
  }

  const environment = await findEnvironmentBySlug(project.id, envParam, userId);
  if (!environment) {
    throw new Response("Environment not found", { status: 404 });
  }

  const url = new URL(request.url);
  const modelsParam = url.searchParams.get("models") ?? "";
  // Dedupe before capping: a repeated slug would render duplicate columns and
  // silently consume one of the four comparison slots.
  const responseModels = [...new Set(modelsParam.split(",").filter(Boolean))].slice(0, 4);

  if (responseModels.length < 2) {
    return typedjson({ comparison: [] as ModelComparisonItem[], models: responseModels });
  }

  const presenter = new ModelRegistryPresenter(clickhouseClient);
  const now = new Date();
  const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);

  const comparison = await presenter.getModelComparison(responseModels, sevenDaysAgo, now);

  return typedjson({ comparison, models: responseModels });
};

// One metric row of the comparison table: a label, one formatted value per
// model column, and (optionally) which column is "best" for highlighting.
type ComparisonRow = {
  label: string;
  values: string[];
  bestIndex?: number;
};

/**
 * Builds the comparison-table rows: one row per metric, one value per selected
 * model. `bestIndex` marks the winning column — lowest for latency/cost,
 * highest for call volume/throughput. Models absent from `comparison` (no
 * traffic in the window) render as "—" and are excluded from "best" selection.
 */
function buildRows(models: string[], comparison: ModelComparisonItem[]): ComparisonRow[] {
  const dataMap = new Map<string, ModelComparisonItem>();
  for (const item of comparison) {
    dataMap.set(item.responseModel, item);
  }

  // Numeric value of `key` for each model, in column order; 0 when no data.
  const metric = (key: keyof ModelComparisonItem): number[] =>
    models.map((m) => {
      const d = dataMap.get(m);
      return d ? Number(d[key]) : 0;
    });

  // Index of the best strictly-positive value, or undefined when every model
  // is zero (no data ⇒ nothing to highlight).
  const findBest = (values: number[], lowerIsBetter: boolean): number | undefined => {
    const candidates = values.map((v, i) => ({ v, i })).filter(({ v }) => v > 0);
    if (candidates.length === 0) return undefined;
    return candidates.reduce((a, b) => ((lowerIsBetter ? a.v < b.v : a.v > b.v) ? a : b)).i;
  };

  const callValues = metric("callCount");
  const ttfcP50Values = metric("ttfcP50");
  const ttfcP90Values = metric("ttfcP90");
  const tpsP50Values = metric("tpsP50");
  const tpsP90Values = metric("tpsP90");
  const costValues = metric("totalCost");

  return [
    {
      label: "Provider",
      values: models.map((m) => dataMap.get(m)?.genAiSystem ?? "—"),
    },
    {
      label: "Total Calls (7d)",
      values: callValues.map((v) => formatNumberCompact(v)),
      bestIndex: findBest(callValues, false),
    },
    {
      label: "p50 TTFC",
      values: ttfcP50Values.map((v) => (v > 0 ? `${v.toFixed(0)}ms` : "—")),
      bestIndex: findBest(ttfcP50Values, true),
    },
    {
      label: "p90 TTFC",
      values: ttfcP90Values.map((v) => (v > 0 ? `${v.toFixed(0)}ms` : "—")),
      bestIndex: findBest(ttfcP90Values, true),
    },
    {
      label: "Tokens/sec (p50)",
      values: tpsP50Values.map((v) => (v > 0 ? v.toFixed(0) : "—")),
      bestIndex: findBest(tpsP50Values, false),
    },
    {
      label: "Tokens/sec (p90)",
      values: tpsP90Values.map((v) => (v > 0 ? v.toFixed(0) : "—")),
      bestIndex: findBest(tpsP90Values, false),
    },
    {
      label: "Total Cost (7d)",
      values: costValues.map((v) => (v > 0 ? formatModelCost(v) : "—")),
      bestIndex: findBest(costValues, true),
    },
  ];
}

+ Select 2-4 models from the catalog to compare their pricing, capabilities, and + performance metrics side by side. +

+ + Browse models + +
+
+ ) : ( +
+ + + + Metric + {models.map((model) => ( + + {model} + + ))} + + + + {rows.map((row) => ( + + + {row.label} + + {row.values.map((value, i) => ( + + + {value} + + + ))} + + ))} + +
+
+ )} +
+
+ ); +} diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts index 4e8357c886c..2556dc8267f 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.$modelId.ts @@ -41,6 +41,12 @@ const UpdateModelSchema = z.object({ modelName: z.string().min(1).optional(), matchPattern: z.string().min(1).optional(), startDate: z.string().nullable().optional(), + provider: z.string().nullable().optional(), + description: z.string().nullable().optional(), + contextWindow: z.number().int().nullable().optional(), + maxOutputTokens: z.number().int().nullable().optional(), + capabilities: z.array(z.string()).optional(), + isHidden: z.boolean().optional(), pricingTiers: z .array( z.object({ @@ -94,7 +100,7 @@ export async function action({ request, params }: ActionFunctionArgs) { return json({ error: "Invalid request body", details: parsed.error.issues }, { status: 400 }); } - const { modelName, matchPattern, startDate, pricingTiers } = parsed.data; + const { modelName, matchPattern, startDate, pricingTiers, provider, description, contextWindow, maxOutputTokens, capabilities, isHidden } = parsed.data; // Validate regex if provided — strip (?i) POSIX flag since our registry handles it if (matchPattern) { @@ -114,6 +120,12 @@ export async function action({ request, params }: ActionFunctionArgs) { ...(modelName !== undefined && { modelName }), ...(matchPattern !== undefined && { matchPattern }), ...(startDate !== undefined && { startDate: startDate ? 
new Date(startDate) : null }), + ...(provider !== undefined && { provider }), + ...(description !== undefined && { description }), + ...(contextWindow !== undefined && { contextWindow }), + ...(maxOutputTokens !== undefined && { maxOutputTokens }), + ...(capabilities !== undefined && { capabilities }), + ...(isHidden !== undefined && { isHidden }), }, }); diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts index 805f97ad233..32e780d9fb9 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.seed.ts @@ -1,5 +1,5 @@ import { type ActionFunctionArgs, json } from "@remix-run/server-runtime"; -import { seedLlmPricing } from "@internal/llm-pricing"; +import { seedLlmPricing, syncLlmCatalog } from "@internal/llm-model-catalog"; import { prisma } from "~/db.server"; import { authenticateApiRequestWithPersonalAccessToken } from "~/services/personalAccessToken.server"; import { llmPricingRegistry } from "~/v3/llmPricingRegistry.server"; @@ -15,9 +15,26 @@ export async function action({ request }: ActionFunctionArgs) { return json({ error: "You must be an admin to perform this action" }, { status: 403 }); } + const url = new URL(request.url); + const action = url.searchParams.get("action") ?? 
"seed"; + + if (action === "sync") { + const result = await syncLlmCatalog(prisma); + + if (llmPricingRegistry) { + await llmPricingRegistry.reload(); + } + + return json({ + success: true, + ...result, + message: `Synced ${result.modelsUpdated} models, skipped ${result.modelsSkipped}`, + }); + } + + // Default: seed (creates new + syncs existing) const result = await seedLlmPricing(prisma); - // Reload the in-memory registry after seeding (if enabled) if (llmPricingRegistry) { await llmPricingRegistry.reload(); } @@ -25,6 +42,6 @@ export async function action({ request }: ActionFunctionArgs) { return json({ success: true, ...result, - message: `Seeded ${result.modelsCreated} models, skipped ${result.modelsSkipped} existing`, + message: `Seeded ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`, }); } diff --git a/apps/webapp/app/routes/admin.api.v1.llm-models.ts b/apps/webapp/app/routes/admin.api.v1.llm-models.ts index 6305869c605..4e3cc39f47a 100644 --- a/apps/webapp/app/routes/admin.api.v1.llm-models.ts +++ b/apps/webapp/app/routes/admin.api.v1.llm-models.ts @@ -49,6 +49,12 @@ const CreateModelSchema = z.object({ matchPattern: z.string().min(1), startDate: z.string().optional(), source: z.enum(["default", "admin"]).optional().default("admin"), + provider: z.string().optional(), + description: z.string().optional(), + contextWindow: z.number().int().optional(), + maxOutputTokens: z.number().int().optional(), + capabilities: z.array(z.string()).optional(), + isHidden: z.boolean().optional(), pricingTiers: z.array( z.object({ name: z.string().min(1), @@ -88,7 +94,7 @@ export async function action({ request }: ActionFunctionArgs) { return json({ error: "Invalid request body", details: parsed.error.issues }, { status: 400 }); } - const { modelName, matchPattern, startDate, source, pricingTiers } = parsed.data; + const { modelName, matchPattern, startDate, source, pricingTiers, provider, description, contextWindow, 
maxOutputTokens, capabilities, isHidden } = parsed.data; // Validate regex pattern — strip (?i) POSIX flag since our registry handles it try { @@ -107,6 +113,12 @@ export async function action({ request }: ActionFunctionArgs) { matchPattern, startDate: startDate ? new Date(startDate) : null, source, + provider: provider ?? null, + description: description ?? null, + contextWindow: contextWindow ?? null, + maxOutputTokens: maxOutputTokens ?? null, + capabilities: capabilities ?? [], + isHidden: isHidden ?? false, }, }); diff --git a/apps/webapp/app/routes/admin.llm-models.$modelId.tsx b/apps/webapp/app/routes/admin.llm-models.$modelId.tsx index e37491a1b4f..60c182d2274 100644 --- a/apps/webapp/app/routes/admin.llm-models.$modelId.tsx +++ b/apps/webapp/app/routes/admin.llm-models.$modelId.tsx @@ -41,6 +41,12 @@ const SaveSchema = z.object({ modelName: z.string().min(1), matchPattern: z.string().min(1), pricingTiersJson: z.string(), + provider: z.string().optional(), + description: z.string().optional(), + contextWindow: z.string().optional(), + maxOutputTokens: z.string().optional(), + capabilities: z.string().optional(), + isHidden: z.string().optional(), }); export async function action({ request, params }: ActionFunctionArgs) { @@ -95,9 +101,19 @@ export async function action({ request, params }: ActionFunctionArgs) { } // Update model + const { provider, description, contextWindow, maxOutputTokens, capabilities, isHidden } = parsed.data; await prisma.llmModel.update({ where: { id: modelId }, - data: { modelName, matchPattern }, + data: { + modelName, + matchPattern, + provider: provider || null, + description: description || null, + contextWindow: contextWindow ? parseInt(contextWindow) || null : null, + maxOutputTokens: maxOutputTokens ? parseInt(maxOutputTokens) || null : null, + capabilities: capabilities ? 
capabilities.split(",").map((s) => s.trim()).filter(Boolean) : [], + isHidden: isHidden === "on", + }, }); // Replace tiers @@ -135,6 +151,12 @@ export default function AdminLlmModelDetailRoute() { const [modelName, setModelName] = useState(model.modelName); const [matchPattern, setMatchPattern] = useState(model.matchPattern); + const [provider, setProvider] = useState(model.provider ?? ""); + const [description, setDescription] = useState(model.description ?? ""); + const [contextWindow, setContextWindow] = useState(model.contextWindow?.toString() ?? ""); + const [maxOutputTokens, setMaxOutputTokens] = useState(model.maxOutputTokens?.toString() ?? ""); + const [capabilities, setCapabilities] = useState(model.capabilities?.join(", ") ?? ""); + const [isHidden, setIsHidden] = useState(model.isHidden ?? false); const [testInput, setTestInput] = useState(""); const [tiers, setTiers] = useState(() => model.pricingTiers.map((t) => ({ @@ -236,6 +258,83 @@ export default function AdminLlmModelDetailRoute() { + {/* Catalog metadata */} +
+ + +
+
+ + setProvider(e.target.value)} + variant="medium" + fullWidth + placeholder="openai, anthropic, google" + /> +
+
+ + setContextWindow(e.target.value)} + variant="medium" + fullWidth + placeholder="128000" + /> +
+
+ +
+ + setDescription(e.target.value)} + variant="medium" + fullWidth + placeholder="Brief model description" + /> +
+ +
+
+ + setMaxOutputTokens(e.target.value)} + variant="medium" + fullWidth + placeholder="16384" + /> +
+
+ + setCapabilities(e.target.value)} + variant="medium" + fullWidth + placeholder="vision, tool_use, streaming, json_mode" + /> +
+
+ + +
+ {/* Pricing tiers */}
diff --git a/apps/webapp/app/routes/admin.llm-models._index.tsx b/apps/webapp/app/routes/admin.llm-models._index.tsx index fb2f6fdc491..ea2eff72541 100644 --- a/apps/webapp/app/routes/admin.llm-models._index.tsx +++ b/apps/webapp/app/routes/admin.llm-models._index.tsx @@ -20,7 +20,7 @@ import { import { prisma } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { createSearchParams } from "~/utils/searchParams"; -import { seedLlmPricing } from "@internal/llm-pricing"; +import { seedLlmPricing, syncLlmCatalog } from "@internal/llm-model-catalog"; import { llmPricingRegistry } from "~/v3/llmPricingRegistry.server"; const PAGE_SIZE = 50; @@ -87,12 +87,24 @@ export async function action({ request }: ActionFunctionArgs) { if (_action === "seed") { console.log("[admin] seed action started"); const result = await seedLlmPricing(prisma); - console.log(`[admin] seed complete: ${result.modelsCreated} created, ${result.modelsSkipped} skipped`); + console.log(`[admin] seed complete: ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`); await llmPricingRegistry?.reload(); console.log("[admin] registry reloaded after seed"); return typedjson({ success: true, - message: `Seeded: ${result.modelsCreated} created, ${result.modelsSkipped} skipped`, + message: `Seeded: ${result.modelsCreated} created, ${result.modelsSkipped} skipped, ${result.modelsUpdated} updated`, + }); + } + + if (_action === "sync") { + console.log("[admin] sync catalog action started"); + const result = await syncLlmCatalog(prisma); + console.log(`[admin] sync complete: ${result.modelsUpdated} updated, ${result.modelsSkipped} skipped`); + await llmPricingRegistry?.reload(); + console.log("[admin] registry reloaded after sync"); + return typedjson({ + success: true, + message: `Synced: ${result.modelsUpdated} updated, ${result.modelsSkipped} skipped`, }); } @@ -138,6 +150,7 @@ export default function AdminLlmModelsRoute() { const { 
models, filters, page, pageCount, total } = useTypedLoaderData(); const seedFetcher = useFetcher(); + const syncFetcher = useFetcher(); const reloadFetcher = useFetcher(); const testFetcher = useFetcher<{ testResult?: { @@ -179,6 +192,17 @@ export default function AdminLlmModelsRoute() { + + + + +
+ {/* Catalog metadata */} +
+ + +
+
+ + setProvider(e.target.value)} + variant="medium" + fullWidth + placeholder="openai, anthropic, google" + /> +
+
+ + setContextWindow(e.target.value)} + variant="medium" + fullWidth + placeholder="128000" + /> +
+
+ +
+ + setDescription(e.target.value)} + variant="medium" + fullWidth + placeholder="Brief model description" + /> +
+ +
+
+ + setMaxOutputTokens(e.target.value)} + variant="medium" + fullWidth + placeholder="16384" + /> +
+
+ + setCapabilities(e.target.value)} + variant="medium" + fullWidth + placeholder="vision, tool_use, streaming, json_mode" + /> +
+
+ + +
+ {/* Pricing tiers */}
diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index d6397764eb6..1f3bdbba18a 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -213,11 +213,20 @@ export async function executeQuery( const periodClipped = requestedFromDate !== null && requestedFromDate < maxQueryPeriodDate; // Force tenant isolation and time period limits + // Global tables (no tenantColumns) skip tenant isolation — they contain anonymized cross-tenant data + const isGlobalTable = matchedSchema != null && !matchedSchema.tenantColumns; const enforcedWhereClause = { - organization_id: { op: "eq", value: organizationId }, - project_id: - scope === "project" || scope === "environment" ? { op: "eq", value: projectId } : undefined, - environment_id: scope === "environment" ? { op: "eq", value: environmentId } : undefined, + ...(isGlobalTable + ? {} + : { + organization_id: { op: "eq", value: organizationId }, + project_id: + scope === "project" || scope === "environment" + ? { op: "eq", value: projectId } + : undefined, + environment_id: + scope === "environment" ? { op: "eq", value: environmentId } : undefined, + }), [timeColumn]: { op: "gte", value: maxQueryPeriodDate }, // Optional filters for tasks and queues task_identifier: diff --git a/apps/webapp/app/utils/modelFormatters.ts b/apps/webapp/app/utils/modelFormatters.ts new file mode 100644 index 00000000000..0070c31dad2 --- /dev/null +++ b/apps/webapp/app/utils/modelFormatters.ts @@ -0,0 +1,53 @@ +import { formatNumberCompact } from "./numberFormatter"; + +/** Format a per-token price as $/1M tokens. 
*/ +export function formatModelPrice(pricePerToken: number | null): string { + if (pricePerToken === null) return "—"; + const perMillion = pricePerToken * 1_000_000; + if (perMillion < 0.01) return `$${perMillion.toFixed(4)}`; + if (perMillion < 1) return `$${perMillion.toFixed(3)}`; + return `$${perMillion.toFixed(2)}`; +} + +/** Format a token count (context window, max output). */ +export function formatTokenCount(tokens: number | null): string { + if (tokens === null) return "—"; + return formatNumberCompact(tokens); +} + +/** Format a dollar cost value. */ +export function formatModelCost(dollars: number): string { + if (dollars === 0) return "$0"; + if (dollars < 0.01) return `$${dollars.toFixed(4)}`; + if (dollars < 1) return `$${dollars.toFixed(3)}`; + return `$${dollars.toFixed(2)}`; +} + +/** Format a capability slug from SCREAMING_CASE to Title Case. */ +export function formatCapability(cap: string): string { + return cap + .toLowerCase() + .split("_") + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(" "); +} + +/** Capitalize a provider name. */ +export function formatProviderName(provider: string): string { + const names: Record = { + openai: "OpenAI", + anthropic: "Anthropic", + google: "Google", + meta: "Meta", + mistral: "Mistral", + cohere: "Cohere", + ai21: "AI21", + amazon: "Amazon", + xai: "xAI", + deepseek: "DeepSeek", + qwen: "Qwen", + perplexity: "Perplexity", + nous: "Nous", + }; + return names[provider.toLowerCase()] ?? provider.charAt(0).toUpperCase() + provider.slice(1); +} diff --git a/apps/webapp/app/utils/pathBuilder.ts b/apps/webapp/app/utils/pathBuilder.ts index 038ee8597ea..cc3d0fba0fe 100644 --- a/apps/webapp/app/utils/pathBuilder.ts +++ b/apps/webapp/app/utils/pathBuilder.ts @@ -551,6 +551,31 @@ export function v3PromptPath( return version != null ? 
`${base}?version=${version}` : base;
}

/** Path to the model catalog page for an environment. */
export function v3ModelsPath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath
) {
  return `${v3EnvironmentPath(organization, project, environment)}/models`;
}

/** Path to a single model's detail page within the catalog. */
export function v3ModelDetailPath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath,
  modelId: string
) {
  return `${v3ModelsPath(organization, project, environment)}/${modelId}`;
}

/** Path to the side-by-side model comparison page. */
export function v3ModelComparePath(
  organization: OrgForPath,
  project: ProjectForPath,
  environment: EnvironmentForPath
) {
  return `${v3ModelsPath(organization, project, environment)}/compare`;
}

type AiAccessOptions = {
  userId: string;
  isAdmin: boolean;
  isImpersonating: boolean;
  organizationSlug: string;
};

/**
 * Shared access gate for AI features. Checks, in order:
 *   1. a global env-var kill switch (caller-supplied, already evaluated),
 *   2. admin / impersonation (always allowed),
 *   3. the org-level (or global) feature flag.
 * Membership is part of the org lookup so a non-member cannot read another
 * organization's flag overrides.
 */
async function hasAiFeatureAccess(
  envEnabled: boolean,
  flagKey: (typeof FEATURE_FLAG)[keyof typeof FEATURE_FLAG],
  options: AiAccessOptions
): Promise<boolean> {
  const { userId, isAdmin, isImpersonating, organizationSlug } = options;

  // 1. Env var globally enables the feature.
  if (envEnabled) {
    return true;
  }

  // 2. Admins always have access (including while impersonating).
  if (isAdmin || isImpersonating) {
    return true;
  }

  // 3. Check the org/global feature flag.
  const org = await prisma.organization.findFirst({
    where: {
      slug: organizationSlug,
      members: { some: { userId } },
    },
    select: {
      featureFlags: true,
    },
  });

  const flag = makeFlag();
  const flagResult = await flag({
    key: flagKey,
    defaultValue: false,
    overrides: (org?.featureFlags as Record<string, unknown>) ?? {},
  });

  // 4. Not enabled anywhere unless the flag says so.
  return Boolean(flagResult);
}

/** Whether this user may access AI features in the given organization. */
export async function canAccessAi(options: AiAccessOptions): Promise<boolean> {
  return hasAiFeatureAccess(env.AI_FEATURES_ENABLED === "1", FEATURE_FLAG.hasAiAccess, options);
}

/** Whether this user may access the AI model registry in the given organization. */
export async function canAccessAiModels(options: AiAccessOptions): Promise<boolean> {
  return hasAiFeatureAccess(env.AI_MODELS_ENABLED === "1", FEATURE_FLAG.hasAiModelsAccess, options);
}
Not enabled anywhere + return false; +} diff --git a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts index 72f8f3baa34..e598a17fdcc 100644 --- a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts +++ b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts @@ -266,6 +266,7 @@ export class ClickhouseEventRepository implements IEventRepository { gen_ai_system: llmMetrics.genAiSystem, request_model: llmMetrics.requestModel, response_model: llmMetrics.responseModel, + base_response_model: llmMetrics.baseResponseModel, matched_model_id: llmMetrics.matchedModelId, operation_id: llmMetrics.operationId, finish_reason: llmMetrics.finishReason, diff --git a/apps/webapp/app/v3/eventRepository/eventRepository.types.ts b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts index bd87c370a03..0b45e536490 100644 --- a/apps/webapp/app/v3/eventRepository/eventRepository.types.ts +++ b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts @@ -25,6 +25,7 @@ export type LlmMetricsData = { genAiSystem: string; requestModel: string; responseModel: string; + baseResponseModel: string; matchedModelId: string; operationId: string; finishReason: string; diff --git a/apps/webapp/app/v3/featureFlags.server.ts b/apps/webapp/app/v3/featureFlags.server.ts index e889b2123d2..f32f34c64b8 100644 --- a/apps/webapp/app/v3/featureFlags.server.ts +++ b/apps/webapp/app/v3/featureFlags.server.ts @@ -7,6 +7,8 @@ export const FEATURE_FLAG = { taskEventRepository: "taskEventRepository", hasQueryAccess: "hasQueryAccess", hasLogsPageAccess: "hasLogsPageAccess", + hasAiAccess: "hasAiAccess", + hasAiModelsAccess: "hasAiModelsAccess", } as const; const FeatureFlagCatalog = { @@ -15,6 +17,8 @@ const FeatureFlagCatalog = { [FEATURE_FLAG.taskEventRepository]: z.enum(["clickhouse", "clickhouse_v2", "postgres"]), [FEATURE_FLAG.hasQueryAccess]: z.coerce.boolean(), 
[FEATURE_FLAG.hasLogsPageAccess]: z.coerce.boolean(), + [FEATURE_FLAG.hasAiAccess]: z.coerce.boolean(), + [FEATURE_FLAG.hasAiModelsAccess]: z.coerce.boolean(), }; type FeatureFlagKey = keyof typeof FeatureFlagCatalog; diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts index 627609bb1d8..2212c41779d 100644 --- a/apps/webapp/app/v3/llmPricingRegistry.server.ts +++ b/apps/webapp/app/v3/llmPricingRegistry.server.ts @@ -1,4 +1,4 @@ -import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-pricing"; +import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog"; import { prisma, $replica } from "~/db.server"; import { env } from "~/env.server"; import { signalsEmitter } from "~/services/signals.server"; diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts index e380459f717..1d2c5467742 100644 --- a/apps/webapp/app/v3/querySchemas.ts +++ b/apps/webapp/app/v3/querySchemas.ts @@ -859,7 +859,111 @@ export const llmMetricsSchema: TableSchema = { }, }; -export const querySchemas: TableSchema[] = [runsSchema, metricsSchema, llmMetricsSchema]; +/** + * Schema definition for the llm_models table (trigger_dev.llm_model_aggregates_v1) + * Global table — no tenant columns. Contains anonymized cross-tenant model performance data. + */ +export const llmModelsSchema: TableSchema = { + name: "llm_models", + clickhouseName: "trigger_dev.llm_model_aggregates_v1", + description: + "Cross-tenant model performance aggregates: calls, cost, latency, and throughput per model per minute. 
No tenant-specific data.", + timeConstraint: "minute", + // No tenantColumns — this is a global table with anonymized data + columns: { + response_model: { + name: "response_model", + ...column("String", { + description: "The model name as returned by the provider", + example: "gpt-4o-2024-08-06", + coreColumn: true, + }), + }, + base_response_model: { + name: "base_response_model", + ...column("String", { + description: "The base model name with dated variants grouped", + example: "gpt-4o", + coreColumn: true, + }), + }, + gen_ai_system: { + name: "gen_ai_system", + ...column("String", { + description: "The AI provider system identifier", + example: "openai.responses", + coreColumn: true, + }), + }, + minute: { + name: "minute", + ...column("DateTime", { + description: "Aggregation time bucket (per minute)", + coreColumn: true, + }), + }, + call_count: { + name: "call_count", + ...column("UInt64", { + description: "Number of LLM calls in this time bucket", + coreColumn: true, + }), + }, + total_input_tokens: { + name: "total_input_tokens", + ...column("UInt64", { + description: "Total input tokens consumed", + }), + }, + total_output_tokens: { + name: "total_output_tokens", + ...column("UInt64", { + description: "Total output tokens generated", + }), + }, + total_cost: { + name: "total_cost", + ...column("Float64", { + description: "Total cost in USD", + customRenderType: "costInDollars", + coreColumn: true, + }), + }, + // Aggregate state columns — use quantilesMerge() in queries to extract values + // Example: quantilesMerge(0.5)(ttfc_quantiles)[1] AS ttfc_p50 + ttfc_quantiles: { + name: "ttfc_quantiles", + ...column("String", { + description: + "Time to first chunk quantile state. Use quantilesMerge(0.5)(ttfc_quantiles)[1] AS ttfc_p50 in queries.", + example: "quantilesMerge(0.5)(ttfc_quantiles)[1]", + }), + }, + tps_quantiles: { + name: "tps_quantiles", + ...column("String", { + description: + "Tokens per second quantile state. 
Use quantilesMerge(0.5)(tps_quantiles)[1] AS tps_p50 in queries.", + example: "quantilesMerge(0.5)(tps_quantiles)[1]", + }), + }, + duration_quantiles: { + name: "duration_quantiles", + ...column("String", { + description: + "Duration quantile state. Use quantilesMerge(0.5)(duration_quantiles)[1] AS duration_p50 in queries.", + example: "quantilesMerge(0.5)(duration_quantiles)[1]", + }), + }, + }, +}; + +export const querySchemas: TableSchema[] = [ + runsSchema, + metricsSchema, + llmMetricsSchema, + llmModelsSchema, +]; /** * Default query for the query editor diff --git a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts index 6b52e93469f..64382010496 100644 --- a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts +++ b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts @@ -1,6 +1,7 @@ +import { modelCatalog } from "@internal/llm-model-catalog"; import type { CreateEventInput, LlmMetricsData } from "../eventRepository/eventRepository.types"; -// Registry interface — matches ModelPricingRegistry from @internal/llm-pricing +// Registry interface — matches ModelPricingRegistry from @internal/llm-model-catalog type CostRegistry = { isLoaded: boolean; calculateCost( @@ -183,6 +184,7 @@ function enrichLlmMetrics(event: CreateEventInput): void { genAiSystem: typeof props["gen_ai.system"] === "string" ? props["gen_ai.system"] : "unknown", requestModel: typeof props["gen_ai.request.model"] === "string" ? props["gen_ai.request.model"] : responseModel, responseModel, + baseResponseModel: modelCatalog[responseModel]?.baseModelName ?? responseModel, matchedModelId: cost?.matchedModelId ?? 
"", operationId, finishReason, diff --git a/apps/webapp/package.json b/apps/webapp/package.json index a34b0cf7f90..3eafd1467fc 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -57,7 +57,7 @@ "@heroicons/react": "^2.0.12", "@jsonhero/schema-infer": "^0.1.5", "@internal/cache": "workspace:*", - "@internal/llm-pricing": "workspace:*", + "@internal/llm-model-catalog": "workspace:*", "@internal/redis": "workspace:*", "@internal/run-engine": "workspace:*", "@internal/schedule-engine": "workspace:*", diff --git a/apps/webapp/seed-ai-spans.mts b/apps/webapp/seed-ai-spans.mts index 35ec3d5851e..6ada9fb3285 100644 --- a/apps/webapp/seed-ai-spans.mts +++ b/apps/webapp/seed-ai-spans.mts @@ -11,7 +11,7 @@ import { enrichCreatableEvents, setLlmPricingRegistry, } from "./app/v3/utils/enrichCreatableEvents.server"; -import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-pricing"; +import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog"; import { nanoid } from "nanoid"; import { unflattenAttributes } from "@trigger.dev/core/v3/utils/flattenAttributes"; import type { Attributes } from "@opentelemetry/api"; @@ -138,6 +138,7 @@ function eventToLlmMetricsRow(event: CreateEventInput): LlmMetricsV1Input { gen_ai_system: llm.genAiSystem, request_model: llm.requestModel, response_model: llm.responseModel, + base_response_model: llm.baseResponseModel, matched_model_id: llm.matchedModelId, operation_id: llm.operationId, finish_reason: llm.finishReason, @@ -156,6 +157,8 @@ function eventToLlmMetricsRow(event: CreateEventInput): LlmMetricsV1Input { ms_to_first_chunk: llm.msToFirstChunk, tokens_per_second: llm.tokensPerSecond, metadata: llm.metadata, + prompt_slug: llm.promptSlug, + prompt_version: llm.promptVersion, start_time: formatStartTime(BigInt(event.startTime)), duration: formatDuration(event.duration ?? 
0), }; diff --git a/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql b/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql new file mode 100644 index 00000000000..3d23295b026 --- /dev/null +++ b/internal-packages/clickhouse/schema/026_add_base_response_model_to_llm_metrics_v1.sql @@ -0,0 +1,7 @@ +-- +goose Up +ALTER TABLE trigger_dev.llm_metrics_v1 + ADD COLUMN base_response_model String DEFAULT '' CODEC(ZSTD(1)); + +-- +goose Down +ALTER TABLE trigger_dev.llm_metrics_v1 + DROP COLUMN base_response_model; diff --git a/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql b/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql new file mode 100644 index 00000000000..3797f744bc2 --- /dev/null +++ b/internal-packages/clickhouse/schema/027_create_llm_model_aggregates_v1.sql @@ -0,0 +1,57 @@ +-- +goose Up + +-- Pre-aggregated model performance metrics with no tenant information. +-- Used for cross-tenant model comparisons in the Model Registry. +-- Aggregated per minute for high-resolution model performance tracking. 
+CREATE TABLE IF NOT EXISTS trigger_dev.llm_model_aggregates_v1 +( + response_model String, + base_response_model String DEFAULT '', + gen_ai_system LowCardinality(String), + minute DateTime, + + -- Counts & totals (SimpleAggregateFunction for sum) + call_count SimpleAggregateFunction(sum, UInt64), + total_input_tokens SimpleAggregateFunction(sum, UInt64), + total_output_tokens SimpleAggregateFunction(sum, UInt64), + total_cost SimpleAggregateFunction(sum, Float64), + + -- Performance quantiles (AggregateFunction for merge across parts) + ttfc_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), Float64), + tps_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), Float64), + duration_quantiles AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), UInt64), + + -- Finish reason distribution + finish_reason_counts SimpleAggregateFunction(sumMap, Map(String, UInt64)) +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(minute) +ORDER BY (response_model, base_response_model, gen_ai_system, minute) +TTL toDate(minute) + INTERVAL 365 DAY +SETTINGS ttl_only_drop_parts = 1; + +-- Materialized view that feeds the aggregate table from llm_metrics_v1. +-- Strips all tenant-specific columns (org, project, env, run, span, trace). +-- base_response_model comes from the source table (populated during event enrichment). 
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.llm_model_aggregates_mv_v1 +TO trigger_dev.llm_model_aggregates_v1 +AS SELECT + response_model, + base_response_model, + gen_ai_system, + toStartOfMinute(start_time) AS minute, + count() AS call_count, + sum(input_tokens) AS total_input_tokens, + sum(output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesStateIf(0.5, 0.9, 0.95, 0.99)(ms_to_first_chunk, ms_to_first_chunk > 0) AS ttfc_quantiles, + quantilesStateIf(0.5, 0.9, 0.95, 0.99)(tokens_per_second, tokens_per_second > 0) AS tps_quantiles, + quantilesState(0.5, 0.9, 0.95, 0.99)(duration) AS duration_quantiles, + sumMap(map(finish_reason, toUInt64(1))) AS finish_reason_counts +FROM trigger_dev.llm_metrics_v1 +WHERE response_model != '' +GROUP BY response_model, base_response_model, gen_ai_system, minute; + +-- +goose Down +DROP TABLE IF EXISTS trigger_dev.llm_model_aggregates_mv_v1; +DROP TABLE IF EXISTS trigger_dev.llm_model_aggregates_v1; diff --git a/internal-packages/clickhouse/src/client/queryBuilder.ts b/internal-packages/clickhouse/src/client/queryBuilder.ts index dc0fb297cc9..fb0430fd0db 100644 --- a/internal-packages/clickhouse/src/client/queryBuilder.ts +++ b/internal-packages/clickhouse/src/client/queryBuilder.ts @@ -36,6 +36,12 @@ export class ClickhouseQueryBuilder { this.settings = settings; } + /** Set query parameters without adding a WHERE clause. Use for base queries with inline params. 
*/ + setParams(params: QueryParams): this { + Object.assign(this.params, params); + return this; + } + where(clause: string, params?: QueryParams): this { this.whereClauses.push(clause); if (params) { diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index 18e52483627..99d22a5a18e 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -28,6 +28,11 @@ import { } from "./taskEvents.js"; import { insertMetrics } from "./metrics.js"; import { insertLlmMetrics } from "./llmMetrics.js"; +import { + getGlobalModelMetrics, + getGlobalModelComparison, + getPopularModels, +} from "./llmModelAggregates.js"; import { getErrorGroups, getErrorInstances, @@ -46,6 +51,7 @@ export type * from "./taskRuns.js"; export type * from "./taskEvents.js"; export type * from "./metrics.js"; export type * from "./llmMetrics.js"; +export type * from "./llmModelAggregates.js"; export type * from "./errors.js"; export type * from "./client/queryBuilder.js"; @@ -233,6 +239,14 @@ export class ClickHouse { }; } + get llmModelAggregates() { + return { + globalMetrics: getGlobalModelMetrics(this.reader), + comparison: getGlobalModelComparison(this.reader), + popular: getPopularModels(this.reader), + }; + } + get taskEventsV2() { return { insert: insertTaskEventsV2(this.writer), diff --git a/internal-packages/clickhouse/src/llmMetrics.ts b/internal-packages/clickhouse/src/llmMetrics.ts index 200979f7b61..ba064df125f 100644 --- a/internal-packages/clickhouse/src/llmMetrics.ts +++ b/internal-packages/clickhouse/src/llmMetrics.ts @@ -13,6 +13,7 @@ export const LlmMetricsV1Input = z.object({ gen_ai_system: z.string(), request_model: z.string(), response_model: z.string(), + base_response_model: z.string(), matched_model_id: z.string(), operation_id: z.string(), finish_reason: z.string(), diff --git a/internal-packages/clickhouse/src/llmModelAggregates.ts 
b/internal-packages/clickhouse/src/llmModelAggregates.ts new file mode 100644 index 00000000000..4e7bbba7bc9 --- /dev/null +++ b/internal-packages/clickhouse/src/llmModelAggregates.ts @@ -0,0 +1,138 @@ +import { z } from "zod"; +import { ClickhouseQueryBuilder } from "./client/queryBuilder.js"; +import type { ClickhouseReader } from "./client/types.js"; + +// --- Schemas --- + +const ModelMetricsRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + minute: z.string(), + call_count: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), + ttfc_p90: z.coerce.number(), + ttfc_p95: z.coerce.number(), + ttfc_p99: z.coerce.number(), + tps_p50: z.coerce.number(), + tps_p90: z.coerce.number(), + tps_p95: z.coerce.number(), + tps_p99: z.coerce.number(), + duration_p50: z.coerce.number(), + duration_p90: z.coerce.number(), + duration_p95: z.coerce.number(), + duration_p99: z.coerce.number(), +}); + +const ModelSummaryRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + call_count: z.coerce.number(), + total_input_tokens: z.coerce.number(), + total_output_tokens: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), + ttfc_p90: z.coerce.number(), + tps_p50: z.coerce.number(), + tps_p90: z.coerce.number(), +}); + +const PopularModelRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + call_count: z.coerce.number(), + total_cost: z.coerce.number(), + ttfc_p50: z.coerce.number(), +}); + +// --- Query builders --- + +/** Get per-minute metrics for a specific model over a date range. 
*/ +export function getGlobalModelMetrics(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getGlobalModelMetrics", + `SELECT + response_model, + gen_ai_system, + minute, + sum(call_count) AS call_count, + sum(total_input_tokens) AS total_input_tokens, + sum(total_output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50, + ttfc_arr[2] AS ttfc_p90, + ttfc_arr[3] AS ttfc_p95, + ttfc_arr[4] AS ttfc_p99, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(tps_quantiles) AS tps_arr, + tps_arr[1] AS tps_p50, + tps_arr[2] AS tps_p90, + tps_arr[3] AS tps_p95, + tps_arr[4] AS tps_p99, + quantilesMerge(0.5, 0.9, 0.95, 0.99)(duration_quantiles) AS dur_arr, + dur_arr[1] AS duration_p50, + dur_arr[2] AS duration_p90, + dur_arr[3] AS duration_p95, + dur_arr[4] AS duration_p99 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE response_model = {responseModel: String} + AND minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system, minute + ORDER BY minute`, + reader, + ModelMetricsRow + ); +} + +/** Get summary metrics for multiple models (for comparison). 
*/ +export function getGlobalModelComparison(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getGlobalModelComparison", + `SELECT + response_model, + gen_ai_system, + sum(call_count) AS call_count, + sum(total_input_tokens) AS total_input_tokens, + sum(total_output_tokens) AS total_output_tokens, + sum(total_cost) AS total_cost, + quantilesMerge(0.5, 0.9)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50, + ttfc_arr[2] AS ttfc_p90, + quantilesMerge(0.5, 0.9)(tps_quantiles) AS tps_arr, + tps_arr[1] AS tps_p50, + tps_arr[2] AS tps_p90 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE response_model IN {responseModels: Array(String)} + AND minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system + ORDER BY call_count DESC`, + reader, + ModelSummaryRow + ); +} + +/** Get the most popular models by call count. */ +export function getPopularModels(reader: ClickhouseReader) { + return new ClickhouseQueryBuilder( + "getPopularModels", + `SELECT + response_model, + gen_ai_system, + sum(call_count) AS call_count, + sum(total_cost) AS total_cost, + quantilesMerge(0.5)(ttfc_quantiles) AS ttfc_arr, + ttfc_arr[1] AS ttfc_p50 + FROM trigger_dev.llm_model_aggregates_v1 + WHERE minute >= {startTime: DateTime} + AND minute <= {endTime: DateTime} + GROUP BY response_model, gen_ai_system + ORDER BY call_count DESC + LIMIT {limit: UInt32}`, + reader, + PopularModelRow + ); +} diff --git a/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql b/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql new file mode 100644 index 00000000000..759d17dc93e --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260323104144_add_catalog_columns_to_llm_models/migration.sql @@ -0,0 +1,7 @@ +-- AlterTable +ALTER TABLE "public"."llm_models" ADD COLUMN "capabilities" TEXT[] DEFAULT ARRAY[]::TEXT[], +ADD 
COLUMN "context_window" INTEGER, +ADD COLUMN "description" TEXT, +ADD COLUMN "is_hidden" BOOLEAN NOT NULL DEFAULT false, +ADD COLUMN "max_output_tokens" INTEGER, +ADD COLUMN "provider" TEXT; diff --git a/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql b/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql new file mode 100644 index 00000000000..acdae6e37ba --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260324142824_add_base_model_name_to_llm_models/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."llm_models" ADD COLUMN "base_model_name" TEXT; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 5ebc78508b9..bf3c946a985 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -2670,6 +2670,15 @@ model LlmModel { createdAt DateTime @default(now()) @map("created_at") updatedAt DateTime @updatedAt @map("updated_at") + // Catalog metadata for model registry + provider String? @map("provider") + description String? @map("description") + contextWindow Int? @map("context_window") + maxOutputTokens Int? @map("max_output_tokens") + capabilities String[] @default([]) @map("capabilities") + isHidden Boolean @default(false) @map("is_hidden") + baseModelName String? 
@map("base_model_name") + pricingTiers LlmPricingTier[] prices LlmPrice[] diff --git a/internal-packages/llm-model-catalog/package.json b/internal-packages/llm-model-catalog/package.json new file mode 100644 index 00000000000..be27ce3529d --- /dev/null +++ b/internal-packages/llm-model-catalog/package.json @@ -0,0 +1,20 @@ +{ + "name": "@internal/llm-model-catalog", + "private": true, + "version": "0.0.1", + "main": "./src/index.ts", + "types": "./src/index.ts", + "type": "module", + "dependencies": { + "@trigger.dev/core": "workspace:*", + "@trigger.dev/database": "workspace:*" + }, + "scripts": { + "typecheck": "tsc --noEmit", + "generate": "node scripts/generate.mjs", + "sync-prices": "bash scripts/sync-model-prices.sh && node scripts/generate.mjs", + "sync-prices:check": "bash scripts/sync-model-prices.sh --check", + "generate-catalog": "bash scripts/generate-model-catalog.sh && node scripts/generate.mjs", + "generate-catalog:dry-run": "bash scripts/generate-model-catalog.sh --dry-run" + } +} diff --git a/internal-packages/llm-model-catalog/scripts/.gitignore b/internal-packages/llm-model-catalog/scripts/.gitignore new file mode 100644 index 00000000000..333c1e910a3 --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/.gitignore @@ -0,0 +1 @@ +logs/ diff --git a/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh b/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh new file mode 100755 index 00000000000..c1d945e04b3 --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/generate-model-catalog.sh @@ -0,0 +1,346 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Generate model-catalog.json by researching each unique base model using Claude Code CLI. +# Usage: ./scripts/generate-model-catalog.sh [options] +# +# Options: +# --dry-run Print models that would be researched without running Claude +# --filter Only research models matching this ERE pattern (e.g. 
"gpt-4o|claude") +# --max Maximum number of models to research (useful for testing) +# --stale-days Re-research models older than N days (default: 7) +# --force Re-research all models regardless of resolvedAt timestamp +# --skip-hidden Skip models already marked as hidden/deprecated (saves time) +# --concurrency Number of models to research in parallel (default: 5) +# +# The script: +# 1. Extracts all modelNames from defaultPrices.ts +# 2. Groups dated variants to their base model +# 3. Runs research-model.sh for each base model (in parallel) +# 4. Writes results incrementally to model-catalog.json +# +# Logs are written to scripts/logs/ for debugging failures. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PACKAGE_DIR="$(dirname "$SCRIPT_DIR")" +DEFAULTS_FILE="$PACKAGE_DIR/src/defaultPrices.ts" +CATALOG_FILE="$PACKAGE_DIR/src/model-catalog.json" +RESEARCH_SCRIPT="$SCRIPT_DIR/research-model.sh" + +LOG_DIR="$SCRIPT_DIR/logs" +mkdir -p "$LOG_DIR" + +DRY_RUN=false +FILTER="" +MAX_MODELS=0 +STALE_DAYS=7 +FORCE=false +SKIP_HIDDEN=false +CONCURRENCY=5 + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=true; shift ;; + --filter) FILTER="$2"; shift 2 ;; + --max) MAX_MODELS="$2"; shift 2 ;; + --stale-days) STALE_DAYS="$2"; shift 2 ;; + --force) FORCE=true; shift ;; + --skip-hidden) SKIP_HIDDEN=true; shift ;; + --concurrency) CONCURRENCY="$2"; shift 2 ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +# Extract all model names from defaultPrices.ts +ALL_MODELS=$(grep -o '"modelName": "[^"]*"' "$DEFAULTS_FILE" | sed 's/"modelName": "//;s/"//' | sort -u) + +# Skip embedding, legacy completion, and fine-tuned models +SKIP_PATTERNS="^text-embedding|^textembedding|^text-ada|^text-babbage|^text-curie|^text-davinci|^text-bison|^text-unicorn|^code-bison|^code-gecko|^codechat-bison|^chat-bison|^babbage-002|^davinci-002|^ft:|^gemini-live" + +FILTERED_MODELS=$(echo "$ALL_MODELS" | grep -vE "$SKIP_PATTERNS") + +if [[ -n "$FILTER" ]]; then + 
FILTERED_MODELS=$(echo "$FILTERED_MODELS" | grep -E "$FILTER" || true) +fi + +# Group dated variants to base models +declare -A BASE_TO_VARIANTS +declare -A MODEL_TO_BASE + +for model in $FILTERED_MODELS; do + base=$(echo "$model" | sed -E 's/-[0-9]{4}-?[0-9]{2}-?[0-9]{2}$//') + base_no_latest=$(echo "$base" | sed -E 's/-latest$//') + if [[ ${#base_no_latest} -lt ${#base} ]]; then + base="$base_no_latest" + fi + + MODEL_TO_BASE["$model"]="$base" + + if [[ -n "${BASE_TO_VARIANTS[$base]:-}" ]]; then + BASE_TO_VARIANTS["$base"]="${BASE_TO_VARIANTS[$base]} $model" + else + BASE_TO_VARIANTS["$base"]="$model" + fi +done + +BASE_MODELS=$(printf '%s\n' "${!BASE_TO_VARIANTS[@]}" | sort -u) +TOTAL=$(echo "$BASE_MODELS" | wc -l | tr -d ' ') + +if [[ "$MAX_MODELS" -gt 0 ]]; then + BASE_MODELS=$(echo "$BASE_MODELS" | head -n "$MAX_MODELS") + TOTAL=$(echo "$BASE_MODELS" | wc -l | tr -d ' ') +fi + +echo "Found $TOTAL unique base models (concurrency: $CONCURRENCY)" + +if $DRY_RUN; then + echo "" + echo "Base models and their variants:" + for base in $BASE_MODELS; do + echo " $base → ${BASE_TO_VARIANTS[$base]}" + done + exit 0 +fi + +# Load existing catalog +if [[ -f "$CATALOG_FILE" ]]; then + EXISTING_CATALOG=$(cat "$CATALOG_FILE") +else + EXISTING_CATALOG="{}" +fi + +# Lock file for thread-safe catalog writes +LOCK_FILE="$LOG_DIR/.catalog.lock" +RESULTS_DIR="$LOG_DIR/results" +mkdir -p "$RESULTS_DIR" + +ERRORS=0 +FAILED_MODELS="" +SKIPPED=0 +RESEARCHED=0 +CHANGED=0 + +# --- Determine which models need research --- + +MODELS_TO_RESEARCH="" +COUNT=0 + +for base in $BASE_MODELS; do + COUNT=$((COUNT + 1)) + + SKIP_REASON=$(echo "$EXISTING_CATALOG" | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const entry = data['$base']; + if (!entry) { process.stdout.write('missing'); return; } + if ($FORCE) { process.stdout.write('force'); return; } + if ($SKIP_HIDDEN && entry.isHidden) { process.stdout.write('hidden'); return; } + const resolvedAt = 
entry.resolvedAt ? new Date(entry.resolvedAt) : null; + if (!resolvedAt) { process.stdout.write('no_timestamp'); return; } + const staleMs = $STALE_DAYS * 24 * 60 * 60 * 1000; + if (Date.now() - resolvedAt.getTime() > staleMs) { process.stdout.write('stale'); return; } + process.stdout.write('fresh'); + " 2>/dev/null || echo "missing") + + case "$SKIP_REASON" in + fresh) + RESOLVED_DATE=$(echo "$EXISTING_CATALOG" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));const r=d['$base']?.resolvedAt;console.log(r?r.split('T')[0]:'?')" 2>/dev/null) + echo "[$COUNT/$TOTAL] Skipping $base (resolved $RESOLVED_DATE)" + SKIPPED=$((SKIPPED + 1)) + ;; + hidden) + echo "[$COUNT/$TOTAL] Skipping $base (hidden/deprecated)" + SKIPPED=$((SKIPPED + 1)) + ;; + *) + MODELS_TO_RESEARCH="$MODELS_TO_RESEARCH $base" + ;; + esac +done + +RESEARCH_COUNT=$(echo "$MODELS_TO_RESEARCH" | wc -w | tr -d ' ') +echo "" +echo "Researching $RESEARCH_COUNT models, skipped $SKIPPED" +echo "" + +if [[ "$RESEARCH_COUNT" -eq 0 ]]; then + echo "Nothing to do." + exit 0 +fi + +# --- Research function (called per model, may run in parallel) --- + +research_model() { + local base="$1" + local idx="$2" + local total="$3" + local model_log="$LOG_DIR/$base.log" + local result_file="$RESULTS_DIR/$base.json" + + echo "[$idx/$total] Researching $base..." + + local raw + raw=$("$RESEARCH_SCRIPT" "$base" 3 2>&1) || { + echo "$raw" > "$model_log" + echo " ERROR: Failed to research $base (after retries). Log: $model_log" >&2 + echo '{"error":true}' > "$result_file" + return 1 + } + + echo "$raw" > "$model_log" + + local entry + entry=$(echo "$raw" | node -e " + try { + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + let text = (typeof d.result === 'string' ? 
d.result : JSON.stringify(d)).trim(); + text = text.replace(/^\`\`\`(?:json)?\s*/i, '').replace(/\s*\`\`\`\s*$/, '').trim(); + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (jsonMatch) text = jsonMatch[0]; + const r = JSON.parse(text); + if (!r.provider) throw new Error('missing provider field'); + process.stdout.write(JSON.stringify({ + provider: r.provider, + description: r.description || '', + contextWindow: r.contextWindow || null, + maxOutputTokens: r.maxOutputTokens || null, + capabilities: r.capabilities || [], + releaseDate: r.releaseDate || null, + isHidden: r.isHidden === true, + supportsStructuredOutput: r.supportsStructuredOutput === true, + supportsParallelToolCalls: r.supportsParallelToolCalls === true, + supportsStreamingToolCalls: r.supportsStreamingToolCalls === true, + deprecationDate: r.deprecationDate || null, + knowledgeCutoff: r.knowledgeCutoff || null, + resolvedAt: new Date().toISOString() + })); + } catch(e) { + process.stderr.write(e.message); + process.exit(1); + } + " 2>"$LOG_DIR/$base.parse-error") || { + local parse_err + parse_err=$(cat "$LOG_DIR/$base.parse-error" 2>/dev/null) + echo " ERROR: Failed to parse response for $base: $parse_err" >&2 + echo " Raw response saved to: $model_log" >&2 + echo '{"error":true}' > "$result_file" + return 1 + } + + echo "$entry" > "$result_file" + echo " OK: $(echo "$entry" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));console.log(d.provider + ' / ' + (d.contextWindow||'?') + ' ctx / ' + d.capabilities.length + ' caps')" 2>/dev/null)" +} + +export -f research_model +export RESEARCH_SCRIPT LOG_DIR RESULTS_DIR + +# --- Run research in parallel --- + +IDX=0 +PIDS=() +MODEL_LIST=($MODELS_TO_RESEARCH) + +for base in "${MODEL_LIST[@]}"; do + IDX=$((IDX + 1)) + + research_model "$base" "$IDX" "$RESEARCH_COUNT" & + PIDS+=($!) 
+ + # Throttle concurrency + if [[ ${#PIDS[@]} -ge $CONCURRENCY ]]; then + wait "${PIDS[0]}" 2>/dev/null || true + PIDS=("${PIDS[@]:1}") + fi +done + +# Wait for remaining +for pid in "${PIDS[@]}"; do + wait "$pid" 2>/dev/null || true +done + +echo "" +echo "Research complete. Merging results..." + +# --- Merge results into catalog --- + +CATALOG="$EXISTING_CATALOG" + +for base in "${MODEL_LIST[@]}"; do + RESULT_FILE="$RESULTS_DIR/$base.json" + + if [[ ! -f "$RESULT_FILE" ]]; then + ERRORS=$((ERRORS + 1)) + FAILED_MODELS="$FAILED_MODELS $base" + continue + fi + + ENTRY=$(cat "$RESULT_FILE") + + # Check for error marker + if echo "$ENTRY" | node -e "const d=JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8'));process.exit(d.error?0:1)" 2>/dev/null; then + ERRORS=$((ERRORS + 1)) + FAILED_MODELS="$FAILED_MODELS $base" + continue + fi + + RESEARCHED=$((RESEARCHED + 1)) + + # Diff detection: compare with existing entry + OLD_ENTRY=$(echo "$EXISTING_CATALOG" | node -e " + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const e = d['$base']; + if (e) { delete e.resolvedAt; process.stdout.write(JSON.stringify(e)); } + else process.stdout.write('null'); + " 2>/dev/null) + + NEW_FOR_DIFF=$(echo "$ENTRY" | node -e " + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + delete d.resolvedAt; + process.stdout.write(JSON.stringify(d)); + " 2>/dev/null) + + if [[ "$OLD_ENTRY" != "null" && "$OLD_ENTRY" != "$NEW_FOR_DIFF" ]]; then + CHANGED=$((CHANGED + 1)) + # Log what changed + node -e " + const old = JSON.parse('$OLD_ENTRY'); + const cur = JSON.parse('$NEW_FOR_DIFF'); + const changes = []; + for (const k of new Set([...Object.keys(old), ...Object.keys(cur)])) { + const o = JSON.stringify(old[k]); const n = JSON.stringify(cur[k]); + if (o !== n) changes.push(k + ': ' + o + ' → ' + n); + } + if (changes.length) console.log(' CHANGED: ' + changes.join(', ')); + " 2>/dev/null || true + fi + + # Apply to all variants of this base 
model + for variant in ${BASE_TO_VARIANTS[$base]}; do + CATALOG=$(echo "$CATALOG" | node -e " + const catalog = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + catalog['$variant'] = $ENTRY; + process.stdout.write(JSON.stringify(catalog)); + ") + done +done + +# Write final catalog +echo "$CATALOG" | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')); + const sorted = Object.keys(data).sort().reduce((acc, k) => { acc[k] = data[k]; return acc; }, {}); + process.stdout.write(JSON.stringify(sorted, null, 2) + '\n'); +" > "$CATALOG_FILE" + +# Cleanup results +rm -rf "$RESULTS_DIR" + +FINAL_COUNT=$(node -e "console.log(Object.keys(JSON.parse(require('fs').readFileSync('$CATALOG_FILE','utf-8'))).length)") +echo "" +echo "Done! $FINAL_COUNT entries in catalog" +echo " Researched: $RESEARCHED | Changed: $CHANGED | Skipped: $SKIPPED | Errors: $ERRORS" + +if [[ "$ERRORS" -gt 0 ]]; then + echo "" + echo "Failed models:$FAILED_MODELS" + RETRY_PATTERN=$(echo "$FAILED_MODELS" | tr ' ' '\n' | grep -v '^$' | sed 's/\./\\./g; s/^/^/; s/$/$/' | paste -sd '|' -) + echo "Retry with: $0 --filter \"$RETRY_PATTERN\"" +fi diff --git a/internal-packages/llm-model-catalog/scripts/generate.mjs b/internal-packages/llm-model-catalog/scripts/generate.mjs new file mode 100644 index 00000000000..115643b1dff --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/generate.mjs @@ -0,0 +1,93 @@ +#!/usr/bin/env node + +// Cross-platform generation script for the llm-model-catalog package. +// Generates TypeScript modules from JSON data files: +// 1. defaultPrices.ts ← default-model-prices.json (synced from Langfuse) +// 2.
modelCatalog.ts ← model-catalog.json (our maintained catalog metadata) +// +// Usage: node scripts/generate.mjs +// +// To update the source JSON files: +// - Pricing: pnpm run sync-prices (fetches from Langfuse, requires curl) +// - Catalog: pnpm run generate-catalog (uses Claude CLI to research models) + +import { readFileSync, writeFileSync, existsSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const srcDir = join(__dirname, "..", "src"); + +// --- 1. Generate defaultPrices.ts from default-model-prices.json --- + +const pricesJsonPath = join(srcDir, "default-model-prices.json"); + +if (existsSync(pricesJsonPath)) { + const raw = JSON.parse(readFileSync(pricesJsonPath, "utf-8")); + const stripped = raw.map((e) => ({ + modelName: e.modelName.trim(), + matchPattern: e.matchPattern, + startDate: e.createdAt, + pricingTiers: e.pricingTiers.map((t) => ({ + name: t.name, + isDefault: t.isDefault, + priority: t.priority, + conditions: t.conditions.map((c) => ({ + usageDetailPattern: c.usageDetailPattern, + operator: c.operator, + value: c.value, + })), + prices: t.prices, + })), + })); + + let out = 'import type { DefaultModelDefinition } from "./types.js";\n\n'; + out += "// Auto-generated from default-model-prices.json — do not edit manually.\n"; + out += "// Run `pnpm run sync-prices` to update the JSON, then `pnpm run generate` to regenerate.\n"; + out += "// Source: https://github.com/langfuse/langfuse\n\n"; + out += "export const defaultModelPrices: DefaultModelDefinition[] = "; + out += JSON.stringify(stripped, null, 2) + ";\n"; + + writeFileSync(join(srcDir, "defaultPrices.ts"), out); + console.log(`Generated defaultPrices.ts (${stripped.length} models)`); +} else { + console.log("Skipping defaultPrices.ts — default-model-prices.json not found"); +} + +// --- 2. 
Generate modelCatalog.ts from model-catalog.json --- + +const catalogJsonPath = join(srcDir, "model-catalog.json"); + +if (existsSync(catalogJsonPath)) { + const data = JSON.parse(readFileSync(catalogJsonPath, "utf-8")); + + // Backfill missing fields for old entries + for (const key of Object.keys(data)) { + if (data[key].releaseDate === undefined) data[key].releaseDate = null; + if (data[key].isHidden === undefined) data[key].isHidden = false; + if (data[key].supportsStructuredOutput === undefined) data[key].supportsStructuredOutput = false; + if (data[key].supportsParallelToolCalls === undefined) data[key].supportsParallelToolCalls = false; + if (data[key].supportsStreamingToolCalls === undefined) data[key].supportsStreamingToolCalls = false; + if (data[key].deprecationDate === undefined) data[key].deprecationDate = null; + if (data[key].knowledgeCutoff === undefined) data[key].knowledgeCutoff = null; + if (data[key].resolvedAt === undefined) data[key].resolvedAt = new Date().toISOString(); + { + // Always recompute base model name (don't trust existing values) + // Strip trailing date (-YYYYMMDD or -YYYY-MM-DD) and -latest suffix + // Keep original naming (dots, etc.) — don't normalize + let base = key.replace(/-\d{4}-?\d{2}-?\d{2}$/, "").replace(/-latest$/, ""); + data[key].baseModelName = base !== key ? 
base : null; + } + } + + let out = 'import type { ModelCatalogEntry } from "./types.js";\n\n'; + out += "// Auto-generated from model-catalog.json — do not edit manually.\n"; + out += "// Run `pnpm run generate-catalog` to update the JSON, then `pnpm run generate` to regenerate.\n\n"; + out += "export const modelCatalog: Record<string, ModelCatalogEntry> = "; + out += JSON.stringify(data, null, 2) + ";\n"; + + writeFileSync(join(srcDir, "modelCatalog.ts"), out); + console.log(`Generated modelCatalog.ts (${Object.keys(data).length} entries)`); +} else { + console.log("Skipping modelCatalog.ts — model-catalog.json not found"); +} diff --git a/internal-packages/llm-model-catalog/scripts/research-model.sh b/internal-packages/llm-model-catalog/scripts/research-model.sh new file mode 100755 index 00000000000..422b7a5689a --- /dev/null +++ b/internal-packages/llm-model-catalog/scripts/research-model.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Research a single LLM model using Claude Code CLI and output structured JSON.
+# Usage: ./scripts/research-model.sh +# +# Example: +# ./scripts/research-model.sh gpt-4o +# → {"provider":"openai","description":"...","contextWindow":128000,...} + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +MODEL_NAME="$1" +MAX_RETRIES="${2:-3}" + +PROMPT="Research the LLM model '${MODEL_NAME}' and return ONLY a valid JSON object (no markdown, no explanation, no code fences) with these exact fields: + +{ + \"provider\": \"\", + \"description\": \"<1-2 sentence description of the model>\", + \"contextWindow\": , + \"maxOutputTokens\": , + \"capabilities\": [], + \"releaseDate\": \"\", + \"isHidden\": , + \"supportsStructuredOutput\": , + \"supportsParallelToolCalls\": , + \"supportsStreamingToolCalls\": , + \"deprecationDate\": \"\", + \"knowledgeCutoff\": \"\" +} + +Rules: +- provider must be one of: \"openai\", \"anthropic\", \"google\", \"meta\", \"mistral\", \"cohere\", \"ai21\", \"amazon\", \"xai\", \"deepseek\", \"qwen\", \"perplexity\" or the correct provider lowercase id +- description should be concise and factual (what the model is good at, its position in the provider's lineup) +- contextWindow is the maximum input context in tokens (e.g. 128000 for GPT-4o, 200000 for Claude Sonnet 4) +- maxOutputTokens is the maximum output the model can generate in a single response +- capabilities should be drawn from: \"vision\", \"tool_use\", \"streaming\", \"json_mode\", \"extended_thinking\", \"code_execution\", \"image_generation\", \"audio_input\", \"audio_output\", \"embedding\", \"fine_tunable\" +- Only include capabilities you are confident the model supports +- releaseDate is when the model was first publicly available (API launch date), in YYYY-MM-DD format. Use null if unknown. If the model is a dated variant (e.g. gpt-4o-2024-08-06), the date in the name IS the release date. +- isHidden should be true if the model is deprecated, discontinued, no longer available via API, or superseded by a newer version. 
Examples: gpt-3.5-turbo, claude-1.x, claude-2.x, text-davinci-003, gpt-4-0314 are hidden. Current/active models like gpt-4o, claude-sonnet-4-6, gemini-2.5-flash are NOT hidden. +- supportsStructuredOutput: true if the model reliably follows JSON schemas / structured output mode (e.g. OpenAI's response_format, Anthropic's tool_use for structured output). false for older models that don't support it well. +- supportsParallelToolCalls: true if the model can call multiple tools in a single assistant turn. Most modern models support this. +- supportsStreamingToolCalls: true if the model supports streaming partial tool call arguments as they're generated. +- deprecationDate: the date the provider has announced the model will be sunset/removed from their API, in YYYY-MM-DD format. Use null if no deprecation date has been announced. Only use dates that have been officially published by the provider. +- knowledgeCutoff: the date when the model's training data ends, in YYYY-MM-DD format. Use null if unknown. This is different from releaseDate — it's when the training data was cut off, not when the model launched. +- Output ONLY the JSON object, nothing else" + +for attempt in $(seq 1 "$MAX_RETRIES"); do + RESULT=$(claude -p "$PROMPT" --model opus --output-format json --permission-mode bypassPermissions --tools WebSearch,WebFetch 2>/dev/null) && { + echo "$RESULT" + exit 0 + } + if [[ "$attempt" -lt "$MAX_RETRIES" ]]; then + echo " Retry $attempt/$MAX_RETRIES for $MODEL_NAME..." 
>&2 + sleep 2 + fi +done + +echo " Failed after $MAX_RETRIES attempts for $MODEL_NAME" >&2 +exit 1 diff --git a/internal-packages/llm-pricing/scripts/sync-model-prices.sh b/internal-packages/llm-model-catalog/scripts/sync-model-prices.sh similarity index 53% rename from internal-packages/llm-pricing/scripts/sync-model-prices.sh rename to internal-packages/llm-model-catalog/scripts/sync-model-prices.sh index d72aa6714c6..74ad04f6bb3 100755 --- a/internal-packages/llm-pricing/scripts/sync-model-prices.sh +++ b/internal-packages/llm-model-catalog/scripts/sync-model-prices.sh @@ -8,7 +8,6 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PACKAGE_DIR="$(dirname "$SCRIPT_DIR")" JSON_TARGET="$PACKAGE_DIR/src/default-model-prices.json" -TS_TARGET="$PACKAGE_DIR/src/defaultPrices.ts" SOURCE_URL="https://raw.githubusercontent.com/langfuse/langfuse/main/worker/src/constants/default-model-prices.json" CHECK_MODE=false @@ -37,41 +36,11 @@ if $CHECK_MODE; then echo "Model prices are up to date ($MODEL_COUNT models)" exit 0 else - echo "Model prices are OUTDATED. Run 'pnpm run sync-prices' in @internal/llm-pricing to update." + echo "Model prices are OUTDATED. Run 'pnpm run sync-prices' in @internal/llm-model-catalog to update." exit 1 fi fi cp "$TMPFILE" "$JSON_TARGET" echo "Updated default-model-prices.json ($MODEL_COUNT models)" - -# Generate the TypeScript module from the JSON -echo "Generating defaultPrices.ts..." 
-node -e " -const data = JSON.parse(require('fs').readFileSync('$JSON_TARGET', 'utf-8')); -const stripped = data.map(e => ({ - modelName: e.modelName.trim(), - matchPattern: e.matchPattern, - startDate: e.createdAt, - pricingTiers: e.pricingTiers.map(t => ({ - name: t.name, - isDefault: t.isDefault, - priority: t.priority, - conditions: t.conditions.map(c => ({ - usageDetailPattern: c.usageDetailPattern, - operator: c.operator, - value: c.value, - })), - prices: t.prices, - })), -})); - -let out = 'import type { DefaultModelDefinition } from \"./types.js\";\n\n'; -out += '// Auto-generated from Langfuse default-model-prices.json — do not edit manually.\n'; -out += '// Run \`pnpm run sync-prices\` to update from upstream.\n'; -out += '// Source: https://github.com/langfuse/langfuse\n\n'; -out += 'export const defaultModelPrices: DefaultModelDefinition[] = '; -out += JSON.stringify(stripped, null, 2) + ';\n'; -require('fs').writeFileSync('$TS_TARGET', out); -console.log('Generated defaultPrices.ts with ' + stripped.length + ' models'); -" +echo "Run 'pnpm run generate' to regenerate defaultPrices.ts" diff --git a/internal-packages/llm-pricing/src/default-model-prices.json b/internal-packages/llm-model-catalog/src/default-model-prices.json similarity index 100% rename from internal-packages/llm-pricing/src/default-model-prices.json rename to internal-packages/llm-model-catalog/src/default-model-prices.json diff --git a/internal-packages/llm-pricing/src/defaultPrices.ts b/internal-packages/llm-model-catalog/src/defaultPrices.ts similarity index 99% rename from internal-packages/llm-pricing/src/defaultPrices.ts rename to internal-packages/llm-model-catalog/src/defaultPrices.ts index 2bcc371da10..fb347c2bef6 100644 --- a/internal-packages/llm-pricing/src/defaultPrices.ts +++ b/internal-packages/llm-model-catalog/src/defaultPrices.ts @@ -1,7 +1,7 @@ import type { DefaultModelDefinition } from "./types.js"; -// Auto-generated from Langfuse default-model-prices.json — do 
not edit manually. -// Run `pnpm run sync-prices` to update from upstream. +// Auto-generated from default-model-prices.json — do not edit manually. +// Run `pnpm run sync-prices` to update the JSON, then `pnpm run generate` to regenerate. // Source: https://github.com/langfuse/langfuse export const defaultModelPrices: DefaultModelDefinition[] = [ diff --git a/internal-packages/llm-pricing/src/index.ts b/internal-packages/llm-model-catalog/src/index.ts similarity index 73% rename from internal-packages/llm-pricing/src/index.ts rename to internal-packages/llm-model-catalog/src/index.ts index 3632434c137..8533e66ddd7 100644 --- a/internal-packages/llm-pricing/src/index.ts +++ b/internal-packages/llm-model-catalog/src/index.ts @@ -1,6 +1,8 @@ export { ModelPricingRegistry } from "./registry.js"; export { seedLlmPricing } from "./seed.js"; +export { syncLlmCatalog } from "./sync.js"; export { defaultModelPrices } from "./defaultPrices.js"; +export { modelCatalog } from "./modelCatalog.js"; export type { LlmModelWithPricing, LlmCostResult, @@ -8,4 +10,5 @@ export type { LlmPriceEntry, PricingCondition, DefaultModelDefinition, + ModelCatalogEntry, } from "./types.js"; diff --git a/internal-packages/llm-model-catalog/src/model-catalog.json b/internal-packages/llm-model-catalog/src/model-catalog.json new file mode 100644 index 00000000000..0f602683528 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/model-catalog.json @@ -0,0 +1,2446 @@ +{ + "chatgpt-4o-latest": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model optimized for speed and cost, capable of processing text, images, and audio with strong performance across reasoning, coding, and creative tasks.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + 
"supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T10:55:46.469Z" + }, + "claude-1.1": { + "provider": "anthropic", + "description": "An early-generation Claude model from Anthropic, offering basic conversational and text completion capabilities. It was quickly superseded by Claude 1.2, 1.3, and the Claude 2 family.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:47.906Z" + }, + "claude-1.2": { + "provider": "anthropic", + "description": "An early-generation Anthropic model, part of the original Claude 1.x family. It offered improved performance over Claude 1.0 but was quickly superseded by Claude 1.3 and later model families.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": null, + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.760Z" + }, + "claude-1.3": { + "provider": "anthropic", + "description": "Early-generation Claude model from Anthropic, offering improved performance over Claude 1.0-1.2 in reasoning and instruction-following tasks.", + "contextWindow": 100000, + "maxOutputTokens": null, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.227Z" + }, + "claude-2.0": { + "provider": 
"anthropic", + "description": "Anthropic's second-generation large language model, offering improved performance over Claude 1.x with longer context support. Succeeded by Claude 2.1 and later the Claude 3 family.", + "contextWindow": 100000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-07-11", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-02-01", + "resolvedAt": "2026-03-24T10:55:45.922Z" + }, + "claude-2.1": { + "provider": "anthropic", + "description": "Anthropic's Claude 2.1 model featuring a 200K context window, reduced hallucination rates compared to Claude 2.0, and improved accuracy on long document comprehension.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "tool_use" + ], + "releaseDate": "2023-11-21", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:56:22.743Z" + }, + "claude-3-5-haiku-20241022": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across common tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:56:25.724Z" + }, + "claude-3-5-sonnet-20240620": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.5 Sonnet is a mid-tier model balancing 
intelligence and speed, excelling at coding, analysis, and vision tasks while being faster and cheaper than Opus.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:56:35.401Z" + }, + "claude-3-haiku-20240307": { + "provider": "anthropic", + "description": "Anthropic's fastest and most compact Claude 3 model, optimized for speed and cost-efficiency while maintaining strong performance on everyday tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T10:56:25.288Z" + }, + "claude-3-opus-20240229": { + "provider": "anthropic", + "description": "Anthropic's most capable model in the Claude 3 family, excelling at complex analysis, nuanced content generation, and advanced reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-08-01", + "resolvedAt": "2026-03-24T10:56:26.008Z" + }, + "claude-3-sonnet-20240229": { + "provider": "anthropic", + "description": "Mid-tier model in Anthropic's Claude 3 family, balancing performance and speed for a wide range of tasks including analysis, coding, and 
content generation.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-02-01", + "resolvedAt": "2026-03-24T10:56:59.532Z" + }, + "claude-3.5-haiku-latest": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across a wide range of tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:57:04.392Z" + }, + "claude-3.5-sonnet-20241022": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z" + }, + "claude-3.5-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and 
Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z" + }, + "claude-3.7-sonnet-20250219": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z" + }, + "claude-3.7-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z" + }, + "claude-haiku-4-5-20251001": { + "provider": "anthropic", + "description": "Anthropic's fastest model with near-frontier intelligence, optimized for speed and cost efficiency 
while supporting extended thinking and vision.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-10-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-07-01", + "resolvedAt": "2026-03-24T10:57:29.685Z" + }, + "claude-instant-1": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model optimized for speed and efficiency, positioned as a lighter alternative to Claude 1.x for tasks requiring lower latency.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-06", + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:36.888Z" + }, + "claude-instant-1.2": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model, optimized for speed and efficiency while maintaining strong performance on conversational and text generation tasks.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-08-09", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:41.865Z" + }, + "claude-opus-4-1-20250805": { + "provider": "anthropic", + "description": "Anthropic's hybrid reasoning model with strong software engineering and agentic capabilities, scoring 74.5% on SWE-bench Verified. 
Supports both rapid responses and step-by-step extended thinking.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-08-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:36.876Z" + }, + "claude-opus-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's flagship model from the Claude 4 family, excelling at complex coding tasks, long-running agent workflows, and deep reasoning with extended thinking support.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:47.518Z" + }, + "claude-opus-4-5-20251101": { + "provider": "anthropic", + "description": "Anthropic's flagship intelligence model released in November 2025, excelling at complex reasoning, vision, and extended thinking with the best performance in Anthropic's lineup before Opus 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-11-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:48.961Z" + }, + "claude-opus-4-6": { + "provider": "anthropic", + "description": "Anthropic's most intelligent model, optimized for 
building agents and coding with exceptional reasoning capabilities and extended agentic task horizons.", + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-05-01", + "resolvedAt": "2026-03-24T10:58:42.061Z" + }, + "claude-sonnet-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z" + }, + "claude-sonnet-4-5-20250929": { + "provider": "anthropic", + "description": "Anthropic's high-performance mid-tier model with strong coding, reasoning, and multi-step problem solving capabilities. 
Successor to Claude Sonnet 4, offering improved benchmarks at the same price point.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-09-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T10:59:54.426Z" + }, + "claude-sonnet-4-6": { + "provider": "anthropic", + "description": "Anthropic's best combination of speed and intelligence, excelling at coding, agentic tasks, and computer use, with a 1M token context window and performance rivaling prior Opus-class models.", + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2026-01-01", + "resolvedAt": "2026-03-24T10:59:59.014Z" + }, + "claude-sonnet-4-latest": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. 
Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z" + }, + "gemini-1.0-pro": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z" + }, + "gemini-1.0-pro-001": { + "provider": "google", + "description": "Google's first-generation Pro model optimized for text generation, reasoning, and multi-turn conversation tasks, part of the original Gemini 1.0 lineup.", + "contextWindow": 30720, + "maxOutputTokens": 2048, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:27.391Z" + }, + "gemini-1.0-pro-latest": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. 
Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z" + }, + "gemini-1.5-pro-latest": { + "provider": "google", + "description": "Google's mid-size multimodal model with a massive context window, strong at long-document understanding, code generation, and multi-turn conversation.", + "contextWindow": 2097152, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-24", + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:59:25.463Z" + }, + "gemini-2.0-flash": { + "provider": "google", + "description": "Google's second-generation workhorse model optimized for speed, with native tool use, multimodal input (text, images, audio, video), and a 1M token context window.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:15.429Z" + }, + "gemini-2.0-flash-001": { + "provider": "google", + "description": "Google's fast and efficient multimodal model that outperforms Gemini 1.5 Pro on key benchmarks at twice the speed, supporting text, image, audio, and 
video inputs with native tool use.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "image_generation", + "audio_output", + "code_execution" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T11:01:04.084Z" + }, + "gemini-2.0-flash-lite-preview": { + "provider": "google", + "description": "A lightweight, cost-efficient variant of Gemini 2.0 Flash optimized for low latency and high throughput, supporting multimodal input with text output.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:00:56.775Z" + }, + "gemini-2.0-flash-lite-preview-02-05": { + "provider": "google", + "description": "Google's cost-optimized, low-latency model in the Gemini 2.0 family, designed for high-volume tasks like summarization, multimodal processing, and categorization.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-12-09", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:34.165Z" + }, + "gemini-2.5-flash": { + "provider": "google", + "description": "Google's best price-performance model optimized for low-latency, high-volume tasks requiring 
reasoning, with built-in thinking capabilities and multimodal input support.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-06-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:01:25.200Z" + }, + "gemini-2.5-flash-lite": { + "provider": "google", + "description": "Google's most cost-efficient Gemini model, optimized for low-latency use cases with strong reasoning, multilingual, and long-context capabilities at minimal cost.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-07-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-07-22", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:30.060Z" + }, + "gemini-2.5-pro": { + "provider": "google", + "description": "Google's most advanced reasoning model with deep thinking capabilities, excelling at complex tasks like coding, math, and multimodal understanding across text, images, audio, and video.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-03-25", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-17", + "knowledgeCutoff": "2025-01-31", + "resolvedAt": "2026-03-24T11:02:25.573Z" + }, + 
"gemini-3-flash-preview": { + "provider": "google", + "description": "Google's high-speed thinking model that matches Gemini 2.5 Pro performance at ~3x faster speed and lower cost, designed for agentic workflows, multi-turn chat, and coding assistance with configurable reasoning levels.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:13.388Z" + }, + "gemini-3-pro-preview": { + "provider": "google", + "description": "Google's flagship reasoning and multimodal model with strong coding and agentic capabilities, now deprecated in favor of Gemini 3.1 Pro.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-11-01", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-03-09", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.313Z" + }, + "gemini-3.1-flash-lite-preview": { + "provider": "google", + "description": "Google's most cost-efficient multimodal model in the Gemini 3 series, optimized for high-volume, low-latency tasks like translation, classification, and simple data extraction. 
Offers 2.5x faster time-to-first-token than Gemini 2.5 Flash.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-03-03", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.253Z" + }, + "gemini-3.1-pro-preview": { + "provider": "google", + "description": "Google's most advanced reasoning model in the Gemini 3.1 family, excelling at complex problem-solving across text, audio, images, video, and code with a 1M token context window and extended thinking capabilities.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-02-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:03:33.071Z" + }, + "gemini-pro": { + "provider": "google", + "description": "Google's first-generation Gemini model for text generation, reasoning, and multi-turn conversation. 
Superseded by Gemini 1.5 Pro and later models.", + "contextWindow": 32768, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-04-09", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:03:45.401Z" + }, + "gpt-3.5-turbo": { + "provider": "openai", + "description": "OpenAI's fast and cost-effective model optimized for chat and instruction-following tasks, now superseded by GPT-4o mini.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.412Z" + }, + "gpt-3.5-turbo-0125": { + "provider": "openai", + "description": "A fast and cost-effective GPT-3.5 Turbo snapshot optimized for chat completions, offering improved accuracy for function calling and reduced instances of incomplete responses.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.310Z" + }, + "gpt-3.5-turbo-0301": { + "provider": "openai", + "description": "Early snapshot of GPT-3.5 Turbo, OpenAI's first ChatGPT-optimized model for chat completions. 
Fast and cost-effective for simple tasks but superseded by later revisions.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:12.060Z" + }, + "gpt-3.5-turbo-0613": { + "provider": "openai", + "description": "A snapshot of GPT-3.5 Turbo from June 2023, optimized for chat and instruction-following tasks with function calling support.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:04.463Z" + }, + "gpt-3.5-turbo-1106": { + "provider": "openai", + "description": "A dated snapshot of GPT-3.5 Turbo released in November 2023, offering improved instruction following, JSON mode, and parallel function calling over previous GPT-3.5 variants.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:23.054Z" + }, + "gpt-3.5-turbo-16k": { + "provider": "openai", + "description": "Extended context version of GPT-3.5 Turbo with 16K token context window, offering the same capabilities as the base model but able to process longer inputs.", + "contextWindow": 
16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable", + "tool_use" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:36.307Z" + }, + "gpt-3.5-turbo-16k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-3.5 Turbo with 16K token context, snapshot from June 2023. Optimized for chat completions with longer document processing.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.894Z" + }, + "gpt-3.5-turbo-instruct": { + "provider": "openai", + "description": "OpenAI's GPT-3.5 Turbo Instruct is a completions-only model (not chat) optimized for following explicit instructions, replacing the legacy text-davinci-003 model.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "fine_tunable" + ], + "releaseDate": "2023-09-19", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-27", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.309Z" + }, + "gpt-4": { + "provider": "openai", + "description": "OpenAI's flagship large language model that preceded GPT-4o, known for strong reasoning and instruction-following capabilities across a wide range of tasks.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" 
+ ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:36.773Z" + }, + "gpt-4-0125-preview": { + "provider": "openai", + "description": "An improved GPT-4 Turbo preview model with better task completion, reduced laziness in code generation, and enhanced instruction following.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:54.196Z" + }, + "gpt-4-0314": { + "provider": "openai", + "description": "Original GPT-4 snapshot from March 2023, a large multimodal model (text-only at launch) that was one of OpenAI's first GPT-4 releases. Now deprecated and replaced by newer GPT-4 variants.", + "contextWindow": 8192, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.112Z" + }, + "gpt-4-0613": { + "provider": "openai", + "description": "A snapshot of GPT-4 from June 2023, offering strong reasoning and instruction-following capabilities. 
It was one of the first widely available GPT-4 variants with function calling support.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:13.885Z" + }, + "gpt-4-1106-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:05:12.960Z" + }, + "gpt-4-32k": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token capacity, offering the same capabilities as GPT-4 but able to process longer documents and conversations.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.584Z" + }, + "gpt-4-32k-0314": { + "provider": "openai", + "description": "Extended context (32k token) variant of the original GPT-4 launch snapshot from March 2023, offering the same capabilities as gpt-4-0314 but with 4x the context
window.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:32.044Z" + }, + "gpt-4-32k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token context, based on the June 2023 snapshot. Offers the same capabilities as GPT-4 but with 4x the context length.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:53.070Z" + }, + "gpt-4-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, JSON mode, and parallel function calling. 
A preview release in the GPT-4 Turbo series, now deprecated in favor of newer models.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:54.248Z" + }, + "gpt-4-turbo": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z" + }, + "gpt-4-turbo-2024-04-09": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z" + }, + "gpt-4-turbo-preview": { + "provider": "openai", + "description": "An early preview of GPT-4 Turbo with a 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 
128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:52.346Z" + }, + "gpt-4-turbo-vision": { + "provider": "openai", + "description": "OpenAI's GPT-4 Turbo model with vision capabilities, able to analyze and understand images alongside text. It was a preview model later superseded by GPT-4 Turbo (gpt-4-turbo-2024-04-09) and then GPT-4o.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2024-12-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:38.455Z" + }, + "gpt-4.1": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z" + }, + "gpt-4.1-2025-04-14": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. 
Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z" + }, + "gpt-4.1-mini": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z" + }, + "gpt-4.1-mini-2025-04-14": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z" + }, + "gpt-4.1-nano": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, 
autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z" + }, + "gpt-4.1-nano-2025-04-14": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z" + }, + "gpt-4.5-preview": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z" + }, + "gpt-4.5-preview-2025-02-27": { + "provider": "openai", + "description": "OpenAI's largest 
pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z" + }, + "gpt-4o": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-05-13": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-08-06": { + "provider": "openai", + 
"description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-2024-11-20": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z" + }, + "gpt-4o-audio-preview": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": 
"2026-03-24T11:08:09.590Z" + }, + "gpt-4o-audio-preview-2024-10-01": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z" + }, + "gpt-4o-mini": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z" + }, + "gpt-4o-mini-2024-07-18": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. 
Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z" + }, + "gpt-4o-realtime-preview": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z" + }, + "gpt-4o-realtime-preview-2024-10-01": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z" + }, + "gpt-5": { + "provider": "openai", + "description": "OpenAI's flagship reasoning 
model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z" + }, + "gpt-5-2025-08-07": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z" + }, + "gpt-5-chat-latest": { + "provider": "openai", + "description": "Non-reasoning GPT-5 model used in ChatGPT, optimized for conversational tasks. 
Supports text and image inputs with function calling and structured outputs.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:24.834Z" + }, + "gpt-5-mini": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z" + }, + "gpt-5-mini-2025-08-07": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. 
Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z" + }, + "gpt-5-nano": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z" + }, + "gpt-5-nano-2025-08-07": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. 
Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z" + }, + "gpt-5-pro": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z" + }, + "gpt-5-pro-2025-10-06": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z" + }, + "gpt-5.1": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math 
performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z" + }, + "gpt-5.1-2025-11-13": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z" + }, + "gpt-5.2": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. 
Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z" + }, + "gpt-5.2-2025-12-11": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z" + }, + "gpt-5.2-pro": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z" + }, + "gpt-5.2-pro-2025-12-11": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z" + }, + "gpt-5.4": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z" + }, + "gpt-5.4-2026-03-05": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native 
computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z" + }, + "gpt-5.4-mini": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z" + }, + "gpt-5.4-mini-2026-03-17": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. 
Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z" + }, + "gpt-5.4-nano": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z" + }, + "gpt-5.4-nano-2026-03-17": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z" + }, + "gpt-5.4-pro": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. 
Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z" + }, + "gpt-5.4-pro-2026-03-05": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z" + }, + "o1": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z" + }, + "o1-2024-12-17": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding 
problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z" + }, + "o1-mini": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z" + }, + "o1-mini-2024-09-12": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z" + }, + "o1-preview": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. 
Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z" + }, + "o1-preview-2024-09-12": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z" + }, + "o1-pro": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z" + }, + "o1-pro-2025-03-19": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + 
"tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z" + }, + "o3": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z" + }, + "o3-2025-04-16": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z" + }, + "o3-mini": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", 
+ "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z" + }, + "o3-mini-2025-01-31": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z" + }, + "o3-pro": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z" + }, + "o3-pro-2025-06-10": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + 
"extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z" + }, + "o4-mini": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z" + }, + "o4-mini-2025-04-16": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z" + } +} diff --git a/internal-packages/llm-model-catalog/src/modelCatalog.ts b/internal-packages/llm-model-catalog/src/modelCatalog.ts new file mode 100644 index 00000000000..71ae921c3e7 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/modelCatalog.ts @@ -0,0 +1,2572 @@ +import type { ModelCatalogEntry } from "./types.js"; + +// Auto-generated from model-catalog.json — do not edit manually. 
+// Run `pnpm run generate-catalog` to update the JSON, then `pnpm run generate` to regenerate. + +export const modelCatalog: Record = { + "chatgpt-4o-latest": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model optimized for speed and cost, capable of processing text, images, and audio with strong performance across reasoning, coding, and creative tasks.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T10:55:46.469Z", + "baseModelName": "chatgpt-4o" + }, + "claude-1.1": { + "provider": "anthropic", + "description": "An early-generation Claude model from Anthropic, offering basic conversational and text completion capabilities. It was quickly superseded by Claude 1.2, 1.3, and the Claude 2 family.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:47.906Z", + "baseModelName": null + }, + "claude-1.2": { + "provider": "anthropic", + "description": "An early-generation Anthropic model, part of the original Claude 1.x family. 
It offered improved performance over Claude 1.0 but was quickly superseded by Claude 1.3 and later model families.", + "contextWindow": 9000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": null, + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.760Z", + "baseModelName": null + }, + "claude-1.3": { + "provider": "anthropic", + "description": "Early-generation Claude model from Anthropic, offering improved performance over Claude 1.0-1.2 in reasoning and instruction-following tasks.", + "contextWindow": 100000, + "maxOutputTokens": null, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": null, + "resolvedAt": "2026-03-24T10:55:46.227Z", + "baseModelName": null + }, + "claude-2.0": { + "provider": "anthropic", + "description": "Anthropic's second-generation large language model, offering improved performance over Claude 1.x with longer context support. 
Succeeded by Claude 2.1 and later the Claude 3 family.", + "contextWindow": 100000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-07-11", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-02-01", + "resolvedAt": "2026-03-24T10:55:45.922Z", + "baseModelName": null + }, + "claude-2.1": { + "provider": "anthropic", + "description": "Anthropic's Claude 2.1 model featuring a 200K context window, reduced hallucination rates compared to Claude 2.0, and improved accuracy on long document comprehension.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "tool_use" + ], + "releaseDate": "2023-11-21", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:56:22.743Z", + "baseModelName": null + }, + "claude-3-5-haiku-20241022": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across common tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:56:25.724Z", + "baseModelName": "claude-3-5-haiku" + }, + "claude-3-5-sonnet-20240620": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.5 Sonnet is a mid-tier model balancing intelligence and speed, excelling at coding, analysis, and vision tasks 
while being faster and cheaper than Opus.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:56:35.401Z", + "baseModelName": "claude-3-5-sonnet" + }, + "claude-3-haiku-20240307": { + "provider": "anthropic", + "description": "Anthropic's fastest and most compact Claude 3 model, optimized for speed and cost-efficiency while maintaining strong performance on everyday tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T10:56:25.288Z", + "baseModelName": "claude-3-haiku" + }, + "claude-3-opus-20240229": { + "provider": "anthropic", + "description": "Anthropic's most capable model in the Claude 3 family, excelling at complex analysis, nuanced content generation, and advanced reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-08-01", + "resolvedAt": "2026-03-24T10:56:26.008Z", + "baseModelName": "claude-3-opus" + }, + "claude-3-sonnet-20240229": { + "provider": "anthropic", + "description": "Mid-tier model in Anthropic's Claude 3 family, balancing performance and speed for a wide range of 
tasks including analysis, coding, and content generation.", + "contextWindow": 200000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-03-04", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-02-01", + "resolvedAt": "2026-03-24T10:56:59.532Z", + "baseModelName": "claude-3-sonnet" + }, + "claude-3.5-haiku-latest": { + "provider": "anthropic", + "description": "Anthropic's fastest and most cost-effective model in the Claude 3.5 family, optimized for speed and efficiency while maintaining strong performance across a wide range of tasks.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-10-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-07-01", + "resolvedAt": "2026-03-24T10:57:04.392Z", + "baseModelName": "claude-3.5-haiku" + }, + "claude-3.5-sonnet-20241022": { + "provider": "anthropic", + "description": "Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z", + "baseModelName": "claude-3.5-sonnet" + }, + "claude-3.5-sonnet-latest": { + "provider": "anthropic", + "description": 
"Anthropic's mid-tier model offering strong reasoning, coding, and analysis capabilities at a balance of speed and intelligence, positioned between Haiku and Opus in the Claude 3.5 family.", + "contextWindow": 200000, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-06-20", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:57:13.346Z", + "baseModelName": "claude-3.5-sonnet" + }, + "claude-3.7-sonnet-20250219": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z", + "baseModelName": "claude-3.7-sonnet" + }, + "claude-3.7-sonnet-latest": { + "provider": "anthropic", + "description": "Anthropic's Claude 3.7 Sonnet is a hybrid reasoning model that introduced extended thinking capabilities, offering strong performance on coding, math, and complex reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-02-24", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": 
"2025-04-01", + "resolvedAt": "2026-03-24T10:57:12.967Z", + "baseModelName": "claude-3.7-sonnet" + }, + "claude-haiku-4-5-20251001": { + "provider": "anthropic", + "description": "Anthropic's fastest model with near-frontier intelligence, optimized for speed and cost efficiency while supporting extended thinking and vision.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-10-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-07-01", + "resolvedAt": "2026-03-24T10:57:29.685Z", + "baseModelName": "claude-haiku-4-5" + }, + "claude-instant-1": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model optimized for speed and efficiency, positioned as a lighter alternative to Claude 1.x for tasks requiring lower latency.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-06", + "knowledgeCutoff": "2023-01-01", + "resolvedAt": "2026-03-24T10:57:36.888Z", + "baseModelName": null + }, + "claude-instant-1.2": { + "provider": "anthropic", + "description": "Anthropic's fast and cost-effective model, optimized for speed and efficiency while maintaining strong performance on conversational and text generation tasks.", + "contextWindow": 100000, + "maxOutputTokens": 8191, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-08-09", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-01-01", + 
"resolvedAt": "2026-03-24T10:57:41.865Z", + "baseModelName": null + }, + "claude-opus-4-1-20250805": { + "provider": "anthropic", + "description": "Anthropic's hybrid reasoning model with strong software engineering and agentic capabilities, scoring 74.5% on SWE-bench Verified. Supports both rapid responses and step-by-step extended thinking.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-08-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:36.876Z", + "baseModelName": "claude-opus-4-1" + }, + "claude-opus-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's flagship model from the Claude 4 family, excelling at complex coding tasks, long-running agent workflows, and deep reasoning with extended thinking support.", + "contextWindow": 200000, + "maxOutputTokens": 32000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:47.518Z", + "baseModelName": "claude-opus-4" + }, + "claude-opus-4-5-20251101": { + "provider": "anthropic", + "description": "Anthropic's flagship intelligence model released in November 2025, excelling at complex reasoning, vision, and extended thinking with the best performance in Anthropic's lineup before Opus 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-11-01", + 
"isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:48.961Z", + "baseModelName": "claude-opus-4-5" + }, + "claude-opus-4-6": { + "provider": "anthropic", + "description": "Anthropic's most intelligent model, optimized for building agents and coding with exceptional reasoning capabilities and extended agentic task horizons.", + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-05-01", + "resolvedAt": "2026-03-24T10:58:42.061Z", + "baseModelName": null + }, + "claude-sonnet-4-20250514": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z", + "baseModelName": "claude-sonnet-4" + }, + "claude-sonnet-4-5-20250929": { + "provider": "anthropic", + "description": "Anthropic's high-performance mid-tier model with strong coding, reasoning, and multi-step problem solving capabilities. 
Successor to Claude Sonnet 4, offering improved benchmarks at the same price point.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-09-29", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T10:59:54.426Z", + "baseModelName": "claude-sonnet-4-5" + }, + "claude-sonnet-4-6": { + "provider": "anthropic", + "description": "Anthropic's best combination of speed and intelligence, excelling at coding, agentic tasks, and computer use, with a 1M token context window and performance rivaling prior Opus-class models.", + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-02-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2026-01-01", + "resolvedAt": "2026-03-24T10:59:59.014Z", + "baseModelName": null + }, + "claude-sonnet-4-latest": { + "provider": "anthropic", + "description": "Anthropic's balanced Claude 4 model offering strong coding, reasoning, and multilingual performance at moderate cost. 
Now a legacy model superseded by Claude Sonnet 4.5 and 4.6.", + "contextWindow": 200000, + "maxOutputTokens": 64000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-05-14", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-03-01", + "resolvedAt": "2026-03-24T10:58:39.601Z", + "baseModelName": "claude-sonnet-4" + }, + "gemini-1.0-pro": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z", + "baseModelName": null + }, + "gemini-1.0-pro-001": { + "provider": "google", + "description": "Google's first-generation Pro model optimized for text generation, reasoning, and multi-turn conversation tasks, part of the original Gemini 1.0 lineup.", + "contextWindow": 30720, + "maxOutputTokens": 2048, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:27.391Z", + "baseModelName": null + }, + "gemini-1.0-pro-latest": { + "provider": "google", + "description": "Google's first-generation Gemini Pro model, a mid-size multimodal model designed for 
text generation, reasoning, and chat applications. Succeeded by Gemini 1.5 Pro.", + "contextWindow": 32760, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-02-15", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T10:59:26.767Z", + "baseModelName": "gemini-1.0-pro" + }, + "gemini-1.5-pro-latest": { + "provider": "google", + "description": "Google's mid-size multimodal model with a massive context window, strong at long-document understanding, code generation, and multi-turn conversation.", + "contextWindow": 2097152, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input" + ], + "releaseDate": "2024-02-15", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-24", + "knowledgeCutoff": "2024-04-01", + "resolvedAt": "2026-03-24T10:59:25.463Z", + "baseModelName": "gemini-1.5-pro" + }, + "gemini-2.0-flash": { + "provider": "google", + "description": "Google's second-generation workhorse model optimized for speed, with native tool use, multimodal input (text, images, audio, video), and a 1M token context window.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-02-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:01:15.429Z", + "baseModelName": null + }, + "gemini-2.0-flash-001": { + "provider": "google", + "description": 
"Google's fast and efficient multimodal model that outperforms Gemini 1.5 Pro on key benchmarks at twice the speed, supporting text, image, audio, and video inputs with native tool use.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "image_generation", + "audio_output", + "code_execution" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-08-01", + "resolvedAt": "2026-03-24T11:01:04.084Z", + "baseModelName": null + }, + "gemini-2.0-flash-lite-preview": { + "provider": "google", + "description": "A lightweight, cost-efficient variant of Gemini 2.0 Flash optimized for low latency and high throughput, supporting multimodal input with text output.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-01", + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:00:56.775Z", + "baseModelName": null + }, + "gemini-2.0-flash-lite-preview-02-05": { + "provider": "google", + "description": "Google's cost-optimized, low-latency model in the Gemini 2.0 family, designed for high-volume tasks like summarization, multimodal processing, and categorization.", + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-05", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-12-09", + "knowledgeCutoff": "2024-06-01", + 
"resolvedAt": "2026-03-24T11:01:34.165Z", + "baseModelName": null + }, + "gemini-2.5-flash": { + "provider": "google", + "description": "Google's best price-performance model optimized for low-latency, high-volume tasks requiring reasoning, with built-in thinking capabilities and multimodal input support.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-06-01", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:01:25.200Z", + "baseModelName": null + }, + "gemini-2.5-flash-lite": { + "provider": "google", + "description": "Google's most cost-efficient Gemini model, optimized for low-latency use cases with strong reasoning, multilingual, and long-context capabilities at minimal cost.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-07-22", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-07-22", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:30.060Z", + "baseModelName": null + }, + "gemini-2.5-pro": { + "provider": "google", + "description": "Google's most advanced reasoning model with deep thinking capabilities, excelling at complex tasks like coding, math, and multimodal understanding across text, images, audio, and video.", + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + 
"releaseDate": "2025-03-25", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-06-17", + "knowledgeCutoff": "2025-01-31", + "resolvedAt": "2026-03-24T11:02:25.573Z", + "baseModelName": null + }, + "gemini-3-flash-preview": { + "provider": "google", + "description": "Google's high-speed thinking model that matches Gemini 2.5 Pro performance at ~3x faster speed and lower cost, designed for agentic workflows, multi-turn chat, and coding assistance with configurable reasoning levels.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:13.388Z", + "baseModelName": null + }, + "gemini-3-pro-preview": { + "provider": "google", + "description": "Google's flagship reasoning and multimodal model with strong coding and agentic capabilities, now deprecated in favor of Gemini 3.1 Pro.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2025-11-01", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-03-09", + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.313Z", + "baseModelName": null + }, + "gemini-3.1-flash-lite-preview": { + "provider": "google", + "description": "Google's most cost-efficient multimodal model in the Gemini 3 series, optimized for high-volume, low-latency tasks like 
translation, classification, and simple data extraction. Offers 2.5x faster time-to-first-token than Gemini 2.5 Flash.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-03-03", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:02:29.253Z", + "baseModelName": null + }, + "gemini-3.1-pro-preview": { + "provider": "google", + "description": "Google's most advanced reasoning model in the Gemini 3.1 family, excelling at complex problem-solving across text, audio, images, video, and code with a 1M token context window and extended thinking capabilities.", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking", + "code_execution", + "audio_input" + ], + "releaseDate": "2026-02-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:03:33.071Z", + "baseModelName": null + }, + "gemini-pro": { + "provider": "google", + "description": "Google's first-generation Gemini model for text generation, reasoning, and multi-turn conversation. 
Superseded by Gemini 1.5 Pro and later models.", + "contextWindow": 32768, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-12-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-04-09", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:03:45.401Z", + "baseModelName": null + }, + "gpt-3.5-turbo": { + "provider": "openai", + "description": "OpenAI's fast and cost-effective model optimized for chat and instruction-following tasks, now superseded by GPT-4o mini.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.412Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0125": { + "provider": "openai", + "description": "A fast and cost-effective GPT-3.5 Turbo snapshot optimized for chat completions, offering improved accuracy for function calling and reduced instances of incomplete responses.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:11.310Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0301": { + "provider": "openai", + "description": "Early snapshot of GPT-3.5 Turbo, OpenAI's first ChatGPT-optimized model for chat completions. 
Fast and cost-effective for simple tasks but superseded by later revisions.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "fine_tunable" + ], + "releaseDate": "2023-03-01", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:03:12.060Z", + "baseModelName": null + }, + "gpt-3.5-turbo-0613": { + "provider": "openai", + "description": "A snapshot of GPT-3.5 Turbo from June 2023, optimized for chat and instruction-following tasks with function calling support.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:04.463Z", + "baseModelName": null + }, + "gpt-3.5-turbo-1106": { + "provider": "openai", + "description": "A dated snapshot of GPT-3.5 Turbo released in November 2023, offering improved instruction following, JSON mode, and parallel function calling over previous GPT-3.5 variants.", + "contextWindow": 16385, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:23.054Z", + "baseModelName": null + }, + "gpt-3.5-turbo-16k": { + "provider": "openai", + "description": "Extended context version of GPT-3.5 Turbo with 16K token context window, offering the same capabilities 
as the base model but able to process longer inputs.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable", + "tool_use" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:36.307Z", + "baseModelName": null + }, + "gpt-3.5-turbo-16k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-3.5 Turbo with 16K token context, snapshot from June 2023. Optimized for chat completions with longer document processing.", + "contextWindow": 16384, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-09-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.894Z", + "baseModelName": null + }, + "gpt-3.5-turbo-instruct": { + "provider": "openai", + "description": "OpenAI's GPT-3.5 Turbo Instruct is a completions-only model (not chat) optimized for following explicit instructions, replacing the legacy text-davinci-003 model.", + "contextWindow": 4096, + "maxOutputTokens": 4096, + "capabilities": [ + "fine_tunable" + ], + "releaseDate": "2023-09-19", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-01-27", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:04:22.309Z", + "baseModelName": null + }, + "gpt-4": { + "provider": "openai", + "description": "OpenAI's flagship large language model that preceded GPT-4o, known for strong reasoning and instruction-following capabilities across a 
wide range of tasks.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:36.773Z", + "baseModelName": null + }, + "gpt-4-0125-preview": { + "provider": "openai", + "description": "An improved GPT-4 Turbo preview model with better task completion, reduced laziness in code generation, and enhanced instruction following.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:04:54.196Z", + "baseModelName": null + }, + "gpt-4-0314": { + "provider": "openai", + "description": "Original GPT-4 snapshot from March 2023, a large multimodal model (text-only at launch) that was one of OpenAI's first GPT-4 releases. Now deprecated and replaced by newer GPT-4 variants.", + "contextWindow": 8192, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.112Z", + "baseModelName": null + }, + "gpt-4-0613": { + "provider": "openai", + "description": "A snapshot of GPT-4 from June 2023, offering strong reasoning and instruction-following capabilities. 
It was one of the first widely available GPT-4 variants with function calling support.", + "contextWindow": 8192, + "maxOutputTokens": 8192, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:13.885Z", + "baseModelName": null + }, + "gpt-4-1106-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, offering improved instruction following and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:05:12.960Z", + "baseModelName": null + }, + "gpt-4-32k": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token capacity, offering the same capabilities as GPT-4 but able to process longer documents and conversations.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:14.584Z", + "baseModelName": null + }, + "gpt-4-32k-0314": { + "provider": "openai", + "description": "Extended context (32k token) variant of the original GPT-4 launch snapshot from March 2023, 
offering the same capabilities as gpt-4-0314 but with 4x the context window.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "streaming" + ], + "releaseDate": "2023-03-14", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2024-06-13", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:32.044Z", + "baseModelName": null + }, + "gpt-4-32k-0613": { + "provider": "openai", + "description": "Extended context window variant of GPT-4 with 32,768 token context, based on the June 2023 snapshot. Offers the same capabilities as GPT-4 but with 4x the context length.", + "contextWindow": 32768, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-06-13", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2021-09-01", + "resolvedAt": "2026-03-24T11:05:53.070Z", + "baseModelName": null + }, + "gpt-4-preview": { + "provider": "openai", + "description": "GPT-4 Turbo preview model with 128K context window, JSON mode, and parallel function calling. 
A preview release in the GPT-4 Turbo series, now deprecated in favor of newer models.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-06-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:54.248Z", + "baseModelName": null + }, + "gpt-4-turbo": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z", + "baseModelName": null + }, + "gpt-4-turbo-2024-04-09": { + "provider": "openai", + "description": "OpenAI's optimized GPT-4 variant offering faster inference and lower cost than the original GPT-4, with vision capabilities and a 128K context window.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-04-09", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:51.415Z", + "baseModelName": "gpt-4-turbo" + }, + "gpt-4-turbo-preview": { + "provider": "openai", + "description": "An early preview of GPT-4 Turbo with a 128K context window, offering improved instruction following 
and JSON mode support at reduced cost compared to GPT-4.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-01-25", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-12-01", + "resolvedAt": "2026-03-24T11:05:52.346Z", + "baseModelName": null + }, + "gpt-4-turbo-vision": { + "provider": "openai", + "description": "OpenAI's GPT-4 Turbo model with vision capabilities, able to analyze and understand images alongside text. It was a preview model later superseded by GPT-4 Turbo (gpt-4-turbo-2024-04-09) and then GPT-4o.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2023-11-06", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2024-12-06", + "knowledgeCutoff": "2023-04-01", + "resolvedAt": "2026-03-24T11:06:38.455Z", + "baseModelName": null + }, + "gpt-4.1": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. 
Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z", + "baseModelName": null + }, + "gpt-4.1-2025-04-14": { + "provider": "openai", + "description": "OpenAI's flagship model optimized for coding, instruction following, and tool calling with a 1M token context window. Excels at structured outputs and long-context tasks.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:07:00.439Z", + "baseModelName": "gpt-4.1" + }, + "gpt-4.1-mini": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z", + "baseModelName": null + }, + "gpt-4.1-mini-2025-04-14": { + "provider": "openai", + "description": "A compact, cost-efficient model in OpenAI's GPT-4.1 family that 
matches or exceeds GPT-4o on many benchmarks while offering nearly half the latency and significantly lower cost.", + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:14.524Z", + "baseModelName": "gpt-4.1-mini" + }, + "gpt-4.1-nano": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:08:04.533Z", + "baseModelName": null + }, + "gpt-4.1-nano-2025-04-14": { + "provider": "openai", + "description": "OpenAI's fastest and most cost-effective model in the GPT-4.1 family, optimized for low-latency tasks like classification, autocompletion, and lightweight agentic workflows with strong instruction-following and tool-calling capabilities.", + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-04-14", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + 
"resolvedAt": "2026-03-24T11:08:04.533Z", + "baseModelName": "gpt-4.1-nano" + }, + "gpt-4.5-preview": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z", + "baseModelName": null + }, + "gpt-4.5-preview-2025-02-27": { + "provider": "openai", + "description": "OpenAI's largest pretrained model before the GPT-5 series, emphasizing broad knowledge, creative writing, and improved emotional intelligence over reasoning-focused models.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-02-27", + "isHidden": true, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2025-07-14", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:57.880Z", + "baseModelName": "gpt-4.5-preview" + }, + "gpt-4o": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + 
"supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": null + }, + "gpt-4o-2024-05-13": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-2024-08-06": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + "fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-2024-11-20": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model combining strong reasoning with vision, audio, and tool use capabilities at faster speeds and lower cost than GPT-4 Turbo.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "audio_input", + "audio_output", + 
"fine_tunable" + ], + "releaseDate": "2024-05-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:07:31.638Z", + "baseModelName": "gpt-4o" + }, + "gpt-4o-audio-preview": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z", + "baseModelName": null + }, + "gpt-4o-audio-preview-2024-10-01": { + "provider": "openai", + "description": "GPT-4o variant with native audio input and output capabilities via the Chat Completions API, supporting both text and audio modalities for conversational and voice-based applications.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:08:09.590Z", + "baseModelName": "gpt-4o-audio-preview" + }, + "gpt-4o-mini": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. 
Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z", + "baseModelName": null + }, + "gpt-4o-mini-2024-07-18": { + "provider": "openai", + "description": "Fast, affordable small model optimized for focused tasks. Positioned as OpenAI's cost-efficient option with strong performance on benchmarks relative to its size.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "fine_tunable" + ], + "releaseDate": "2024-07-18", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:50.130Z", + "baseModelName": "gpt-4o-mini" + }, + "gpt-4o-realtime-preview": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z", + "baseModelName": null + }, + 
"gpt-4o-realtime-preview-2024-10-01": { + "provider": "openai", + "description": "OpenAI's real-time multimodal model capable of processing and generating both text and audio over WebRTC or WebSocket, enabling low-latency voice conversations and audio interactions.", + "contextWindow": 128000, + "maxOutputTokens": 4096, + "capabilities": [ + "audio_input", + "audio_output", + "tool_use", + "streaming" + ], + "releaseDate": "2024-10-01", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": false, + "deprecationDate": "2026-05-07", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:09:35.495Z", + "baseModelName": "gpt-4o-realtime-preview" + }, + "gpt-5": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z", + "baseModelName": null + }, + "gpt-5-2025-08-07": { + "provider": "openai", + "description": "OpenAI's flagship reasoning model released August 2025, featuring a 400K token context window with strong coding, reasoning, and agentic capabilities.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "image_generation", + "code_execution" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + 
"deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:28.216Z", + "baseModelName": "gpt-5" + }, + "gpt-5-chat-latest": { + "provider": "openai", + "description": "Non-reasoning GPT-5 model used in ChatGPT, optimized for conversational tasks. Supports text and image inputs with function calling and structured outputs.", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:09:24.834Z", + "baseModelName": "gpt-5-chat" + }, + "gpt-5-mini": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z", + "baseModelName": null + }, + "gpt-5-mini-2025-08-07": { + "provider": "openai", + "description": "A faster, more cost-efficient version of GPT-5 designed for well-defined tasks and precise prompts. 
Supports reasoning with configurable effort levels and offers reduced latency compared to the full GPT-5 model.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:09:42.822Z", + "baseModelName": "gpt-5-mini" + }, + "gpt-5-nano": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z", + "baseModelName": null + }, + "gpt-5-nano-2025-08-07": { + "provider": "openai", + "description": "The smallest and fastest variant in the GPT-5 family, optimized for developer tools, rapid interactions, and ultra-low latency environments. 
Best suited for classification, data extraction, ranking, and sub-agent tasks.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-08-07", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-05-31", + "resolvedAt": "2026-03-24T11:11:24.884Z", + "baseModelName": "gpt-5-nano" + }, + "gpt-5-pro": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z", + "baseModelName": null + }, + "gpt-5-pro-2025-10-06": { + "provider": "openai", + "description": "OpenAI's enhanced GPT-5 variant optimized for complex tasks requiring step-by-step reasoning, with reduced hallucination and improved code quality compared to the base GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-10-06", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-10-01", + "resolvedAt": "2026-03-24T11:11:37.048Z", + "baseModelName": "gpt-5-pro" + }, + "gpt-5.1": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, 
offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z", + "baseModelName": null + }, + "gpt-5.1-2025-11-13": { + "provider": "openai", + "description": "GPT-5.1 is OpenAI's frontier-grade model in the GPT-5 series, offering adaptive reasoning with configurable effort levels, improved coding and math performance, and a more natural conversational style compared to GPT-5.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2025-11-13", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-09-30", + "resolvedAt": "2026-03-24T11:11:47.327Z", + "baseModelName": "gpt-5.1" + }, + "gpt-5.2": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. 
Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z", + "baseModelName": null + }, + "gpt-5.2-2025-12-11": { + "provider": "openai", + "description": "OpenAI's flagship multimodal model released December 2025, excelling at long-context reasoning, agentic tool use, software engineering, and professional knowledge work. Available in Instant, Thinking, and Pro variants.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:13.129Z", + "baseModelName": "gpt-5.2" + }, + "gpt-5.2-pro": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z", + "baseModelName": null + }, + "gpt-5.2-pro-2025-12-11": { + "provider": "openai", + "description": "OpenAI's previous pro-tier reasoning model optimized for complex professional work requiring step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. Superseded by GPT-5.4 pro.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2025-12-11", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:11:12.711Z", + "baseModelName": "gpt-5.2-pro" + }, + "gpt-5.4": { + "provider": "openai", + "description": "OpenAI's most capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z", + "baseModelName": null + }, + "gpt-5.4-2026-03-05": { + "provider": "openai", + "description": "OpenAI's most 
capable frontier model as of March 2026, featuring state-of-the-art coding, native computer-use capabilities, and a 1M-token context window for professional and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "code_execution" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:09.220Z", + "baseModelName": "gpt-5.4" + }, + "gpt-5.4-mini": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z", + "baseModelName": null + }, + "gpt-5.4-mini-2026-03-17": { + "provider": "openai", + "description": "OpenAI's fast and efficient small model from the GPT-5.4 family, designed for high-volume workloads. 
Approaches GPT-5.4 performance on coding and reasoning while running over 2x faster than GPT-5 mini.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:35.473Z", + "baseModelName": "gpt-5.4-mini" + }, + "gpt-5.4-nano": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z", + "baseModelName": null + }, + "gpt-5.4-nano-2026-03-17": { + "provider": "openai", + "description": "OpenAI's cheapest GPT-5.4-class model optimized for simple high-volume tasks like classification, data extraction, ranking, and sub-agent delegation in agentic workflows.", + "contextWindow": 400000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2026-03-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:52.285Z", + "baseModelName": "gpt-5.4-nano" + }, + "gpt-5.4-pro": { + "provider": "openai", + "description": "OpenAI's 
highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z", + "baseModelName": null + }, + "gpt-5.4-pro-2026-03-05": { + "provider": "openai", + "description": "OpenAI's highest-capability GPT-5.4 variant, using additional compute for harder problems. Available via Responses API only, designed for complex reasoning, coding, and agentic workflows.", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "extended_thinking" + ], + "releaseDate": "2026-03-05", + "isHidden": false, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2025-08-31", + "resolvedAt": "2026-03-24T11:12:56.903Z", + "baseModelName": "gpt-5.4-pro" + }, + "o1": { + "provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z", + "baseModelName": null + }, + "o1-2024-12-17": { + 
"provider": "openai", + "description": "OpenAI's reasoning model designed for complex tasks requiring multi-step logical thinking, excelling at math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode" + ], + "releaseDate": "2024-12-17", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:23.948Z", + "baseModelName": "o1" + }, + "o1-mini": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z", + "baseModelName": null + }, + "o1-mini-2024-09-12": { + "provider": "openai", + "description": "A smaller, faster, and cheaper reasoning model in OpenAI's o1 series, optimized for coding, math, and science tasks requiring multi-step reasoning.", + "contextWindow": 128000, + "maxOutputTokens": 65536, + "capabilities": [ + "streaming", + "json_mode" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-06-30", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:37.030Z", + "baseModelName": "o1-mini" + }, + "o1-preview": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to 
solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z", + "baseModelName": null + }, + "o1-preview-2024-09-12": { + "provider": "openai", + "description": "OpenAI's first reasoning model using chain-of-thought to solve complex problems in science, coding, and math. Predecessor to o1 and o3 series.", + "contextWindow": 128000, + "maxOutputTokens": 32768, + "capabilities": [ + "streaming" + ], + "releaseDate": "2024-09-12", + "isHidden": true, + "supportsStructuredOutput": false, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": "2025-10-31", + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:12:59.198Z", + "baseModelName": "o1-preview" + }, + "o1-pro": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z", + "baseModelName": null + }, + "o1-pro-2025-03-19": { + "provider": "openai", + "description": "A version of OpenAI's o1 reasoning model that uses significantly more compute to deliver better, more consistent answers on complex 
reasoning tasks in science, coding, and math.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-03-19", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2023-10-01", + "resolvedAt": "2026-03-24T11:13:57.532Z", + "baseModelName": "o1-pro" + }, + "o3": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z", + "baseModelName": null + }, + "o3-2025-04-16": { + "provider": "openai", + "description": "OpenAI's advanced reasoning model designed for complex tasks requiring deep reasoning, excelling at software engineering, mathematics, scientific reasoning, and visual reasoning tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:04.906Z", + "baseModelName": "o3" + }, + "o3-mini": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized 
for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z", + "baseModelName": null + }, + "o3-mini-2025-01-31": { + "provider": "openai", + "description": "OpenAI's compact reasoning model optimized for STEM tasks, offering strong performance in math, science, and coding at lower cost than o3.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-01-31", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2025-01-01", + "resolvedAt": "2026-03-24T11:13:33.788Z", + "baseModelName": "o3-mini" + }, + "o3-pro": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z", + "baseModelName": null + }, + "o3-pro-2025-06-10": { + "provider": "openai", + "description": "OpenAI's most reliable reasoning 
model, a version of o3 designed to think longer and provide more consistently accurate answers for challenging math, science, and coding problems.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-06-10", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": false, + "supportsStreamingToolCalls": false, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:10.900Z", + "baseModelName": "o3-pro" + }, + "o4-mini": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z", + "baseModelName": null + }, + "o4-mini-2025-04-16": { + "provider": "openai", + "description": "OpenAI's small reasoning model optimized for fast, cost-efficient reasoning with strong performance in math, coding, and visual tasks.", + "contextWindow": 200000, + "maxOutputTokens": 100000, + "capabilities": [ + "vision", + "tool_use", + "streaming", + "json_mode", + "extended_thinking" + ], + "releaseDate": "2025-04-16", + "isHidden": false, + "supportsStructuredOutput": true, + "supportsParallelToolCalls": true, + "supportsStreamingToolCalls": true, + "deprecationDate": null, + "knowledgeCutoff": "2024-06-01", + "resolvedAt": "2026-03-24T11:14:16.050Z", + "baseModelName": "o4-mini" + } +}; diff --git a/internal-packages/llm-pricing/src/registry.test.ts 
b/internal-packages/llm-model-catalog/src/registry.test.ts similarity index 100% rename from internal-packages/llm-pricing/src/registry.test.ts rename to internal-packages/llm-model-catalog/src/registry.test.ts diff --git a/internal-packages/llm-pricing/src/registry.ts b/internal-packages/llm-model-catalog/src/registry.ts similarity index 100% rename from internal-packages/llm-pricing/src/registry.ts rename to internal-packages/llm-model-catalog/src/registry.ts diff --git a/internal-packages/llm-pricing/src/seed.ts b/internal-packages/llm-model-catalog/src/seed.ts similarity index 67% rename from internal-packages/llm-pricing/src/seed.ts rename to internal-packages/llm-model-catalog/src/seed.ts index d068c62a66d..72d212a9120 100644 --- a/internal-packages/llm-pricing/src/seed.ts +++ b/internal-packages/llm-model-catalog/src/seed.ts @@ -1,10 +1,13 @@ import type { PrismaClient } from "@trigger.dev/database"; import { generateFriendlyId } from "@trigger.dev/core/v3/isomorphic"; import { defaultModelPrices } from "./defaultPrices.js"; +import { modelCatalog } from "./modelCatalog.js"; +import { syncLlmCatalog } from "./sync.js"; export async function seedLlmPricing(prisma: PrismaClient): Promise<{ modelsCreated: number; modelsSkipped: number; + modelsUpdated: number; }> { let modelsCreated = 0; let modelsSkipped = 0; @@ -23,6 +26,9 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ continue; } + // Look up catalog metadata for this model + const catalog = modelCatalog[modelDef.modelName]; + // Create model + tiers atomically so partial models can't be left behind await prisma.$transaction(async (tx) => { const model = await tx.llmModel.create({ @@ -32,6 +38,14 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ matchPattern: modelDef.matchPattern, startDate: modelDef.startDate ? new Date(modelDef.startDate) : null, source: "default", + // Catalog metadata (from model-catalog.json) + provider: catalog?.provider ?? 
null, + description: catalog?.description ?? null, + contextWindow: catalog?.contextWindow ?? null, + maxOutputTokens: catalog?.maxOutputTokens ?? null, + capabilities: catalog?.capabilities ?? [], + isHidden: catalog?.isHidden ?? false, + baseModelName: catalog?.baseModelName ?? null, }, }); @@ -58,5 +72,8 @@ export async function seedLlmPricing(prisma: PrismaClient): Promise<{ modelsCreated++; } - return { modelsCreated, modelsSkipped }; + // Sync catalog metadata on existing default models + const syncResult = await syncLlmCatalog(prisma); + + return { modelsCreated, modelsSkipped, modelsUpdated: syncResult.modelsUpdated }; } diff --git a/internal-packages/llm-model-catalog/src/sync.ts b/internal-packages/llm-model-catalog/src/sync.ts new file mode 100644 index 00000000000..b600e39a692 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/sync.ts @@ -0,0 +1,55 @@ +import type { PrismaClient } from "@trigger.dev/database"; +import { defaultModelPrices } from "./defaultPrices.js"; +import { modelCatalog } from "./modelCatalog.js"; + +export async function syncLlmCatalog(prisma: PrismaClient): Promise<{ + modelsUpdated: number; + modelsSkipped: number; +}> { + let modelsUpdated = 0; + let modelsSkipped = 0; + + for (const modelDef of defaultModelPrices) { + const existing = await prisma.llmModel.findFirst({ + where: { + projectId: null, + modelName: modelDef.modelName, + }, + }); + + // Skip if model doesn't exist yet (seed handles creation) + if (!existing) { + modelsSkipped++; + continue; + } + + // Don't overwrite admin-edited models + if (existing.source !== "default") { + modelsSkipped++; + continue; + } + + const catalog = modelCatalog[modelDef.modelName]; + + await prisma.llmModel.update({ + where: { id: existing.id }, + data: { + // Update match pattern and start date from Langfuse (may have changed) + matchPattern: modelDef.matchPattern, + startDate: modelDef.startDate ? 
new Date(modelDef.startDate) : null, + // Update catalog metadata + provider: catalog?.provider ?? existing.provider, + description: catalog?.description ?? existing.description, + contextWindow: catalog?.contextWindow ?? existing.contextWindow, + maxOutputTokens: catalog?.maxOutputTokens ?? existing.maxOutputTokens, + capabilities: catalog?.capabilities ?? existing.capabilities, + isHidden: catalog?.isHidden ?? existing.isHidden, + baseModelName: catalog?.baseModelName ?? existing.baseModelName, + }, + }); + + modelsUpdated++; + } + + return { modelsUpdated, modelsSkipped }; +} diff --git a/internal-packages/llm-model-catalog/src/types.ts b/internal-packages/llm-model-catalog/src/types.ts new file mode 100644 index 00000000000..d6c6638d620 --- /dev/null +++ b/internal-packages/llm-model-catalog/src/types.ts @@ -0,0 +1,87 @@ +import type { Decimal } from "@trigger.dev/database"; + +export type PricingCondition = { + usageDetailPattern: string; + operator: "gt" | "gte" | "lt" | "lte" | "eq" | "neq"; + value: number; +}; + +export type LlmPriceEntry = { + usageType: string; + price: number; +}; + +export type LlmPricingTierWithPrices = { + id: string; + name: string; + isDefault: boolean; + priority: number; + conditions: PricingCondition[]; + prices: LlmPriceEntry[]; +}; + +export type LlmModelWithPricing = { + id: string; + friendlyId: string; + modelName: string; + matchPattern: string; + startDate: Date | null; + pricingTiers: LlmPricingTierWithPrices[]; +}; + +export type LlmCostResult = { + matchedModelId: string; + matchedModelName: string; + pricingTierId: string; + pricingTierName: string; + inputCost: number; + outputCost: number; + totalCost: number; + costDetails: Record; +}; + +export type ModelCatalogEntry = { + provider: string; + description: string; + contextWindow: number | null; + maxOutputTokens: number | null; + capabilities: string[]; + /** ISO date string of when the model was publicly released (e.g. "2025-06-15"). 
*/ + releaseDate: string | null; + /** Whether the model is deprecated/legacy and should be hidden from the registry by default. */ + isHidden: boolean; + /** Whether the model supports reliable structured JSON output (schema adherence). */ + supportsStructuredOutput: boolean; + /** Whether the model can call multiple tools in a single turn. */ + supportsParallelToolCalls: boolean; + /** Whether the model supports streaming partial tool call results. */ + supportsStreamingToolCalls: boolean; + /** ISO date string of when the model will be deprecated/sunset, if known. */ + deprecationDate: string | null; + /** ISO date string of the model's training data cutoff (e.g. "2024-10-01"). */ + knowledgeCutoff: string | null; + /** ISO timestamp of when this entry was last researched/resolved. */ + resolvedAt: string; + /** The base model this is a variant of, or null if this IS the base model. */ + baseModelName: string | null; +}; + +export type DefaultModelDefinition = { + modelName: string; + matchPattern: string; + startDate?: string; + // Catalog metadata (merged from model-catalog.json during seed) + provider?: string; + description?: string; + contextWindow?: number | null; + maxOutputTokens?: number | null; + capabilities?: string[]; + isHidden?: boolean; + pricingTiers: Array<{ + name: string; + isDefault: boolean; + priority: number; + conditions: PricingCondition[]; + prices: Record; + }>; +}; diff --git a/internal-packages/llm-pricing/tsconfig.json b/internal-packages/llm-model-catalog/tsconfig.json similarity index 100% rename from internal-packages/llm-pricing/tsconfig.json rename to internal-packages/llm-model-catalog/tsconfig.json diff --git a/internal-packages/llm-pricing/package.json b/internal-packages/llm-pricing/package.json deleted file mode 100644 index 8cf9e366f2c..00000000000 --- a/internal-packages/llm-pricing/package.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "@internal/llm-pricing", - "private": true, - "version": "0.0.1", - "main": 
"./src/index.ts", - "types": "./src/index.ts", - "type": "module", - "dependencies": { - "@trigger.dev/core": "workspace:*", - "@trigger.dev/database": "workspace:*" - }, - "scripts": { - "typecheck": "tsc --noEmit", - "generate": "echo 'defaultPrices.ts is pre-committed — run sync-prices to update'", - "sync-prices": "bash scripts/sync-model-prices.sh", - "sync-prices:check": "bash scripts/sync-model-prices.sh --check" - } -} diff --git a/internal-packages/llm-pricing/src/types.ts b/internal-packages/llm-pricing/src/types.ts deleted file mode 100644 index 2deec6246ed..00000000000 --- a/internal-packages/llm-pricing/src/types.ts +++ /dev/null @@ -1,54 +0,0 @@ -import type { Decimal } from "@trigger.dev/database"; - -export type PricingCondition = { - usageDetailPattern: string; - operator: "gt" | "gte" | "lt" | "lte" | "eq" | "neq"; - value: number; -}; - -export type LlmPriceEntry = { - usageType: string; - price: number; -}; - -export type LlmPricingTierWithPrices = { - id: string; - name: string; - isDefault: boolean; - priority: number; - conditions: PricingCondition[]; - prices: LlmPriceEntry[]; -}; - -export type LlmModelWithPricing = { - id: string; - friendlyId: string; - modelName: string; - matchPattern: string; - startDate: Date | null; - pricingTiers: LlmPricingTierWithPrices[]; -}; - -export type LlmCostResult = { - matchedModelId: string; - matchedModelName: string; - pricingTierId: string; - pricingTierName: string; - inputCost: number; - outputCost: number; - totalCost: number; - costDetails: Record; -}; - -export type DefaultModelDefinition = { - modelName: string; - matchPattern: string; - startDate?: string; - pricingTiers: Array<{ - name: string; - isDefault: boolean; - priority: number; - conditions: PricingCondition[]; - prices: Record; - }>; -}; diff --git a/internal-packages/tsql/src/index.test.ts b/internal-packages/tsql/src/index.test.ts index 7a5182668d9..a93fb0fd4b0 100644 --- a/internal-packages/tsql/src/index.test.ts +++ 
b/internal-packages/tsql/src/index.test.ts @@ -53,7 +53,6 @@ const lookupTableSchema: TableSchema = { /** * Test table schema WITHOUT tenant columns (e.g., global reference data) */ -// @ts-expect-error - tenant columns are required but not set const nonTenantTableSchema: TableSchema = { name: "reference_data", clickhouseName: "trigger_dev.reference_data", diff --git a/internal-packages/tsql/src/query/functions.ts b/internal-packages/tsql/src/query/functions.ts index fcb5dd6e3d0..f184ed8f382 100644 --- a/internal-packages/tsql/src/query/functions.ts +++ b/internal-packages/tsql/src/query/functions.ts @@ -531,6 +531,14 @@ export const TSQL_AGGREGATIONS: Record = { quantile: { clickhouseName: "quantile", minArgs: 1, maxArgs: 1, minParams: 1, maxParams: 1, aggregate: true }, quantileIf: { clickhouseName: "quantileIf", minArgs: 2, maxArgs: 2, minParams: 1, maxParams: 1, aggregate: true }, quantiles: { clickhouseName: "quantiles", minArgs: 1, aggregate: true }, + // -Merge combinators for AggregatingMergeTree tables + quantilesMerge: { clickhouseName: "quantilesMerge", minArgs: 1, maxArgs: 1, minParams: 1, aggregate: true }, + quantileMerge: { clickhouseName: "quantileMerge", minArgs: 1, maxArgs: 1, minParams: 1, maxParams: 1, aggregate: true }, + sumMerge: { clickhouseName: "sumMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + avgMerge: { clickhouseName: "avgMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + countMerge: { clickhouseName: "countMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + minMerge: { clickhouseName: "minMerge", minArgs: 1, maxArgs: 1, aggregate: true }, + maxMerge: { clickhouseName: "maxMerge", minArgs: 1, maxArgs: 1, aggregate: true }, // Statistical functions simpleLinearRegression: { clickhouseName: "simpleLinearRegression", minArgs: 2, maxArgs: 2, aggregate: true }, diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts index 00a28382de5..615d112c2f1 100644 --- 
a/internal-packages/tsql/src/query/schema.ts +++ b/internal-packages/tsql/src/query/schema.ts @@ -367,8 +367,8 @@ export interface TableSchema { clickhouseName: string; /** Column definitions for this table */ columns: Record; - /** Tenant isolation column configuration */ - tenantColumns: TenantColumnConfig; + /** Tenant isolation column configuration. Omit for global tables with no tenant scoping. */ + tenantColumns?: TenantColumnConfig; /** Description of the table for documentation/autocomplete */ description?: string; /** Whether this table can be joined to other tables */ @@ -866,9 +866,11 @@ export function sanitizeErrorMessage(message: string, schemas: TableSchema[]): s // Map table names tableNameMap.set(table.clickhouseName, table.name); - // Collect tenant column names to strip + // Collect tenant column names to strip (global tables have no tenant columns) const tenantCols = table.tenantColumns; - columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId); + if (tenantCols) { + columnsToStrip.push(tenantCols.organizationId, tenantCols.projectId, tenantCols.environmentId); + } // Collect required filter columns to strip if (table.requiredFilters) { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bc27428575a..7d96d581f40 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -305,9 +305,9 @@ importers: '@internal/cache': specifier: workspace:* version: link:../../internal-packages/cache - '@internal/llm-pricing': + '@internal/llm-model-catalog': specifier: workspace:* - version: link:../../internal-packages/llm-pricing + version: link:../../internal-packages/llm-model-catalog '@internal/redis': specifier: workspace:* version: link:../../internal-packages/redis @@ -1131,7 +1131,7 @@ importers: specifier: 18.2.69 version: 18.2.69 - internal-packages/llm-pricing: + internal-packages/llm-model-catalog: dependencies: '@trigger.dev/core': specifier: workspace:*