textModels:
readonly
[
{
cachedInputTokenCost:
0.075;
description:
"GPT-4o mini ('o' for 'omni') is a fast,
affordable small model for focused tasks. It accepts both text and
image inputs, and produces text outputs (including Structured
Outputs). It is ideal for fine-tuning, and model outputs from a
larger model like GPT-4o can be distilled to GPT-4o-mini to produce
similar results at lower cost and latency. Knowledge cutoff: October
2023.";
inputTokenCost:
0.15;
maxInputTokens:
128000;
maxOutputTokens:
16384;
modelName:
"gpt-4o-mini";
outputTokenCost:
0.6;
outputTokensPerSecond:
65;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
1.25;
description:
"GPT-4o ('o' for 'omni') is our versatile,
high-intelligence flagship model. It accepts both text and image
inputs, and produces text outputs (including Structured Outputs).
Knowledge cutoff: April 2024.";
inputTokenCost:
2.5;
maxInputTokens:
128000;
maxOutputTokens:
16384;
modelName:
"gpt-4o";
outputTokenCost:
10;
outputTokensPerSecond:
143;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
0.5;
description:
"o3 is a reasoning model that sets a new standard for math,
science, coding, visual reasoning tasks, and technical writing. Part
of the o-series of reasoning models. Knowledge cutoff: June
2024.";
inputTokenCost:
2;
maxInputTokens:
200000;
maxOutputTokens:
100000;
modelName:
"o3";
outputTokenCost:
8;
outputTokensPerSecond:
94;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.55;
description:
"o3-mini is our most recent small reasoning model, providing
high intelligence at the same cost and latency targets of o1-mini.
o3-mini also supports key developer features, like Structured
Outputs, function calling, Batch API, and more. Like other models in
the o-series, it is designed to excel at science, math, and coding
tasks. Knowledge cutoff: June 2024.";
inputTokenCost:
1.1;
maxInputTokens:
200000;
maxOutputTokens:
100000;
modelName:
"o3-mini";
outputTokenCost:
4.4;
outputTokensPerSecond:
214;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.3;
description:
"Latest small o-series model optimized for fast, effective
reasoning with exceptional performance in coding and visual tasks.
Knowledge cutoff: June 2024.";
inputTokenCost:
0.6;
maxInputTokens:
200000;
maxOutputTokens:
100000;
modelName:
"o4-mini";
outputTokenCost:
2.4;
outputTokensPerSecond:
135;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
description:
"o3-pro uses more compute for complex reasoning tasks.
Available via Responses API only. Requests may take several minutes.
Knowledge cutoff: June 2024.";
inputTokenCost:
20;
maxInputTokens:
200000;
maxOutputTokens:
100000;
modelName:
"o3-pro";
outputTokenCost:
80;
provider:
"openai";
reasoning:
{
canDisable:
false;
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
7.5;
description:
"o1 is a reasoning model designed to excel at complex
reasoning tasks including science, math, and coding. The knowledge
cutoff for o1 models is October, 2023.";
inputTokenCost:
15;
maxInputTokens:
200000;
maxOutputTokens:
100000;
modelName:
"o1";
outputTokenCost:
60;
outputTokensPerSecond:
100;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
description:
"GPT-4 is an older version of a high-intelligence GPT model,
usable in Chat Completions. Learn more in the text generation guide.
The knowledge cutoff for the latest GPT-4 Turbo version is December,
2023.";
disabled:
true;
inputTokenCost:
10;
maxInputTokens:
128000;
maxOutputTokens:
4096;
modelName:
"gpt-4-turbo";
outputTokenCost:
30;
provider:
"openai";
type:
"text";
},
{
description:
"GPT-4 is an older version of a high-intelligence GPT model,
usable in Chat Completions. Learn more in the text generation guide.
The knowledge cutoff for the latest GPT-4 Turbo version is December,
2023.";
disabled:
true;
inputTokenCost:
30;
maxInputTokens:
8192;
maxOutputTokens:
8192;
modelName:
"gpt-4";
outputTokenCost:
60;
provider:
"openai";
type:
"text";
},
{
description:
"GPT-3.5 Turbo models can understand and generate natural
language or code and have been optimized for chat using the Chat
Completions API but work well for non-chat tasks as well.
gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is
cheaper, more capable, multimodal, and just as fast.";
disabled:
true;
inputTokenCost:
0.5;
maxInputTokens:
16385;
maxOutputTokens:
4096;
modelName:
"gpt-3.5-turbo";
outputTokenCost:
1.5;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
0.5;
description:
"GPT-4.1 excels at instruction following and tool calling with
1M token context window. Knowledge cutoff: June 2024.";
inputTokenCost:
2;
maxInputTokens:
1047576;
maxOutputTokens:
32768;
modelName:
"gpt-4.1";
outputTokenCost:
8;
outputTokensPerSecond:
105;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
0.1;
description:
"GPT-4.1 mini excels at instruction following and tool calling
with 1M token context window and low latency. Knowledge cutoff: June
2024.";
inputTokenCost:
0.4;
maxInputTokens:
1047576;
maxOutputTokens:
32768;
modelName:
"gpt-4.1-mini";
outputTokenCost:
1.6;
outputTokensPerSecond:
78;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
0.025;
description:
"GPT-4.1 nano is the fastest and most affordable GPT-4.1
variant with 1M token context window. Knowledge cutoff: June
2024.";
inputTokenCost:
0.1;
maxInputTokens:
1047576;
maxOutputTokens:
32768;
modelName:
"gpt-4.1-nano";
outputTokenCost:
0.4;
outputTokensPerSecond:
142;
provider:
"openai";
type:
"text";
},
{
cachedInputTokenCost:
0.125;
description:
"GPT-5 is a frontier reasoning model with 400K context window.
Supports reasoning tokens. Knowledge cutoff: September
2024.";
inputTokenCost:
1.25;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5";
outputTokenCost:
10;
outputTokensPerSecond:
72;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["minimal",
"low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.025;
description:
"GPT-5 mini is a faster, more cost-efficient version of GPT-5
with 400K context window. Knowledge cutoff: May 2024.";
inputTokenCost:
0.25;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5-mini";
outputTokenCost:
2;
outputTokensPerSecond:
69;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["minimal",
"low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.005;
description:
"GPT-5 nano is the fastest and most affordable GPT-5 variant
with 400K context window. Knowledge cutoff: May 2024.";
inputTokenCost:
0.05;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5-nano";
outputTokenCost:
0.4;
outputTokensPerSecond:
140;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["minimal",
"low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.125;
description:
"GPT-5.1 is the flagship model for coding and agentic tasks
with configurable reasoning effort. 400K context window. Knowledge
cutoff: September 2024.";
inputTokenCost:
1.25;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5.1";
outputTokenCost:
10;
provider:
"openai";
reasoning:
{
canDisable:
true;
defaultLevel:
"none";
levels:
readonly
["none",
"low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.175;
description:
"GPT-5.2 is the flagship model for coding and agentic tasks
across industries. 400K context window. Knowledge cutoff: August
2025.";
inputTokenCost:
1.75;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5.2";
outputTokenCost:
14;
outputTokensPerSecond:
61;
provider:
"openai";
reasoning:
{
canDisable:
true;
defaultLevel:
"none";
levels:
readonly
["none",
"low",
"medium",
"high"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
description:
"GPT-5.2 Pro uses more compute for complex reasoning tasks.
400K context window. Knowledge cutoff: August 2025.";
inputTokenCost:
21;
maxInputTokens:
400000;
maxOutputTokens:
128000;
modelName:
"gpt-5.2-pro";
outputTokenCost:
168;
provider:
"openai";
reasoning:
{
canDisable:
false;
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
cachedInputTokenCost:
0.25;
description:
"GPT-5.4 is the most capable and efficient frontier model for
complex professional work. 1M context window, state-of-the-art
coding and tool use. Standard pricing for ≤272K tokens, 2x
input/1.5x output for >272K. Knowledge cutoff: August
2025.";
inputTokenCost:
2.5;
maxInputTokens:
1050000;
maxOutputTokens:
128000;
modelName:
"gpt-5.4";
outputTokenCost:
15;
provider:
"openai";
reasoning:
{
canDisable:
true;
defaultLevel:
"none";
levels:
readonly
["none",
"low",
"medium",
"high",
"xhigh"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
description:
"GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M
context window. Standard pricing for ≤272K tokens. Knowledge cutoff:
August 2025.";
inputTokenCost:
30;
maxInputTokens:
1050000;
maxOutputTokens:
128000;
modelName:
"gpt-5.4-pro";
outputTokenCost:
180;
provider:
"openai";
reasoning:
{
canDisable:
false;
defaultLevel:
"medium";
levels:
readonly
["medium",
"high",
"xhigh"];
outputsSignatures:
false;
outputsThinking:
false;
};
type:
"text";
},
{
description:
"Latest Gemini 3.1 Pro with 1M context window and 64K output.
Standard pricing for ≤200k tokens ($2.00 input/$12.00 output),
higher rates for >200k tokens ($4.00 input/$18.00 output).
Released Feb 2026.";
inputTokenCost:
2;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-3.1-pro-preview";
outputTokenCost:
12;
outputTokensPerSecond:
112;
provider:
"google";
reasoning:
{
canDisable:
false;
defaultLevel:
"high";
levels:
readonly
["low",
"medium",
"high"];
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"DEPRECATED: Shut down March 9, 2026. Use
gemini-3.1-pro-preview instead.";
disabled:
true;
inputTokenCost:
2;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-3-pro-preview";
outputTokenCost:
12;
provider:
"google";
type:
"text";
},
{
description:
"Latest Gemini 3 flash model with 1M context window and 64K
output. Outperforms 2.5 Pro while being 3x faster. Optimized for
agentic workflows and coding. Includes context caching for 90% cost
reductions.";
inputTokenCost:
0.5;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-3-flash-preview";
outputTokenCost:
3;
outputTokensPerSecond:
146;
provider:
"google";
reasoning:
{
canDisable:
false;
defaultLevel:
"high";
levels:
readonly
["minimal",
"low",
"medium",
"high"];
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Most cost-effective Gemini 3.1 model with thinking support
and 1M context window. 2.5x faster TTFA and 45% faster output than
2.5 Flash. Released March 2026.";
inputTokenCost:
0.25;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-3.1-flash-lite-preview";
outputTokenCost:
1.5;
outputTokensPerSecond:
379;
provider:
"google";
reasoning:
{
canDisable:
false;
defaultLevel:
"minimal";
levels:
readonly
["minimal",
"low",
"medium",
"high"];
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"High-performance Gemini 2.5 model with 2M context window.
Adaptive thinking for complex reasoning and coding. Standard pricing
for ≤200k tokens ($1.25 input/$10.00 output), higher rates for
>200k tokens ($2.50 input/$15.00 output). Batch API: 50%
discount.";
inputTokenCost:
1.25;
maxInputTokens:
2097152;
maxOutputTokens:
65536;
modelName:
"gemini-2.5-pro";
outputTokenCost:
10;
outputTokensPerSecond:
134;
provider:
"google";
reasoning:
{
canDisable:
false;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Balanced Gemini 2.5 model with excellent performance-to-cost
ratio. Lightning-fast with controllable thinking budgets. 1M context
window. Context caching available for up to 75% cost
reduction.";
inputTokenCost:
0.3;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-2.5-flash";
outputTokenCost:
2.5;
outputTokensPerSecond:
245;
provider:
"google";
reasoning:
{
canDisable:
true;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Most cost-effective Gemini 2.5 option for high-throughput
applications. 1M context window.";
inputTokenCost:
0.1;
maxInputTokens:
1048576;
maxOutputTokens:
65536;
modelName:
"gemini-2.5-flash-lite";
outputTokenCost:
0.4;
outputTokensPerSecond:
400;
provider:
"google";
reasoning:
{
canDisable:
true;
outputsSignatures:
false;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Workhorse model for all daily tasks. Strong overall
performance and supports real-time streaming Live API. 1M context
window. DEPRECATED: Will be shut down on March 31, 2026.";
disabled:
true;
inputTokenCost:
0.1;
maxInputTokens:
1048576;
maxOutputTokens:
8192;
modelName:
"gemini-2.0-flash";
outputTokenCost:
0.4;
outputTokensPerSecond:
213;
provider:
"google";
type:
"text";
},
{
description:
"Strongest model quality, especially for code & world
knowledge; 2M long context. In private beta.";
disabled:
true;
inputTokenCost:
0.5;
maxInputTokens:
2097152;
maxOutputTokens:
8192;
modelName:
"gemini-2.0-pro-exp-02-05";
outputTokenCost:
1.5;
provider:
"google";
type:
"text";
},
{
description:
"Cost effective offering to support high throughput.
DEPRECATED: Will be shut down on March 31, 2026. Use
gemini-2.5-flash-lite instead.";
disabled:
true;
inputTokenCost:
0.075;
maxInputTokens:
1048576;
maxOutputTokens:
8192;
modelName:
"gemini-2.0-flash-lite";
outputTokenCost:
0.3;
provider:
"google";
type:
"text";
},
{
costUnit:
"characters";
description:
"RETIRED: No longer available. Use gemini-2.5-flash
instead.";
disabled:
true;
inputTokenCost:
0.01875;
maxInputTokens:
1048576;
maxOutputTokens:
8192;
modelName:
"gemini-1.5-flash";
outputTokenCost:
0.075;
outputTokensPerSecond:
178;
provider:
"google";
type:
"text";
},
{
costUnit:
"characters";
description:
"RETIRED: No longer available. Use gemini-2.5-pro
instead.";
disabled:
true;
inputTokenCost:
0.3125;
maxInputTokens:
2097152;
maxOutputTokens:
8192;
modelName:
"gemini-1.5-pro";
outputTokenCost:
1.25;
outputTokensPerSecond:
59;
provider:
"google";
type:
"text";
},
{
costUnit:
"characters";
description:
"RETIRED: No longer available. Use gemini-2.5-flash
instead.";
disabled:
true;
inputTokenCost:
0.125;
maxInputTokens:
32760;
maxOutputTokens:
8192;
modelName:
"gemini-1.0-pro";
outputTokenCost:
0.375;
provider:
"google";
type:
"text";
},
{
cachedInputTokenCost:
0.5;
description:
"The most intelligent Claude model for building agents and
coding. 200K context window (1M in beta), 128K max
output.";
inputTokenCost:
5;
maxInputTokens:
200000;
maxOutputTokens:
131072;
modelName:
"claude-opus-4-6";
outputTokenCost:
25;
outputTokensPerSecond:
53;
provider:
"anthropic";
reasoning:
{
canDisable:
true;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
cachedInputTokenCost:
0.3;
description:
"The best combination of speed and intelligence. 200K context
window (1M in beta), 64K max output.";
inputTokenCost:
3;
maxInputTokens:
200000;
maxOutputTokens:
64000;
modelName:
"claude-sonnet-4-6";
outputTokenCost:
15;
provider:
"anthropic";
reasoning:
{
canDisable:
true;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
cachedInputTokenCost:
0.1;
description:
"The fastest Claude model with near-frontier intelligence.
200K context window, 64K max output.";
inputTokenCost:
1;
maxInputTokens:
200000;
maxOutputTokens:
64000;
modelName:
"claude-haiku-4-5-20251001";
outputTokenCost:
5;
outputTokensPerSecond:
97;
provider:
"anthropic";
reasoning:
{
canDisable:
true;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Claude 3.7 Sonnet — legacy model. Use claude-sonnet-4-6
instead.";
disabled:
true;
inputTokenCost:
3;
maxInputTokens:
200000;
maxOutputTokens:
8192;
modelName:
"claude-3-7-sonnet-latest";
outputTokenCost:
15;
outputTokensPerSecond:
78;
provider:
"anthropic";
reasoning:
{
canDisable:
true;
outputsSignatures:
true;
outputsThinking:
true;
};
type:
"text";
},
{
description:
"Claude 3.5 Haiku — legacy model. Use
claude-haiku-4-5-20251001 instead.";
disabled:
true;
inputTokenCost:
0.8;
maxInputTokens:
200000;
maxOutputTokens:
8192;
modelName:
"claude-3-5-haiku-latest";
outputTokenCost:
4;
outputTokensPerSecond:
66;
provider:
"anthropic";
type:
"text";
},
{
description:
"Runs via ollama";
maxInputTokens:
128000;
maxOutputTokens:
128000;
modelName:
"deepseek-r1:8b";
provider:
"ollama";
type:
"text";
},
{
description:
"Runs via ollama";
maxInputTokens:
128000;
maxOutputTokens:
128000;
modelName:
"mistral:latest";
provider:
"ollama";
type:
"text";
},
{
description:
"Fine tuned Mistral 7B model, chunked into parts of 50 chars
each, 100 iterations.";
maxInputTokens:
8192;
maxOutputTokens:
8192;
modelName:
"mistral-adapters-chunk50-iters100";
provider:
"local";
type:
"text";
},
{
maxInputTokens:
256;
maxOutputTokens:
256;
modelName:
"llama-7b";
provider:
"replicate";
type:
"text";
},
] = ...