smoltalk
    Preparing search index...

    Variable textModels `Const`

    textModels: readonly [
        {
            cachedInputTokenCost: 0.075;
            description: "GPT-4o mini ('o' for 'omni') is a fast, affordable small model for focused tasks. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). It is ideal for fine-tuning, and model outputs from a larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar results at lower cost and latency. Knowledge cutoff: October 2023.";
            inputTokenCost: 0.15;
            maxInputTokens: 128000;
            maxOutputTokens: 16384;
            modelName: "gpt-4o-mini";
            outputTokenCost: 0.6;
            outputTokensPerSecond: 65;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 1.25;
            description: "GPT-4o ('o' for 'omni') is our versatile, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). Knowledge cutoff: April 2024.";
            inputTokenCost: 2.5;
            maxInputTokens: 128000;
            maxOutputTokens: 16384;
            modelName: "gpt-4o";
            outputTokenCost: 10;
            outputTokensPerSecond: 143;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.5;
            description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. Knowledge cutoff: June 2024.";
            inputTokenCost: 2;
            maxInputTokens: 200000;
            maxOutputTokens: 100000;
            modelName: "o3";
            outputTokenCost: 8;
            outputTokensPerSecond: 94;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.55;
            description: "o3-mini is our most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks. Knowledge cutoff: June 2024.";
            inputTokenCost: 1.1;
            maxInputTokens: 200000;
            maxOutputTokens: 100000;
            modelName: "o3-mini";
            outputTokenCost: 4.4;
            outputTokensPerSecond: 214;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.3;
            description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
            inputTokenCost: 0.6;
            maxInputTokens: 200000;
            maxOutputTokens: 100000;
            modelName: "o4-mini";
            outputTokenCost: 2.4;
            outputTokensPerSecond: 135;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
            inputTokenCost: 20;
            maxInputTokens: 200000;
            maxOutputTokens: 100000;
            modelName: "o3-pro";
            outputTokenCost: 80;
            provider: "openai";
            reasoning: {
                canDisable: false;
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 7.5;
            description: "o1 is a reasoning model designed to excel at complex reasoning tasks including science, math, and coding. The knowledge cutoff for o1 models is October, 2023.";
            inputTokenCost: 15;
            maxInputTokens: 200000;
            maxOutputTokens: 100000;
            modelName: "o1";
            outputTokenCost: 60;
            outputTokensPerSecond: 100;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
            disabled: true;
            inputTokenCost: 10;
            maxInputTokens: 128000;
            maxOutputTokens: 4096;
            modelName: "gpt-4-turbo";
            outputTokenCost: 30;
            provider: "openai";
            type: "text";
        },
        {
            description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
            disabled: true;
            inputTokenCost: 30;
            maxInputTokens: 8192;
            maxOutputTokens: 8192;
            modelName: "gpt-4";
            outputTokenCost: 60;
            provider: "openai";
            type: "text";
        },
        {
            description: "GPT-3.5 Turbo models can understand and generate natural language or code and have been optimized for chat using the Chat Completions API but work well for non-chat tasks as well. gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.";
            disabled: true;
            inputTokenCost: 0.5;
            maxInputTokens: 16385;
            maxOutputTokens: 4096;
            modelName: "gpt-3.5-turbo";
            outputTokenCost: 1.5;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.5;
            description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
            inputTokenCost: 2;
            maxInputTokens: 1047576;
            maxOutputTokens: 32768;
            modelName: "gpt-4.1";
            outputTokenCost: 8;
            outputTokensPerSecond: 105;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.1;
            description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
            inputTokenCost: 0.4;
            maxInputTokens: 1047576;
            maxOutputTokens: 32768;
            modelName: "gpt-4.1-mini";
            outputTokenCost: 1.6;
            outputTokensPerSecond: 78;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.025;
            description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
            inputTokenCost: 0.1;
            maxInputTokens: 1047576;
            maxOutputTokens: 32768;
            modelName: "gpt-4.1-nano";
            outputTokenCost: 0.4;
            outputTokensPerSecond: 142;
            provider: "openai";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.125;
            description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
            inputTokenCost: 1.25;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5";
            outputTokenCost: 10;
            outputTokensPerSecond: 72;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["minimal", "low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.025;
            description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
            inputTokenCost: 0.25;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5-mini";
            outputTokenCost: 2;
            outputTokensPerSecond: 69;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["minimal", "low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.005;
            description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
            inputTokenCost: 0.05;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5-nano";
            outputTokenCost: 0.4;
            outputTokensPerSecond: 140;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["minimal", "low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.125;
            description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
            inputTokenCost: 1.25;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5.1";
            outputTokenCost: 10;
            provider: "openai";
            reasoning: {
                canDisable: true;
                defaultLevel: "none";
                levels: readonly ["none", "low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.175;
            description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
            inputTokenCost: 1.75;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5.2";
            outputTokenCost: 14;
            outputTokensPerSecond: 61;
            provider: "openai";
            reasoning: {
                canDisable: true;
                defaultLevel: "none";
                levels: readonly ["none", "low", "medium", "high"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            description: "GPT-5.2 Pro uses more compute for complex reasoning tasks. 400K context window. Knowledge cutoff: August 2025.";
            inputTokenCost: 21;
            maxInputTokens: 400000;
            maxOutputTokens: 128000;
            modelName: "gpt-5.2-pro";
            outputTokenCost: 168;
            provider: "openai";
            reasoning: {
                canDisable: false;
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.25;
            description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
            inputTokenCost: 2.5;
            maxInputTokens: 1050000;
            maxOutputTokens: 128000;
            modelName: "gpt-5.4";
            outputTokenCost: 15;
            provider: "openai";
            reasoning: {
                canDisable: true;
                defaultLevel: "none";
                levels: readonly ["none", "low", "medium", "high", "xhigh"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
            inputTokenCost: 30;
            maxInputTokens: 1050000;
            maxOutputTokens: 128000;
            modelName: "gpt-5.4-pro";
            outputTokenCost: 180;
            provider: "openai";
            reasoning: {
                canDisable: false;
                defaultLevel: "medium";
                levels: readonly ["medium", "high", "xhigh"];
                outputsSignatures: false;
                outputsThinking: false;
            };
            type: "text";
        },
        {
            description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
            inputTokenCost: 2;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-3.1-pro-preview";
            outputTokenCost: 12;
            outputTokensPerSecond: 112;
            provider: "google";
            reasoning: {
                canDisable: false;
                defaultLevel: "high";
                levels: readonly ["low", "medium", "high"];
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
            disabled: true;
            inputTokenCost: 2;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-3-pro-preview";
            outputTokenCost: 12;
            provider: "google";
            type: "text";
        },
        {
            description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
            inputTokenCost: 0.5;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-3-flash-preview";
            outputTokenCost: 3;
            outputTokensPerSecond: 146;
            provider: "google";
            reasoning: {
                canDisable: false;
                defaultLevel: "high";
                levels: readonly ["minimal", "low", "medium", "high"];
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
            inputTokenCost: 0.25;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-3.1-flash-lite-preview";
            outputTokenCost: 1.5;
            outputTokensPerSecond: 379;
            provider: "google";
            reasoning: {
                canDisable: false;
                defaultLevel: "minimal";
                levels: readonly ["minimal", "low", "medium", "high"];
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
            inputTokenCost: 1.25;
            maxInputTokens: 2097152;
            maxOutputTokens: 65536;
            modelName: "gemini-2.5-pro";
            outputTokenCost: 10;
            outputTokensPerSecond: 134;
            provider: "google";
            reasoning: {
                canDisable: false;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
            inputTokenCost: 0.3;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-2.5-flash";
            outputTokenCost: 2.5;
            outputTokensPerSecond: 245;
            provider: "google";
            reasoning: {
                canDisable: true;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
            inputTokenCost: 0.1;
            maxInputTokens: 1048576;
            maxOutputTokens: 65536;
            modelName: "gemini-2.5-flash-lite";
            outputTokenCost: 0.4;
            outputTokensPerSecond: 400;
            provider: "google";
            reasoning: {
                canDisable: true;
                outputsSignatures: false;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
            disabled: true;
            inputTokenCost: 0.1;
            maxInputTokens: 1048576;
            maxOutputTokens: 8192;
            modelName: "gemini-2.0-flash";
            outputTokenCost: 0.4;
            outputTokensPerSecond: 213;
            provider: "google";
            type: "text";
        },
        {
            description: "Strongest model quality, especially for code & world knowledge; 2M long context. In private beta.";
            disabled: true;
            inputTokenCost: 0.5;
            maxInputTokens: 2097152;
            maxOutputTokens: 8192;
            modelName: "gemini-2.0-pro-exp-02-05";
            outputTokenCost: 1.5;
            provider: "google";
            type: "text";
        },
        {
            description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
            disabled: true;
            inputTokenCost: 0.075;
            maxInputTokens: 1048576;
            maxOutputTokens: 8192;
            modelName: "gemini-2.0-flash-lite";
            outputTokenCost: 0.3;
            provider: "google";
            type: "text";
        },
        {
            costUnit: "characters";
            description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
            disabled: true;
            inputTokenCost: 0.01875;
            maxInputTokens: 1048576;
            maxOutputTokens: 8192;
            modelName: "gemini-1.5-flash";
            outputTokenCost: 0.075;
            outputTokensPerSecond: 178;
            provider: "google";
            type: "text";
        },
        {
            costUnit: "characters";
            description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
            disabled: true;
            inputTokenCost: 0.3125;
            maxInputTokens: 2097152;
            maxOutputTokens: 8192;
            modelName: "gemini-1.5-pro";
            outputTokenCost: 1.25;
            outputTokensPerSecond: 59;
            provider: "google";
            type: "text";
        },
        {
            costUnit: "characters";
            description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
            disabled: true;
            inputTokenCost: 0.125;
            maxInputTokens: 32760;
            maxOutputTokens: 8192;
            modelName: "gemini-1.0-pro";
            outputTokenCost: 0.375;
            provider: "google";
            type: "text";
        },
        {
            cachedInputTokenCost: 0.5;
            description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
            inputTokenCost: 5;
            maxInputTokens: 200000;
            maxOutputTokens: 131072;
            modelName: "claude-opus-4-6";
            outputTokenCost: 25;
            outputTokensPerSecond: 53;
            provider: "anthropic";
            reasoning: {
                canDisable: true;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.3;
            description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
            inputTokenCost: 3;
            maxInputTokens: 200000;
            maxOutputTokens: 64000;
            modelName: "claude-sonnet-4-6";
            outputTokenCost: 15;
            provider: "anthropic";
            reasoning: {
                canDisable: true;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            cachedInputTokenCost: 0.1;
            description: "The fastest Claude model with near-frontier intelligence. 200K context window, 64K max output.";
            inputTokenCost: 1;
            maxInputTokens: 200000;
            maxOutputTokens: 64000;
            modelName: "claude-haiku-4-5-20251001";
            outputTokenCost: 5;
            outputTokensPerSecond: 97;
            provider: "anthropic";
            reasoning: {
                canDisable: true;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Claude 3.7 Sonnet — legacy model. Use claude-sonnet-4-6 instead.";
            disabled: true;
            inputTokenCost: 3;
            maxInputTokens: 200000;
            maxOutputTokens: 8192;
            modelName: "claude-3-7-sonnet-latest";
            outputTokenCost: 15;
            outputTokensPerSecond: 78;
            provider: "anthropic";
            reasoning: {
                canDisable: true;
                outputsSignatures: true;
                outputsThinking: true;
            };
            type: "text";
        },
        {
            description: "Claude 3.5 Haiku — legacy model. Use claude-haiku-4-5-20251001 instead.";
            disabled: true;
            inputTokenCost: 0.8;
            maxInputTokens: 200000;
            maxOutputTokens: 8192;
            modelName: "claude-3-5-haiku-latest";
            outputTokenCost: 4;
            outputTokensPerSecond: 66;
            provider: "anthropic";
            type: "text";
        },
        {
            description: "Runs via ollama";
            maxInputTokens: 128000;
            maxOutputTokens: 128000;
            modelName: "deepseek-r1:8b";
            provider: "ollama";
            type: "text";
        },
        {
            description: "Runs via ollama";
            maxInputTokens: 128000;
            maxOutputTokens: 128000;
            modelName: "mistral:latest";
            provider: "ollama";
            type: "text";
        },
        {
            description: "Fine tuned Mistral 7B model, chunked into parts of 50 chars each, 100 iterations.";
            maxInputTokens: 8192;
            maxOutputTokens: 8192;
            modelName: "mistral-adapters-chunk50-iters100";
            provider: "local";
            type: "text";
        },
        {
            maxInputTokens: 256;
            maxOutputTokens: 256;
            modelName: "llama-7b";
            provider: "replicate";
            type: "text";
        },
    ] = ...