smoltalk
    Preparing search index...

    Function getModel

    • Parameters

      Returns
          | TextModel
          | { modelName: "whisper-local"; provider: "local"; type: "speech-to-text" }
          | {
              modelName: "whisper-web";
              perMinuteCost: 0.006;
              provider: "openai";
              type: "speech-to-text";
          }
          | {
              cachedInputTokenCost: 0.075;
              description: "GPT-4o mini ('o' for 'omni') is a fast, affordable small model for focused tasks. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). It is ideal for fine-tuning, and model outputs from a larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar results at lower cost and latency. Knowledge cutoff: October 2023.";
              inputTokenCost: 0.15;
              maxInputTokens: 128000;
              maxOutputTokens: 16384;
              modelName: "gpt-4o-mini";
              outputTokenCost: 0.6;
              outputTokensPerSecond: 65;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 1.25;
              description: "GPT-4o ('o' for 'omni') is our versatile, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). Knowledge cutoff: April 2024.";
              inputTokenCost: 2.5;
              maxInputTokens: 128000;
              maxOutputTokens: 16384;
              modelName: "gpt-4o";
              outputTokenCost: 10;
              outputTokensPerSecond: 143;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.5;
              description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. Knowledge cutoff: June 2024.";
              inputTokenCost: 2;
              maxInputTokens: 200000;
              maxOutputTokens: 100000;
              modelName: "o3";
              outputTokenCost: 8;
              outputTokensPerSecond: 94;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.55;
              description: "o3-mini is our most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks. Knowledge cutoff: June 2024.";
              inputTokenCost: 1.1;
              maxInputTokens: 500000;
              maxOutputTokens: 100000;
              modelName: "o3-mini";
              outputTokenCost: 4.4;
              outputTokensPerSecond: 214;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.3;
              description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
              inputTokenCost: 0.6;
              maxInputTokens: 200000;
              maxOutputTokens: 100000;
              modelName: "o4-mini";
              outputTokenCost: 2.4;
              outputTokensPerSecond: 135;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
              inputTokenCost: 20;
              maxInputTokens: 200000;
              maxOutputTokens: 100000;
              modelName: "o3-pro";
              outputTokenCost: 80;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 7.5;
              description: "o1 is a reasoning model designed to excel at complex reasoning tasks including science, math, and coding. The knowledge cutoff for o1 models is October, 2023.";
              inputTokenCost: 15;
              maxInputTokens: 200000;
              maxOutputTokens: 100000;
              modelName: "o1";
              outputTokenCost: 60;
              outputTokensPerSecond: 100;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
              disabled: true;
              inputTokenCost: 10;
              maxInputTokens: 128000;
              maxOutputTokens: 4096;
              modelName: "gpt-4-turbo";
              outputTokenCost: 30;
              provider: "openai";
              type: "text";
          }
          | {
              description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
              disabled: true;
              inputTokenCost: 30;
              maxInputTokens: 8192;
              maxOutputTokens: 8192;
              modelName: "gpt-4";
              outputTokenCost: 60;
              provider: "openai";
              type: "text";
          }
          | {
              description: "GPT-3.5 Turbo models can understand and generate natural language or code and have been optimized for chat using the Chat Completions API but work well for non-chat tasks as well. gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.";
              disabled: true;
              inputTokenCost: 0.5;
              maxInputTokens: 16385;
              maxOutputTokens: 4096;
              modelName: "gpt-3.5-turbo";
              outputTokenCost: 1.5;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.5;
              description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
              inputTokenCost: 2;
              maxInputTokens: 1047576;
              maxOutputTokens: 32768;
              modelName: "gpt-4.1";
              outputTokenCost: 8;
              outputTokensPerSecond: 105;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.1;
              description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
              inputTokenCost: 0.4;
              maxInputTokens: 1047576;
              maxOutputTokens: 32768;
              modelName: "gpt-4.1-mini";
              outputTokenCost: 1.6;
              outputTokensPerSecond: 78;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.025;
              description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
              inputTokenCost: 0.1;
              maxInputTokens: 1047576;
              maxOutputTokens: 32768;
              modelName: "gpt-4.1-nano";
              outputTokenCost: 0.4;
              outputTokensPerSecond: 142;
              provider: "openai";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.125;
              description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
              inputTokenCost: 1.25;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5";
              outputTokenCost: 10;
              outputTokensPerSecond: 72;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["minimal", "low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.025;
              description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
              inputTokenCost: 0.25;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5-mini";
              outputTokenCost: 2;
              outputTokensPerSecond: 69;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["minimal", "low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.005;
              description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
              inputTokenCost: 0.05;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5-nano";
              outputTokenCost: 0.4;
              outputTokensPerSecond: 140;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["minimal", "low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.125;
              description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
              inputTokenCost: 1.25;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5.1";
              outputTokenCost: 10;
              provider: "openai";
              reasoning: {
                  canDisable: true;
                  defaultLevel: "none";
                  levels: readonly ["none", "low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.175;
              description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
              inputTokenCost: 1.75;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5.2";
              outputTokenCost: 14;
              outputTokensPerSecond: 61;
              provider: "openai";
              reasoning: {
                  canDisable: true;
                  defaultLevel: "none";
                  levels: readonly ["none", "low", "medium", "high"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              description: "GPT-5.2 Pro uses more compute for complex reasoning tasks. 400K context window. Knowledge cutoff: August 2025.";
              inputTokenCost: 21;
              maxInputTokens: 400000;
              maxOutputTokens: 128000;
              modelName: "gpt-5.2-pro";
              outputTokenCost: 168;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.25;
              description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
              inputTokenCost: 2.5;
              maxInputTokens: 1050000;
              maxOutputTokens: 128000;
              modelName: "gpt-5.4";
              outputTokenCost: 15;
              provider: "openai";
              reasoning: {
                  canDisable: true;
                  defaultLevel: "none";
                  levels: readonly ["none", "low", "medium", "high", "xhigh"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
              inputTokenCost: 30;
              maxInputTokens: 1050000;
              maxOutputTokens: 128000;
              modelName: "gpt-5.4-pro";
              outputTokenCost: 180;
              provider: "openai";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "medium";
                  levels: readonly ["medium", "high", "xhigh"];
                  outputsSignatures: false;
                  outputsThinking: false;
              };
              type: "text";
          }
          | {
              description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
              inputTokenCost: 2;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-3.1-pro-preview";
              outputTokenCost: 12;
              outputTokensPerSecond: 112;
              provider: "google";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "high";
                  levels: readonly ["low", "medium", "high"];
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
              disabled: true;
              inputTokenCost: 2;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-3-pro-preview";
              outputTokenCost: 12;
              provider: "google";
              type: "text";
          }
          | {
              description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
              inputTokenCost: 0.5;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-3-flash-preview";
              outputTokenCost: 3;
              outputTokensPerSecond: 146;
              provider: "google";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "high";
                  levels: readonly ["minimal", "low", "medium", "high"];
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
              inputTokenCost: 0.25;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-3.1-flash-lite-preview";
              outputTokenCost: 1.5;
              outputTokensPerSecond: 379;
              provider: "google";
              reasoning: {
                  canDisable: false;
                  defaultLevel: "minimal";
                  levels: readonly ["minimal", "low", "medium", "high"];
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
              inputTokenCost: 1.25;
              maxInputTokens: 2097152;
              maxOutputTokens: 65536;
              modelName: "gemini-2.5-pro";
              outputTokenCost: 10;
              outputTokensPerSecond: 134;
              provider: "google";
              reasoning: {
                  canDisable: false;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
              inputTokenCost: 0.3;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-2.5-flash";
              outputTokenCost: 2.5;
              outputTokensPerSecond: 245;
              provider: "google";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
              inputTokenCost: 0.1;
              maxInputTokens: 1048576;
              maxOutputTokens: 65536;
              modelName: "gemini-2.5-flash-lite";
              outputTokenCost: 0.4;
              outputTokensPerSecond: 400;
              provider: "google";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: false;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
              disabled: true;
              inputTokenCost: 0.1;
              maxInputTokens: 1048576;
              maxOutputTokens: 8192;
              modelName: "gemini-2.0-flash";
              outputTokenCost: 0.4;
              outputTokensPerSecond: 213;
              provider: "google";
              type: "text";
          }
          | {
              description: "Strongest model quality, especially for code & world knowledge; 2M long context. In private beta.";
              disabled: true;
              inputTokenCost: 0.5;
              maxInputTokens: 2097152;
              maxOutputTokens: 8192;
              modelName: "gemini-2.0-pro-exp-02-05";
              outputTokenCost: 1.5;
              provider: "google";
              type: "text";
          }
          | {
              description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
              disabled: true;
              inputTokenCost: 0.075;
              maxInputTokens: 1048576;
              maxOutputTokens: 8192;
              modelName: "gemini-2.0-flash-lite";
              outputTokenCost: 0.3;
              provider: "google";
              type: "text";
          }
          | {
              costUnit: "characters";
              description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
              disabled: true;
              inputTokenCost: 0.01875;
              maxInputTokens: 1048576;
              maxOutputTokens: 8192;
              modelName: "gemini-1.5-flash";
              outputTokenCost: 0.075;
              outputTokensPerSecond: 178;
              provider: "google";
              type: "text";
          }
          | {
              costUnit: "characters";
              description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
              disabled: true;
              inputTokenCost: 0.3125;
              maxInputTokens: 2097152;
              maxOutputTokens: 8192;
              modelName: "gemini-1.5-pro";
              outputTokenCost: 1.25;
              outputTokensPerSecond: 59;
              provider: "google";
              type: "text";
          }
          | {
              costUnit: "characters";
              description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
              disabled: true;
              inputTokenCost: 0.125;
              maxInputTokens: 32760;
              maxOutputTokens: 8192;
              modelName: "gemini-1.0-pro";
              outputTokenCost: 0.375;
              provider: "google";
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.5;
              description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
              inputTokenCost: 5;
              maxInputTokens: 200000;
              maxOutputTokens: 131072;
              modelName: "claude-opus-4-6";
              outputTokenCost: 25;
              outputTokensPerSecond: 53;
              provider: "anthropic";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.3;
              description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
              inputTokenCost: 3;
              maxInputTokens: 200000;
              maxOutputTokens: 64000;
              modelName: "claude-sonnet-4-6";
              outputTokenCost: 15;
              provider: "anthropic";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              cachedInputTokenCost: 0.1;
              description: "The fastest Claude model with near-frontier intelligence. 200K context window, 64K max output.";
              inputTokenCost: 1;
              maxInputTokens: 200000;
              maxOutputTokens: 64000;
              modelName: "claude-haiku-4-5-20251001";
              outputTokenCost: 5;
              outputTokensPerSecond: 97;
              provider: "anthropic";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Claude 3.7 Sonnet — legacy model. Use claude-sonnet-4-6 instead.";
              disabled: true;
              inputTokenCost: 3;
              maxInputTokens: 200000;
              maxOutputTokens: 8192;
              modelName: "claude-3-7-sonnet-latest";
              outputTokenCost: 15;
              outputTokensPerSecond: 78;
              provider: "anthropic";
              reasoning: {
                  canDisable: true;
                  outputsSignatures: true;
                  outputsThinking: true;
              };
              type: "text";
          }
          | {
              description: "Claude 3.5 Haiku — legacy model. Use claude-haiku-4-5-20251001 instead.";
              disabled: true;
              inputTokenCost: 0.8;
              maxInputTokens: 200000;
              maxOutputTokens: 8192;
              modelName: "claude-3-5-haiku-latest";
              outputTokenCost: 4;
              outputTokensPerSecond: 66;
              provider: "anthropic";
              type: "text";
          }
          | {
              description: "Runs via ollama";
              maxInputTokens: 128000;
              maxOutputTokens: 128000;
              modelName: "deepseek-r1:8b";
              provider: "ollama";
              type: "text";
          }
          | {
              description: "Runs via ollama";
              maxInputTokens: 128000;
              maxOutputTokens: 128000;
              modelName: "mistral:latest";
              provider: "ollama";
              type: "text";
          }
          | {
              description: "Fine tuned Mistral 7B model, chunked into parts of 50 chars each, 100 iterations.";
              maxInputTokens: 8192;
              maxOutputTokens: 8192;
              modelName: "mistral-adapters-chunk50-iters100";
              provider: "local";
              type: "text";
          }
          | {
              maxInputTokens: 256;
              maxOutputTokens: 256;
              modelName: "llama-7b";
              provider: "replicate";
              type: "text";
          }
          | {
              costPerImage: 0.05;
              modelName: "google/imagen-3";
              provider: "replicate";
              type: "image";
          }
          | {
              costPerImage: 0.01;
              modelName: "minimax/image-01";
              outputType: "Array";
              provider: "replicate";
              type: "image";
          }
          | {
              costPerImage: 0.03;
              modelName: "flux-modal";
              provider: "modal";
              type: "image";
          }
          | {
              costPerImage: 0.25;
              modelName: "gpt-image-1";
              provider: "openai";
              type: "image";
          }
          | {
              costPerImage: 0.04;
              description: "aka nano-banana";
              modelName: "gemini-2.5-flash-image-preview";
              provider: "google";
              type: "image";
          }
          | {
              costPerImage: 0.05;
              description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
              modelName: "gemini-3-pro-image-preview";
              provider: "google";
              type: "image";
          }
          | undefined