getModel( modelName:ModelName, ): |TextModel |{modelName:"whisper-local";provider:"local";type:"speech-to-text"} |{ modelName:"whisper-web"; perMinuteCost:0.006; provider:"openai"; type:"speech-to-text"; } |{ cachedInputTokenCost:0.075; description:"GPT-4o mini ('o' for 'omni') is a fast,
affordable small model for focused tasks. It accepts both text
and image inputs, and produces text outputs (including
Structured Outputs). It is ideal for fine-tuning, and model
outputs from a larger model like GPT-4o can be distilled to
GPT-4o-mini to produce similar results at lower cost and
latency. Knowledge cutoff: July 2025."; inputTokenCost:0.15; maxInputTokens:128000; maxOutputTokens:16384; modelName:"gpt-4o-mini"; outputTokenCost:0.6; outputTokensPerSecond:65; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:1.25; description:"GPT-4o ('o' for 'omni') is our
versatile, high-intelligence flagship model. It accepts both
text and image inputs, and produces text outputs (including
Structured Outputs). Knowledge cutoff: April 2024."; inputTokenCost:2.5; maxInputTokens:128000; maxOutputTokens:16384; modelName:"gpt-4o"; outputTokenCost:10; outputTokensPerSecond:143; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"o3 is a reasoning model that sets a new standard for
math, science, coding, visual reasoning tasks, and technical
writing. Part of the o-series of reasoning models. Knowledge
cutoff: June 2024."; inputTokenCost:2; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o3"; outputTokenCost:8; outputTokensPerSecond:94; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.55; description:"o3-mini is our most recent small reasoning model,
providing high intelligence at the same cost and latency
targets of o1-mini. o3-mini also supports key developer
features, like Structured Outputs, function calling, Batch
API, and more. Like other models in the o-series, it is
designed to excel at science, math, and coding tasks.
Knowledge cutoff: June 2024."; inputTokenCost:1.1; maxInputTokens:500000; maxOutputTokens:100000; modelName:"o3-mini"; outputTokenCost:4.4; outputTokensPerSecond:214; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.3; description:"Latest small o-series model optimized for fast,
effective reasoning with exceptional performance in coding and
visual tasks. Knowledge cutoff: June 2024."; inputTokenCost:0.6; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o4-mini"; outputTokenCost:2.4; outputTokensPerSecond:135; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"o3-pro uses more compute for complex reasoning tasks.
Available via Responses API only. Requests may take several
minutes. Knowledge cutoff: June 2024."; inputTokenCost:20; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o3-pro"; outputTokenCost:80; provider:"openai"; reasoning:{ canDisable:false; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:7.5; description:"o1 is a reasoning model designed to excel at complex
reasoning tasks including science, math, and coding. The
knowledge cutoff for o1 models is October, 2023."; inputTokenCost:15; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o1"; outputTokenCost:60; outputTokensPerSecond:100; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-4 is an older version of a high-intelligence GPT
model, usable in Chat Completions. Learn more in the text
generation guide. The knowledge cutoff for the latest GPT-4
Turbo version is December, 2023."; disabled:true; inputTokenCost:10; maxInputTokens:128000; maxOutputTokens:4096; modelName:"gpt-4-turbo"; outputTokenCost:30; provider:"openai"; type:"text"; } |{ description:"GPT-4 is an older version of a high-intelligence GPT
model, usable in Chat Completions. Learn more in the text
generation guide. The knowledge cutoff for the latest GPT-4
Turbo version is December, 2023."; disabled:true; inputTokenCost:30; maxInputTokens:8192; maxOutputTokens:8192; modelName:"gpt-4"; outputTokenCost:60; provider:"openai"; type:"text"; } |{ description:"GPT-3.5 Turbo models can understand and generate
natural language or code and have been optimized for chat
using the Chat Completions API but work well for non-chat
tasks as well. gpt-4o-mini should be used in place of
gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and
just as fast."; disabled:true; inputTokenCost:0.5; maxInputTokens:16385; maxOutputTokens:4096; modelName:"gpt-3.5-turbo"; outputTokenCost:1.5; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"GPT-4.1 excels at instruction following and tool
calling with 1M token context window. Knowledge cutoff: June
2024."; inputTokenCost:2; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1"; outputTokenCost:8; outputTokensPerSecond:105; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.1; description:"GPT-4.1 mini excels at instruction following and tool
calling with 1M token context window and low latency.
Knowledge cutoff: June 2024."; inputTokenCost:0.4; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1-mini"; outputTokenCost:1.6; outputTokensPerSecond:78; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.025; description:"GPT-4.1 nano is the fastest and most affordable GPT-4.1
variant with 1M token context window. Knowledge cutoff: June
2024."; inputTokenCost:0.1; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1-nano"; outputTokenCost:0.4; outputTokensPerSecond:142; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.125; description:"GPT-5 is a frontier reasoning model with 400K context
window. Supports reasoning tokens. Knowledge cutoff: September
2024."; inputTokenCost:1.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5"; outputTokenCost:10; outputTokensPerSecond:72; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.025; description:"GPT-5 mini is a faster, more cost-efficient version of
GPT-5 with 400K context window. Knowledge cutoff: May
2024."; inputTokenCost:0.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5-mini"; outputTokenCost:2; outputTokensPerSecond:69; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.005; description:"GPT-5 nano is the fastest and most affordable GPT-5
variant with 400K context window. Knowledge cutoff: May
2024."; inputTokenCost:0.05; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5-nano"; outputTokenCost:0.4; outputTokensPerSecond:140; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.125; description:"GPT-5.1 is the flagship model for coding and agentic
tasks with configurable reasoning effort. 400K context window.
Knowledge cutoff: September 2024."; inputTokenCost:1.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.1"; outputTokenCost:10; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.175; description:"GPT-5.2 is the flagship model for coding and agentic
tasks across industries. 400K context window. Knowledge
cutoff: August 2025."; inputTokenCost:1.75; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.2"; outputTokenCost:14; outputTokensPerSecond:61; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-5.2 Pro uses more compute for complex reasoning
tasks. 400K context window. Knowledge cutoff: August
2025."; inputTokenCost:21; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.2-pro"; outputTokenCost:168; provider:"openai"; reasoning:{ canDisable:false; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.25; description:"GPT-5.4 is the most capable and efficient frontier
model for complex professional work. 1M context window,
state-of-the-art coding and tool use. Standard pricing for
≤272K tokens, 2x input/1.5x output for >272K. Knowledge
cutoff: August 2025."; inputTokenCost:2.5; maxInputTokens:1050000; maxOutputTokens:128000; modelName:"gpt-5.4"; outputTokenCost:15; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high","xhigh"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-5.4 Pro uses more compute for complex reasoning
tasks. 1M context window. Standard pricing for ≤272K tokens.
Knowledge cutoff: August 2025."; inputTokenCost:30; maxInputTokens:1050000; maxOutputTokens:128000; modelName:"gpt-5.4-pro"; outputTokenCost:180; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["medium","high","xhigh"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"Latest Gemini 3.1 Pro with 1M context window and 64K
output. Standard pricing for ≤200k tokens ($2.00 input/$12.00
output), higher rates for >200k tokens ($4.00 input/$18.00
output). Released Feb 2026."; inputTokenCost:2; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3.1-pro-preview"; outputTokenCost:12; outputTokensPerSecond:112; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"high"; levels:readonly["low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"DEPRECATED: Shut down March 9, 2026. Use
gemini-3.1-pro-preview instead."; disabled:true; inputTokenCost:2; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3-pro-preview"; outputTokenCost:12; provider:"google"; type:"text"; } |{ description:"Latest Gemini 3 flash model with 1M context window and
64K output. Outperforms 2.5 Pro while being 3x faster.
Optimized for agentic workflows and coding. Includes context
caching for 90% cost reductions."; inputTokenCost:0.5; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3-flash-preview"; outputTokenCost:3; outputTokensPerSecond:146; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"high"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Most cost-effective Gemini 3.1 model with thinking
support and 1M context window. 2.5x faster TTFA and 45% faster
output than 2.5 Flash. Released March 2026."; inputTokenCost:0.25; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3.1-flash-lite-preview"; outputTokenCost:1.5; outputTokensPerSecond:379; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"minimal"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"High-performance Gemini 2.5 model with 2M context
window. Adaptive thinking for complex reasoning and coding.
Standard pricing for ≤200k tokens ($1.25 input/$10.00 output),
higher rates for >200k tokens ($2.50 input/$15.00 output).
Batch API: 50% discount."; inputTokenCost:1.25; maxInputTokens:2097152; maxOutputTokens:65536; modelName:"gemini-2.5-pro"; outputTokenCost:10; outputTokensPerSecond:134; provider:"google"; reasoning:{ canDisable:false; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Balanced Gemini 2.5 model with excellent
performance-to-cost ratio. Lightning-fast with controllable
thinking budgets. 1M context window. Context caching available
for up to 75% cost reduction."; inputTokenCost:0.3; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-2.5-flash"; outputTokenCost:2.5; outputTokensPerSecond:245; provider:"google"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Most cost-effective Gemini 2.5 option for
high-throughput applications. 1M context window."; inputTokenCost:0.1; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-2.5-flash-lite"; outputTokenCost:0.4; outputTokensPerSecond:400; provider:"google"; reasoning:{ canDisable:true; outputsSignatures:false; outputsThinking:true; }; type:"text"; } |{ description:"Workhorse model for all daily tasks. Strong overall
performance and supports real-time streaming Live API. 1M
context window. DEPRECATED: Will be shut down on March 31,
2026."; disabled:true; inputTokenCost:0.1; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-2.0-flash"; outputTokenCost:0.4; outputTokensPerSecond:213; provider:"google"; type:"text"; } |{ description:"Strongest model quality, especially for code &
world knowledge; 2M long context. In private beta."; disabled:true; inputTokenCost:0.5; maxInputTokens:2097152; maxOutputTokens:8192; modelName:"gemini-2.0-pro-exp-02-05"; outputTokenCost:1.5; provider:"google"; type:"text"; } |{ description:"Cost effective offering to support high throughput.
DEPRECATED: Will be shut down on March 31, 2026. Use
gemini-2.5-flash-lite instead."; disabled:true; inputTokenCost:0.075; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-2.0-flash-lite"; outputTokenCost:0.3; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-flash
instead."; disabled:true; inputTokenCost:0.01875; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-1.5-flash"; outputTokenCost:0.075; outputTokensPerSecond:178; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-pro
instead."; disabled:true; inputTokenCost:0.3125; maxInputTokens:2097152; maxOutputTokens:8192; modelName:"gemini-1.5-pro"; outputTokenCost:1.25; outputTokensPerSecond:59; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-flash
instead."; disabled:true; inputTokenCost:0.125; maxInputTokens:32760; maxOutputTokens:8192; modelName:"gemini-1.0-pro"; outputTokenCost:0.375; provider:"google"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"The most intelligent Claude model for building agents
and coding. 200K context window (1M in beta), 128K max
output."; inputTokenCost:5; maxInputTokens:200000; maxOutputTokens:131072; modelName:"claude-opus-4-6"; outputTokenCost:25; outputTokensPerSecond:53; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ cachedInputTokenCost:0.3; description:"The best combination of speed and intelligence. 200K
context window (1M in beta), 64K max output."; inputTokenCost:3; maxInputTokens:200000; maxOutputTokens:64000; modelName:"claude-sonnet-4-6"; outputTokenCost:15; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ cachedInputTokenCost:0.1; description:"The fastest Claude model with near-frontier
intelligence. 200K context window, 64K max output."; inputTokenCost:1; maxInputTokens:200000; maxOutputTokens:64000; modelName:"claude-haiku-4-5-20251001"; outputTokenCost:5; outputTokensPerSecond:97; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Claude 3.7 Sonnet — legacy model. Use claude-sonnet-4-6
instead."; disabled:true; inputTokenCost:3; maxInputTokens:200000; maxOutputTokens:8192; modelName:"claude-3-7-sonnet-latest"; outputTokenCost:15; outputTokensPerSecond:78; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Claude 3.5 Haiku — legacy model. Use
claude-haiku-4-5-20251001 instead."; disabled:true; inputTokenCost:0.8; maxInputTokens:200000; maxOutputTokens:8192; modelName:"claude-3-5-haiku-latest"; outputTokenCost:4; outputTokensPerSecond:66; provider:"anthropic"; type:"text"; } |{ description:"Runs via ollama"; maxInputTokens:128000; maxOutputTokens:128000; modelName:"deepseek-r1:8b"; provider:"ollama"; type:"text"; } |{ description:"Runs via ollama"; maxInputTokens:128000; maxOutputTokens:128000; modelName:"mistral:latest"; provider:"ollama"; type:"text"; } |{ description:"Fine tuned Mistral 7B model, chunked into parts of 50
chars each, 100 iterations."; maxInputTokens:8192; maxOutputTokens:8192; modelName:"mistral-adapters-chunk50-iters100"; provider:"local"; type:"text"; } |{ maxInputTokens:256; maxOutputTokens:256; modelName:"llama-7b"; provider:"replicate"; type:"text"; } |{ costPerImage:0.05; modelName:"google/imagen-3"; provider:"replicate"; type:"image"; } |{ costPerImage:0.01; modelName:"minimax/image-01"; outputType:"Array"; provider:"replicate"; type:"image"; } |{ costPerImage:0.03; modelName:"flux-modal"; provider:"modal"; type:"image"; } |{ costPerImage:0.25; modelName:"gpt-image-1"; provider:"openai"; type:"image"; } |{ costPerImage:0.04; description:"aka nano-banana"; modelName:"gemini-2.5-flash-image-preview"; provider:"google"; type:"image"; } |{ costPerImage:0.05; description:"High-fidelity image generation with reasoning-enhanced
composition. Supports legible text rendering, complex
multi-turn editing, and character consistency using up to 14
reference inputs."; modelName:"gemini-3-pro-image-preview"; provider:"google"; type:"image"; } |undefined
Returns |TextModel |{modelName:"whisper-local";provider:"local";type:"speech-to-text"} |{ modelName:"whisper-web"; perMinuteCost:0.006; provider:"openai"; type:"speech-to-text"; } |{ cachedInputTokenCost:0.075; description:"GPT-4o mini ('o' for 'omni') is a
fast, affordable small model for focused tasks. It accepts
both text and image inputs, and produces text outputs
(including Structured Outputs). It is ideal for fine-tuning,
and model outputs from a larger model like GPT-4o can be
distilled to GPT-4o-mini to produce similar results at lower
cost and latency. Knowledge cutoff: July 2025."; inputTokenCost:0.15; maxInputTokens:128000; maxOutputTokens:16384; modelName:"gpt-4o-mini"; outputTokenCost:0.6; outputTokensPerSecond:65; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:1.25; description:"GPT-4o ('o' for 'omni') is our
versatile, high-intelligence flagship model. It accepts both
text and image inputs, and produces text outputs (including
Structured Outputs). Knowledge cutoff: April
2024."; inputTokenCost:2.5; maxInputTokens:128000; maxOutputTokens:16384; modelName:"gpt-4o"; outputTokenCost:10; outputTokensPerSecond:143; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"o3 is a reasoning model that sets a new standard for
math, science, coding, visual reasoning tasks, and technical
writing. Part of the o-series of reasoning models. Knowledge
cutoff: June 2024."; inputTokenCost:2; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o3"; outputTokenCost:8; outputTokensPerSecond:94; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.55; description:"o3-mini is our most recent small reasoning model,
providing high intelligence at the same cost and latency
targets of o1-mini. o3-mini also supports key developer
features, like Structured Outputs, function calling, Batch
API, and more. Like other models in the o-series, it is
designed to excel at science, math, and coding tasks.
Knowledge cutoff: June 2024."; inputTokenCost:1.1; maxInputTokens:500000; maxOutputTokens:100000; modelName:"o3-mini"; outputTokenCost:4.4; outputTokensPerSecond:214; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.3; description:"Latest small o-series model optimized for fast,
effective reasoning with exceptional performance in coding
and visual tasks. Knowledge cutoff: June 2024."; inputTokenCost:0.6; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o4-mini"; outputTokenCost:2.4; outputTokensPerSecond:135; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"o3-pro uses more compute for complex reasoning tasks.
Available via Responses API only. Requests may take several
minutes. Knowledge cutoff: June 2024."; inputTokenCost:20; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o3-pro"; outputTokenCost:80; provider:"openai"; reasoning:{ canDisable:false; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:7.5; description:"o1 is a reasoning model designed to excel at complex
reasoning tasks including science, math, and coding. The
knowledge cutoff for o1 models is October, 2023."; inputTokenCost:15; maxInputTokens:200000; maxOutputTokens:100000; modelName:"o1"; outputTokenCost:60; outputTokensPerSecond:100; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-4 is an older version of a high-intelligence GPT
model, usable in Chat Completions. Learn more in the text
generation guide. The knowledge cutoff for the latest GPT-4
Turbo version is December, 2023."; disabled:true; inputTokenCost:10; maxInputTokens:128000; maxOutputTokens:4096; modelName:"gpt-4-turbo"; outputTokenCost:30; provider:"openai"; type:"text"; } |{ description:"GPT-4 is an older version of a high-intelligence GPT
model, usable in Chat Completions. Learn more in the text
generation guide. The knowledge cutoff for the latest GPT-4
Turbo version is December, 2023."; disabled:true; inputTokenCost:30; maxInputTokens:8192; maxOutputTokens:8192; modelName:"gpt-4"; outputTokenCost:60; provider:"openai"; type:"text"; } |{ description:"GPT-3.5 Turbo models can understand and generate
natural language or code and have been optimized for chat
using the Chat Completions API but work well for non-chat
tasks as well. gpt-4o-mini should be used in place of
gpt-3.5-turbo, as it is cheaper, more capable, multimodal,
and just as fast."; disabled:true; inputTokenCost:0.5; maxInputTokens:16385; maxOutputTokens:4096; modelName:"gpt-3.5-turbo"; outputTokenCost:1.5; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"GPT-4.1 excels at instruction following and tool
calling with 1M token context window. Knowledge cutoff: June
2024."; inputTokenCost:2; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1"; outputTokenCost:8; outputTokensPerSecond:105; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.1; description:"GPT-4.1 mini excels at instruction following and tool
calling with 1M token context window and low latency.
Knowledge cutoff: June 2024."; inputTokenCost:0.4; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1-mini"; outputTokenCost:1.6; outputTokensPerSecond:78; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.025; description:"GPT-4.1 nano is the fastest and most affordable
GPT-4.1 variant with 1M token context window. Knowledge
cutoff: June 2024."; inputTokenCost:0.1; maxInputTokens:1047576; maxOutputTokens:32768; modelName:"gpt-4.1-nano"; outputTokenCost:0.4; outputTokensPerSecond:142; provider:"openai"; type:"text"; } |{ cachedInputTokenCost:0.125; description:"GPT-5 is a frontier reasoning model with 400K context
window. Supports reasoning tokens. Knowledge cutoff:
September 2024."; inputTokenCost:1.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5"; outputTokenCost:10; outputTokensPerSecond:72; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.025; description:"GPT-5 mini is a faster, more cost-efficient version
of GPT-5 with 400K context window. Knowledge cutoff: May
2024."; inputTokenCost:0.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5-mini"; outputTokenCost:2; outputTokensPerSecond:69; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.005; description:"GPT-5 nano is the fastest and most affordable GPT-5
variant with 400K context window. Knowledge cutoff: May
2024."; inputTokenCost:0.05; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5-nano"; outputTokenCost:0.4; outputTokensPerSecond:140; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.125; description:"GPT-5.1 is the flagship model for coding and agentic
tasks with configurable reasoning effort. 400K context
window. Knowledge cutoff: September 2024."; inputTokenCost:1.25; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.1"; outputTokenCost:10; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.175; description:"GPT-5.2 is the flagship model for coding and agentic
tasks across industries. 400K context window. Knowledge
cutoff: August 2025."; inputTokenCost:1.75; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.2"; outputTokenCost:14; outputTokensPerSecond:61; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-5.2 Pro uses more compute for complex reasoning
tasks. 400K context window. Knowledge cutoff: August
2025."; inputTokenCost:21; maxInputTokens:400000; maxOutputTokens:128000; modelName:"gpt-5.2-pro"; outputTokenCost:168; provider:"openai"; reasoning:{ canDisable:false; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ cachedInputTokenCost:0.25; description:"GPT-5.4 is the most capable and efficient frontier
model for complex professional work. 1M context window,
state-of-the-art coding and tool use. Standard pricing for
≤272K tokens, 2x input/1.5x output for >272K. Knowledge
cutoff: August 2025."; inputTokenCost:2.5; maxInputTokens:1050000; maxOutputTokens:128000; modelName:"gpt-5.4"; outputTokenCost:15; provider:"openai"; reasoning:{ canDisable:true; defaultLevel:"none"; levels:readonly["none","low","medium","high","xhigh"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"GPT-5.4 Pro uses more compute for complex reasoning
tasks. 1M context window. Standard pricing for ≤272K tokens.
Knowledge cutoff: August 2025."; inputTokenCost:30; maxInputTokens:1050000; maxOutputTokens:128000; modelName:"gpt-5.4-pro"; outputTokenCost:180; provider:"openai"; reasoning:{ canDisable:false; defaultLevel:"medium"; levels:readonly["medium","high","xhigh"]; outputsSignatures:false; outputsThinking:false; }; type:"text"; } |{ description:"Latest Gemini 3.1 Pro with 1M context window and 64K
output. Standard pricing for ≤200k tokens ($2.00
input/$12.00 output), higher rates for >200k tokens
($4.00 input/$18.00 output). Released Feb 2026."; inputTokenCost:2; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3.1-pro-preview"; outputTokenCost:12; outputTokensPerSecond:112; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"high"; levels:readonly["low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"DEPRECATED: Shut down March 9, 2026. Use
gemini-3.1-pro-preview instead."; disabled:true; inputTokenCost:2; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3-pro-preview"; outputTokenCost:12; provider:"google"; type:"text"; } |{ description:"Latest Gemini 3 flash model with 1M context window
and 64K output. Outperforms 2.5 Pro while being 3x faster.
Optimized for agentic workflows and coding. Includes context
caching for 90% cost reductions."; inputTokenCost:0.5; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3-flash-preview"; outputTokenCost:3; outputTokensPerSecond:146; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"high"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Most cost-effective Gemini 3.1 model with thinking
support and 1M context window. 2.5x faster TTFA and 45%
faster output than 2.5 Flash. Released March
2026."; inputTokenCost:0.25; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-3.1-flash-lite-preview"; outputTokenCost:1.5; outputTokensPerSecond:379; provider:"google"; reasoning:{ canDisable:false; defaultLevel:"minimal"; levels:readonly["minimal","low","medium","high"]; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"High-performance Gemini 2.5 model with 2M context
window. Adaptive thinking for complex reasoning and coding.
Standard pricing for ≤200k tokens ($1.25 input/$10.00
output), higher rates for >200k tokens ($2.50
input/$15.00 output). Batch API: 50% discount."; inputTokenCost:1.25; maxInputTokens:2097152; maxOutputTokens:65536; modelName:"gemini-2.5-pro"; outputTokenCost:10; outputTokensPerSecond:134; provider:"google"; reasoning:{ canDisable:false; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Balanced Gemini 2.5 model with excellent
performance-to-cost ratio. Lightning-fast with controllable
thinking budgets. 1M context window. Context caching
available for up to 75% cost reduction."; inputTokenCost:0.3; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-2.5-flash"; outputTokenCost:2.5; outputTokensPerSecond:245; provider:"google"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Most cost-effective Gemini 2.5 option for
high-throughput applications. 1M context window."; inputTokenCost:0.1; maxInputTokens:1048576; maxOutputTokens:65536; modelName:"gemini-2.5-flash-lite"; outputTokenCost:0.4; outputTokensPerSecond:400; provider:"google"; reasoning:{ canDisable:true; outputsSignatures:false; outputsThinking:true; }; type:"text"; } |{ description:"Workhorse model for all daily tasks. Strong overall
performance and supports real-time streaming Live API. 1M
context window. DEPRECATED: Will be shut down on March 31,
2026."; disabled:true; inputTokenCost:0.1; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-2.0-flash"; outputTokenCost:0.4; outputTokensPerSecond:213; provider:"google"; type:"text"; } |{ description:"Strongest model quality, especially for code &
world knowledge; 2M long context. In private
beta."; disabled:true; inputTokenCost:0.5; maxInputTokens:2097152; maxOutputTokens:8192; modelName:"gemini-2.0-pro-exp-02-05"; outputTokenCost:1.5; provider:"google"; type:"text"; } |{ description:"Cost effective offering to support high throughput.
DEPRECATED: Will be shut down on March 31, 2026. Use
gemini-2.5-flash-lite instead."; disabled:true; inputTokenCost:0.075; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-2.0-flash-lite"; outputTokenCost:0.3; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-flash
instead."; disabled:true; inputTokenCost:0.01875; maxInputTokens:1048576; maxOutputTokens:8192; modelName:"gemini-1.5-flash"; outputTokenCost:0.075; outputTokensPerSecond:178; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-pro
instead."; disabled:true; inputTokenCost:0.3125; maxInputTokens:2097152; maxOutputTokens:8192; modelName:"gemini-1.5-pro"; outputTokenCost:1.25; outputTokensPerSecond:59; provider:"google"; type:"text"; } |{ costUnit:"characters"; description:"RETIRED: No longer available. Use gemini-2.5-flash
instead."; disabled:true; inputTokenCost:0.125; maxInputTokens:32760; maxOutputTokens:8192; modelName:"gemini-1.0-pro"; outputTokenCost:0.375; provider:"google"; type:"text"; } |{ cachedInputTokenCost:0.5; description:"The most intelligent Claude model for building agents
and coding. 200K context window (1M in beta), 128K max
output."; inputTokenCost:5; maxInputTokens:200000; maxOutputTokens:131072; modelName:"claude-opus-4-6"; outputTokenCost:25; outputTokensPerSecond:53; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ cachedInputTokenCost:0.3; description:"The best combination of speed and intelligence. 200K
context window (1M in beta), 64K max output."; inputTokenCost:3; maxInputTokens:200000; maxOutputTokens:64000; modelName:"claude-sonnet-4-6"; outputTokenCost:15; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ cachedInputTokenCost:0.1; description:"The fastest Claude model with near-frontier
intelligence. 200K context window, 64K max
output."; inputTokenCost:1; maxInputTokens:200000; maxOutputTokens:64000; modelName:"claude-haiku-4-5-20251001"; outputTokenCost:5; outputTokensPerSecond:97; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Claude 3.7 Sonnet — legacy model. Use
claude-sonnet-4-6 instead."; disabled:true; inputTokenCost:3; maxInputTokens:200000; maxOutputTokens:8192; modelName:"claude-3-7-sonnet-latest"; outputTokenCost:15; outputTokensPerSecond:78; provider:"anthropic"; reasoning:{ canDisable:true; outputsSignatures:true; outputsThinking:true; }; type:"text"; } |{ description:"Claude 3.5 Haiku — legacy model. Use
claude-haiku-4-5-20251001 instead."; disabled:true; inputTokenCost:0.8; maxInputTokens:200000; maxOutputTokens:8192; modelName:"claude-3-5-haiku-latest"; outputTokenCost:4; outputTokensPerSecond:66; provider:"anthropic"; type:"text"; } |{ description:"Runs via ollama"; maxInputTokens:128000; maxOutputTokens:128000; modelName:"deepseek-r1:8b"; provider:"ollama"; type:"text"; } |{ description:"Runs via ollama"; maxInputTokens:128000; maxOutputTokens:128000; modelName:"mistral:latest"; provider:"ollama"; type:"text"; } |{ description:"Fine tuned Mistral 7B model, chunked into parts of 50
chars each, 100 iterations."; maxInputTokens:8192; maxOutputTokens:8192; modelName:"mistral-adapters-chunk50-iters100"; provider:"local"; type:"text"; } |{ maxInputTokens:256; maxOutputTokens:256; modelName:"llama-7b"; provider:"replicate"; type:"text"; } |{ costPerImage:0.05; modelName:"google/imagen-3"; provider:"replicate"; type:"image"; } |{ costPerImage:0.01; modelName:"minimax/image-01"; outputType:"Array"; provider:"replicate"; type:"image"; } |{ costPerImage:0.03; modelName:"flux-modal"; provider:"modal"; type:"image"; } |{ costPerImage:0.25; modelName:"gpt-image-1"; provider:"openai"; type:"image"; } |{ costPerImage:0.04; description:"aka nano-banana"; modelName:"gemini-2.5-flash-image-preview"; provider:"google"; type:"image"; } |{ costPerImage:0.05; description:"High-fidelity image generation with
reasoning-enhanced composition. Supports legible text
rendering, complex multi-turn editing, and character
consistency using up to 14 reference inputs."; modelName:"gemini-3-pro-image-preview"; provider:"google"; type:"image"; } |undefined