Merge c3f17d65e1 into 8df4a2670b

2025-09-17 09:16:36 +08:00 · 2025-08-26 19:07:27 +00:00 · 2025-08-26 19:07:27 +00:00 · 6c953a732e
commit 6c953a732e
parent 8df4a2670b c3f17d65e1
11 changed files with 281 additions and 62 deletions
--- a/relay/adaptor/anthropic/constants.go
+++ b/relay/adaptor/anthropic/constants.go
@ -1,13 +1,21 @@
 package anthropic
 // ModelList enumerates the Anthropic model names exposed by this adaptor,
 // grouped by model generation. Each name is listed exactly once.
 //
 // NOTE(review): the Claude 4 / 4.1 names below are undated, unlike the
 // Claude 3.x entries — confirm they are accepted (or mapped) upstream.
 var ModelList = []string{
 	// Legacy models
 	"claude-instant-1.2", "claude-2.0", "claude-2.1",
 	// Claude 3 series
 	"claude-3-haiku-20240307",
 	"claude-3-sonnet-20240229",
 	"claude-3-opus-20240229",
 	// Claude 3.5 series
 	"claude-3-5-haiku-20241022",
 	"claude-3-5-haiku-latest",
 	"claude-3-5-sonnet-20240620",
 	"claude-3-5-sonnet-20241022",
 	"claude-3-5-sonnet-latest",
 	// Claude 4 series
 	"claude-4-opus",
 	"claude-4-sonnet",
 	// Claude 4.1 series
 	"claude-4.1-opus",
 }
--- a/relay/adaptor/cohere/constant.go
+++ b/relay/adaptor/cohere/constant.go
@ -4,6 +4,10 @@ var ModelList = []string{
 	"command", "command-nightly",
 	"command-light", "command-light-nightly",
 	"command-r", "command-r-plus",
 	// Latest models
 	"command-a-03-2025",
 	"command-r-plus-04-2024",
 	"command-r-08-2024",
 }
 func init() {
--- a/relay/adaptor/deepseek/constants.go
+++ b/relay/adaptor/deepseek/constants.go
@ -3,4 +3,6 @@ package deepseek
 // ModelList holds the DeepSeek model names offered by this adaptor.
 var ModelList = []string{
 	// Chat / reasoning names
 	"deepseek-chat", "deepseek-reasoner",
 	// Version-style names
 	"deepseek-v3", "deepseek-r1",
 }
--- a/relay/adaptor/geminiv2/constants.go
+++ b/relay/adaptor/geminiv2/constants.go
@ -12,4 +12,6 @@ var ModelList = []string{
 	"gemini-2.0-flash-lite-preview-02-05",
 	"gemini-2.0-flash-thinking-exp-01-21",
 	"gemini-2.0-pro-exp-02-05",
 	// Gemini 2.5 series
 	"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
 }
--- a/relay/adaptor/groq/constants.go
+++ b/relay/adaptor/groq/constants.go
@ -24,4 +24,9 @@ var ModelList = []string{
 	"whisper-large-v3-turbo",
 	"deepseek-r1-distill-llama-70b-specdec",
 	"deepseek-r1-distill-llama-70b",
 	// Latest featured models
 	"llama-3.3-70b-versatile",
 	"openai-gpt-oss-20b",
 	"openai-gpt-oss-120b",
 	"meta-llama/llama-guard-4-12b",
 }
--- a/relay/adaptor/minimax/constants.go
+++ b/relay/adaptor/minimax/constants.go
@ -10,4 +10,11 @@ var ModelList = []string{
 	"abab5.5s-chat",
 	"MiniMax-VL-01",
 	"MiniMax-Text-01",
 	// Latest models
 	"MiniMax-M1",
 	"MiniMax-Speech-2.5",
 	"MiniMax-Speech-02",
 	"MiniMax-Hailuo-02",
 	"music-01",
 	"Image-01",
 }
--- a/relay/adaptor/mistral/constants.go
+++ b/relay/adaptor/mistral/constants.go
@ -7,4 +7,9 @@ var ModelList = []string{
 	"mistral-medium-latest",
 	"mistral-large-latest",
 	"mistral-embed",
 	// Latest models
 	"mistral-medium-3",
 	"codestral",
 	"ministral-3b",
 	"ministral-8b",
 }
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@ -1,27 +1,49 @@
 package openai
 // ModelList enumerates the OpenAI model names exposed by this adaptor,
 // grouped by family. Each name is listed exactly once; the gpt-4o and o1
 // families live only under their own series headings.
 var ModelList = []string{
 	// GPT-3.5 series
 	"gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-0125",
 	"gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613",
 	"gpt-3.5-turbo-instruct",
 	// GPT-4 series
 	"gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-0125-preview",
 	"gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
 	"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
 	"gpt-4-vision-preview",
 	// GPT-4.1 series
 	"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
 	// GPT-4o series
 	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20",
 	"gpt-4o-audio-preview", "gpt-4o-realtime-preview",
 	"gpt-4o-search-preview", "gpt-4o-mini-search-preview",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-4o-mini-audio-preview", "gpt-4o-mini-realtime-preview",
 	"chatgpt-4o-latest",
 	// GPT-5 series
 	"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5-chat-latest",
 	// o1 series
 	"o1", "o1-2024-12-17", "o1-pro",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
 	// o3 series
 	"o3", "o3-pro", "o3-deep-research", "o3-mini", "o3-mini-2025-01-31",
 	// o4 series
 	"o4-mini", "o4-mini-deep-research",
 	// Other models
 	"computer-use-preview",
 	"codex-mini-latest",
 	"gpt-image-1",
 	// Transcription and TTS
 	"gpt-4o-transcribe", "gpt-4o-mini-transcribe", "gpt-4o-mini-tts",
 	// Embeddings
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
 	// Legacy text models
 	"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
 	"text-moderation-latest", "text-moderation-stable",
 	"text-davinci-edit-001",
 	"davinci-002", "babbage-002",
 	// Image generation
 	"dall-e-2", "dall-e-3",
 	// Audio
 	"whisper-1",
 	"tts-1", "tts-1-1106", "tts-1-hd", "tts-1-hd-1106",
 }
--- a/relay/adaptor/siliconflow/constants.go
+++ b/relay/adaptor/siliconflow/constants.go
@ -33,4 +33,10 @@ var ModelList = []string{
 	"Pro/internlm/internlm2_5-7b-chat",
 	"Pro/meta-llama/Meta-Llama-3-8B-Instruct",
 	"Pro/mistralai/Mistral-7B-Instruct-v0.2",
 	// Latest models
 	"Qwen/Qwen2.5-72B-Instruct",
 	"Qwen/Qwen2.5-7B-Instruct",
 	"deepseek-ai/DeepSeek-V2.5",
 	"deepseek-ai/DeepSeek-R1",
 	"internlm/InternLM2.5-20B-Chat",
 }
--- a/relay/adaptor/togetherai/constants.go
+++ b/relay/adaptor/togetherai/constants.go
@ -7,4 +7,10 @@ var ModelList = []string{
 	"deepseek-ai/deepseek-coder-33b-instruct",
 	"mistralai/Mixtral-8x22B-Instruct-v0.1",
 	"Qwen/Qwen1.5-72B-Chat",
 	// Updated models
 	"meta-llama/Llama-3.1-70B-Instruct-Turbo",
 	"meta-llama/Llama-3.1-8B-Instruct-Turbo",
 	"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
 	"Qwen/Qwen2.5-72B-Instruct-Turbo",
 	"deepseek-ai/deepseek-r1",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@ -25,42 +25,75 @@ var modelRatioLock sync.RWMutex
 // 1 === $0.002 / 1K tokens
 // 1 === ￥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
-	// https://openai.com/pricing
+	// https://platform.openai.com/docs/pricing (Standard pricing tier)
-	"gpt-4":                   15,
+	// GPT-5 series
-	"gpt-4-0314":              15,
+	"gpt-5":                   0.625, // $1.25 / 1M tokens
-	"gpt-4-0613":              15,
+	"gpt-5-mini":              0.125, // $0.25 / 1M tokens
-	"gpt-4-32k":               30,
+	"gpt-5-nano":              0.025, // $0.05 / 1M tokens
-	"gpt-4-32k-0314":          30,
+	"gpt-5-chat-latest":       0.625, // $1.25 / 1M tokens
-	"gpt-4-32k-0613":          30,
+	// GPT-4.1 series
-	"gpt-4-1106-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4.1":                 1.0,   // $2.00 / 1M tokens
-	"gpt-4-0125-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4.1-mini":            0.2,   // $0.40 / 1M tokens
-	"gpt-4-turbo-preview":     5,     // $0.01 / 1K tokens
+	"gpt-4.1-nano":            0.05,  // $0.10 / 1M tokens
-	"gpt-4-turbo":             5,     // $0.01 / 1K tokens
+	// GPT-4o series
-	"gpt-4-turbo-2024-04-09":  5,     // $0.01 / 1K tokens
+	"gpt-4o":                  1.25,  // $2.50 / 1M tokens
-	"gpt-4o":                  2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-05-13":       2.5,   // $5.00 / 1M tokens
-	"chatgpt-4o-latest":       2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-08-06":       1.25,  // $2.50 / 1M tokens (updated)
-	"gpt-4o-2024-05-13":       2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-11-20":       1.25,  // $2.50 / 1M tokens (updated)
-	"gpt-4o-2024-08-06":       1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview":    1.25,  // $2.50 / 1M tokens
-	"gpt-4o-2024-11-20":       1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-realtime-preview": 2.5,   // $5.00 / 1M tokens
-	"gpt-4o-mini":             0.075, // $0.00015 / 1K tokens
+	"gpt-4o-search-preview":   1.25,  // $2.50 / 1M tokens
-	"gpt-4o-mini-2024-07-18":  0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini":             0.075, // $0.15 / 1M tokens
-	"gpt-4-vision-preview":    5,     // $0.01 / 1K tokens
+	"gpt-4o-mini-2024-07-18":  0.075, // $0.15 / 1M tokens
-	"gpt-3.5-turbo":           0.25,  // $0.0005 / 1K tokens
+	"gpt-4o-mini-audio-preview": 0.075, // $0.15 / 1M tokens
-	"gpt-3.5-turbo-0301":      0.75,
+	"gpt-4o-mini-realtime-preview": 0.3, // $0.60 / 1M tokens
-	"gpt-3.5-turbo-0613":      0.75,
+	"gpt-4o-mini-search-preview": 0.075, // $0.15 / 1M tokens
-	"gpt-3.5-turbo-16k":       1.5, // $0.003 / 1K tokens
+	"chatgpt-4o-latest":       2.5,   // $5.00 / 1M tokens
-	"gpt-3.5-turbo-16k-0613":  1.5,
+	// GPT-4 Legacy
-	"gpt-3.5-turbo-instruct":  0.75, // $0.0015 / 1K tokens
+	"gpt-4":                   15,    // $30.00 / 1M tokens
-	"gpt-3.5-turbo-1106":      0.5,  // $0.001 / 1K tokens
+	"gpt-4-0314":              15,    // $30.00 / 1M tokens
-	"gpt-3.5-turbo-0125":      0.25, // $0.0005 / 1K tokens
+	"gpt-4-0613":              15,    // $30.00 / 1M tokens
-	"o1":                      7.5,  // $15.00 / 1M input tokens
+	"gpt-4-32k":               30,    // $60.00 / 1M tokens
-	"o1-2024-12-17":           7.5,
+	"gpt-4-32k-0314":          30,    // $60.00 / 1M tokens
-	"o1-preview":              7.5, // $15.00 / 1M input tokens
+	"gpt-4-32k-0613":          30,    // $60.00 / 1M tokens
-	"o1-preview-2024-09-12":   7.5,
+	"gpt-4-1106-preview":      5,     // $10.00 / 1M tokens
-	"o1-mini":                 1.5, // $3.00 / 1M input tokens
+	"gpt-4-0125-preview":      5,     // $10.00 / 1M tokens
-	"o1-mini-2024-09-12":      1.5,
+	"gpt-4-turbo-preview":     5,     // $10.00 / 1M tokens
-	"o3-mini":                 1.5, // $3.00 / 1M input tokens
+	"gpt-4-turbo":             5,     // $10.00 / 1M tokens
-	"o3-mini-2025-01-31":      1.5,
+	"gpt-4-turbo-2024-04-09":  5,     // $10.00 / 1M tokens
 	"gpt-4-vision-preview":    5,     // $10.00 / 1M tokens (1106-vision-preview)
 	// GPT-3.5 series
 	"gpt-3.5-turbo":           0.25,  // $0.50 / 1M tokens
 	"gpt-3.5-turbo-0301":      0.75,  // $1.50 / 1M tokens
 	"gpt-3.5-turbo-0613":      0.75,  // $1.50 / 1M tokens
 	"gpt-3.5-turbo-16k":       1.5,   // $3.00 / 1M tokens
 	"gpt-3.5-turbo-16k-0613":  1.5,   // $3.00 / 1M tokens
 	"gpt-3.5-turbo-instruct":  0.75,  // $1.50 / 1M tokens
 	"gpt-3.5-turbo-1106":      0.5,   // $1.00 / 1M tokens
 	"gpt-3.5-turbo-0125":      0.25,  // $0.50 / 1M tokens
 	// Reasoning models
 	"o1":                      7.5,   // $15.00 / 1M tokens
 	"o1-2024-12-17":           7.5,   // $15.00 / 1M tokens
 	"o1-pro":                  75,    // $150.00 / 1M tokens
 	"o1-preview":              7.5,   // $15.00 / 1M tokens
 	"o1-preview-2024-09-12":   7.5,   // $15.00 / 1M tokens
 	"o1-mini":                 0.55,  // $1.10 / 1M tokens
 	"o1-mini-2024-09-12":      0.55,  // $1.10 / 1M tokens
 	"o3":                      1.0,   // $2.00 / 1M tokens
 	"o3-pro":                  10,    // $20.00 / 1M tokens
 	"o3-deep-research":        5.0,   // $10.00 / 1M tokens
 	"o3-mini":                 0.55,  // $1.10 / 1M tokens
 	"o3-mini-2025-01-31":      0.55,  // $1.10 / 1M tokens
 	"o4-mini":                 0.55,  // $1.10 / 1M tokens
 	"o4-mini-deep-research":   1.0,   // $2.00 / 1M tokens
 	// Other models
 	"computer-use-preview":    1.5,   // $3.00 / 1M tokens
 	"codex-mini-latest":       0.75,  // $1.50 / 1M tokens
 	"gpt-image-1":             2.5,   // $5.00 / 1M tokens
 	// Transcription and TTS models
 	"gpt-4o-transcribe":       1.25,  // $2.50 / 1M text tokens
 	"gpt-4o-mini-transcribe":  0.625, // $1.25 / 1M text tokens  
 	"gpt-4o-mini-tts":         0.3,   // $0.60 / 1M text tokens
 	"davinci-002":             1,   // $0.002 / 1K tokens
 	"babbage-002":             0.2, // $0.0004 / 1K tokens
 	"text-ada-001":            0.2,
@ -88,17 +121,25 @@ var ModelRatio = map[string]float64{
 	"dall-e-2":                0.02 * USD, // $0.016 - $0.020 / image
 	"dall-e-3":                0.04 * USD, // $0.040 - $0.120 / image
 	// https://docs.anthropic.com/en/docs/about-claude/models
 	// Legacy models
 	"claude-instant-1.2":         0.8 / 1000 * USD,
 	"claude-2.0":                 8.0 / 1000 * USD,
 	"claude-2.1":                 8.0 / 1000 * USD,
-	"claude-3-haiku-20240307":    0.25 / 1000 * USD,
+	// Claude 3 series
-	"claude-3-5-haiku-20241022":  1.0 / 1000 * USD,
+	"claude-3-haiku-20240307":    0.25 / 1000 * USD,   // $0.25 / 1M tokens
-	"claude-3-5-haiku-latest":    1.0 / 1000 * USD,
+	"claude-3-sonnet-20240229":   3.0 / 1000 * USD,    // $3.00 / 1M tokens
-	"claude-3-sonnet-20240229":   3.0 / 1000 * USD,
+	"claude-3-opus-20240229":     15.0 / 1000 * USD,   // $15.00 / 1M tokens
-	"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,
+	// Claude 3.5 series
-	"claude-3-5-sonnet-20241022": 3.0 / 1000 * USD,
+	"claude-3-5-haiku-20241022":  0.8 / 1000 * USD,    // $0.80 / 1M tokens (Haiku 3.5)
-	"claude-3-5-sonnet-latest":   3.0 / 1000 * USD,
+	"claude-3-5-haiku-latest":    0.8 / 1000 * USD,    // $0.80 / 1M tokens (Haiku 3.5)
-	"claude-3-opus-20240229":     15.0 / 1000 * USD,
+	"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,    // $3.00 / 1M tokens
 	"claude-3-5-sonnet-20241022": 3.0 / 1000 * USD,    // $3.00 / 1M tokens
 	"claude-3-5-sonnet-latest":   3.0 / 1000 * USD,    // $3.00 / 1M tokens
 	// Claude 4 series
 	"claude-4-opus":              15.0 / 1000 * USD,   // $15.00 / 1M tokens (Opus 4)
 	"claude-4-sonnet":            3.0 / 1000 * USD,    // $3.00 / 1M tokens (Sonnet 4)
 	// Claude 4.1 series
 	"claude-4.1-opus":            15.0 / 1000 * USD,   // $15.00 / 1M tokens (Opus 4.1)
 	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
 	"ERNIE-4.0-8K":       0.120 * RMB,
 	"ERNIE-3.5-8K":       0.012 * RMB,
@ -135,6 +176,10 @@ var ModelRatio = map[string]float64{
 	"gemini-2.0-flash-lite-preview-02-05": 0.075 * MILLI_USD,
 	"gemini-2.0-flash-thinking-exp-01-21": 0.075 * MILLI_USD,
 	"gemini-2.0-pro-exp-02-05":            1.25 * MILLI_USD,
 	// Gemini 2.5 series (based on official pricing)
 	"gemini-2.5-pro":                      0.625, // $1.25 / 1M tokens (≤200k)
 	"gemini-2.5-flash":                    0.15,  // $0.30 / 1M tokens  
 	"gemini-2.5-flash-lite":               0.05,  // $0.10 / 1M tokens
 	"aqa":                                 1,
 	// https://open.bigmodel.cn/pricing
 	"glm-zero-preview": 0.01 * RMB,
@ -324,9 +369,20 @@ var ModelRatio = map[string]float64{
 	"step-2-16k":   0.038 / 1000 * RMB,
 	"step-1v-8k":   0.005 / 1000 * RMB,
 	"step-1v-32k":  0.015 / 1000 * RMB,
-	// aws llama3 https://aws.amazon.com/cn/bedrock/pricing/
+	// aws bedrock https://aws.amazon.com/cn/bedrock/pricing/
 	"llama3-8b-8192(33)":  0.0003 / 0.002,  // $0.0003 / 1K tokens
 	"llama3-70b-8192(33)": 0.00265 / 0.002, // $0.00265 / 1K tokens
 	// AWS Claude models on Bedrock (channel type 37)
 	"claude-instant-1.2(37)":         0.0008 / 0.002,  // $0.0008 / 1K tokens
 	"claude-2.0(37)":                 0.008 / 0.002,   // $0.008 / 1K tokens
 	"claude-2.1(37)":                 0.008 / 0.002,   // $0.008 / 1K tokens
 	"claude-3-haiku-20240307(37)":    0.00025 / 0.002, // $0.00025 / 1K tokens
 	"claude-3-sonnet-20240229(37)":   0.003 / 0.002,   // $0.003 / 1K tokens
 	"claude-3-opus-20240229(37)":     0.015 / 0.002,   // $0.015 / 1K tokens
 	"claude-3-5-sonnet-20240620(37)": 0.003 / 0.002,   // $0.003 / 1K tokens
 	"claude-3-5-sonnet-20241022(37)": 0.003 / 0.002,   // $0.003 / 1K tokens
 	"claude-3-5-sonnet-latest(37)":   0.003 / 0.002,   // $0.003 / 1K tokens
 	"claude-3-5-haiku-20241022(37)":  0.001 / 0.002,   // $0.001 / 1K tokens
 	// https://cohere.com/pricing
 	"command":               0.5,
 	"command-nightly":       0.5,
@ -335,8 +391,10 @@ var ModelRatio = map[string]float64{
 	"command-r":             0.5 / 1000 * USD,
 	"command-r-plus":        3.0 / 1000 * USD,
 	// https://platform.deepseek.com/api-docs/pricing/
-	"deepseek-chat":     0.14 * MILLI_USD,
+	"deepseek-chat":     0.14 * MILLI_USD,  // $0.14 / 1M tokens
-	"deepseek-reasoner": 0.55 * MILLI_USD,
+	"deepseek-reasoner": 0.55 * MILLI_USD,  // $0.55 / 1M tokens
 	"deepseek-v3":       0.10 * MILLI_USD,  // $0.10 / 1M tokens
 	"deepseek-r1":       0.55 * MILLI_USD,  // $0.55 / 1M tokens (same as reasoner)
 	// https://www.deepl.com/pro?cta=header-prices
 	"deepl-zh": 25.0 / 1000 * USD,
 	"deepl-en": 25.0 / 1000 * USD,
@ -622,14 +680,108 @@ var ModelRatio = map[string]float64{
 }
 // CompletionRatio maps a model name to its output/input price multiplier,
 // i.e. how much more a completion (output) token costs than a prompt
 // (input) token. Each value is written as "output price / input price" so
 // the source figures stay visible. Keys with a "(N)" suffix are
 // channel-type-specific overrides (33 and 37 are the AWS entries).
 var CompletionRatio = map[string]float64{
 	// OpenAI models (output/input ratio from Standard pricing)
 	// GPT-5 series
 	"gpt-5":             10.00 / 1.25, // $10.00 output / $1.25 input = 8x
 	"gpt-5-mini":        2.00 / 0.25,  // $2.00 output / $0.25 input = 8x
 	"gpt-5-nano":        0.40 / 0.05,  // $0.40 output / $0.05 input = 8x
 	"gpt-5-chat-latest": 10.00 / 1.25, // $10.00 output / $1.25 input = 8x
 	// GPT-4.1 series
 	"gpt-4.1":      8.00 / 2.00, // $8.00 output / $2.00 input = 4x
 	"gpt-4.1-mini": 1.60 / 0.40, // $1.60 output / $0.40 input = 4x
 	"gpt-4.1-nano": 0.40 / 0.10, // $0.40 output / $0.10 input = 4x
 	// GPT-4o series
 	"gpt-4o":                       10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-2024-05-13":            15.00 / 5.00, // $15.00 output / $5.00 input = 3x
 	"gpt-4o-2024-08-06":            10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-2024-11-20":            10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-audio-preview":         10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-realtime-preview":      20.00 / 5.00, // $20.00 output / $5.00 input = 4x
 	"gpt-4o-search-preview":        10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-mini":                  0.60 / 0.15,  // $0.60 output / $0.15 input = 4x
 	"gpt-4o-mini-2024-07-18":       0.60 / 0.15,  // $0.60 output / $0.15 input = 4x
 	"gpt-4o-mini-audio-preview":    0.60 / 0.15,  // $0.60 output / $0.15 input = 4x
 	"gpt-4o-mini-realtime-preview": 2.40 / 0.60,  // $2.40 output / $0.60 input = 4x
 	"gpt-4o-mini-search-preview":   0.60 / 0.15,  // $0.60 output / $0.15 input = 4x
 	"chatgpt-4o-latest":            15.00 / 5.00, // $15.00 output / $5.00 input = 3x
 	// GPT-4 Legacy
 	"gpt-4-1106-preview":     30.00 / 10.00,  // $30.00 output / $10.00 input = 3x
 	"gpt-4-0125-preview":     30.00 / 10.00,  // $30.00 output / $10.00 input = 3x
 	"gpt-4-turbo-preview":    30.00 / 10.00,  // $30.00 output / $10.00 input = 3x
 	"gpt-4-turbo":            30.00 / 10.00,  // $30.00 output / $10.00 input = 3x
 	"gpt-4-turbo-2024-04-09": 30.00 / 10.00,  // $30.00 output / $10.00 input = 3x
 	"gpt-4":                  60.00 / 30.00,  // $60.00 output / $30.00 input = 2x
 	"gpt-4-0314":             60.00 / 30.00,  // $60.00 output / $30.00 input = 2x
 	"gpt-4-0613":             60.00 / 30.00,  // $60.00 output / $30.00 input = 2x
 	"gpt-4-32k":              120.00 / 60.00, // $120.00 output / $60.00 input = 2x
 	"gpt-4-32k-0314":         120.00 / 60.00, // $120.00 output / $60.00 input = 2x
 	"gpt-4-32k-0613":         120.00 / 60.00, // $120.00 output / $60.00 input = 2x
 	// GPT-3.5 series
 	"gpt-3.5-turbo":          1.50 / 0.50, // $1.50 output / $0.50 input = 3x
 	"gpt-3.5-turbo-0301":     2.00 / 1.50, // $2.00 output / $1.50 input = 1.33x
 	"gpt-3.5-turbo-0613":     2.00 / 1.50, // $2.00 output / $1.50 input = 1.33x
 	"gpt-3.5-turbo-16k-0613": 4.00 / 3.00, // $4.00 output / $3.00 input = 1.33x
 	"gpt-3.5-turbo-instruct": 2.00 / 1.50, // $2.00 output / $1.50 input = 1.33x
 	"gpt-3.5-turbo-1106":     2.00 / 1.00, // $2.00 output / $1.00 input = 2x
 	"gpt-3.5-turbo-0125":     1.50 / 0.50, // $1.50 output / $0.50 input = 3x
 	// Reasoning models
 	"o1":                    60.00 / 15.00,   // $60.00 output / $15.00 input = 4x
 	"o1-2024-12-17":         60.00 / 15.00,   // $60.00 output / $15.00 input = 4x
 	"o1-pro":                600.00 / 150.00, // $600.00 output / $150.00 input = 4x
 	"o1-preview":            60.00 / 15.00,   // $60.00 output / $15.00 input = 4x
 	"o1-preview-2024-09-12": 60.00 / 15.00,   // $60.00 output / $15.00 input = 4x
 	"o1-mini":               4.40 / 1.10,     // $4.40 output / $1.10 input = 4x
 	"o1-mini-2024-09-12":    4.40 / 1.10,     // $4.40 output / $1.10 input = 4x
 	"o3":                    8.00 / 2.00,     // $8.00 output / $2.00 input = 4x
 	"o3-pro":                80.00 / 20.00,   // $80.00 output / $20.00 input = 4x
 	"o3-deep-research":      40.00 / 10.00,   // $40.00 output / $10.00 input = 4x
 	"o3-mini":               4.40 / 1.10,     // $4.40 output / $1.10 input = 4x
 	"o3-mini-2025-01-31":    4.40 / 1.10,     // $4.40 output / $1.10 input = 4x
 	"o4-mini":               4.40 / 1.10,     // $4.40 output / $1.10 input = 4x
 	"o4-mini-deep-research": 8.00 / 2.00,     // $8.00 output / $2.00 input = 4x
 	// Other models
 	"computer-use-preview": 12.00 / 3.00, // $12.00 output / $3.00 input = 4x
 	"codex-mini-latest":    6.00 / 1.50,  // $6.00 output / $1.50 input = 4x
 	// Transcription models (text output)
 	"gpt-4o-transcribe":      10.00 / 2.50, // $10.00 output / $2.50 input = 4x
 	"gpt-4o-mini-transcribe": 5.00 / 1.25,  // $5.00 output / $1.25 input = 4x
 	// Anthropic Claude models (5x output multiplier)
 	"claude-3-haiku-20240307":    1.25 / 0.25, // $1.25 output / $0.25 input = 5x
 	"claude-3-sonnet-20240229":   15.0 / 3.0,  // $15.00 output / $3.00 input = 5x
 	"claude-3-opus-20240229":     75.0 / 15.0, // $75.00 output / $15.00 input = 5x
 	"claude-3-5-haiku-20241022":  4.0 / 0.8,   // $4.00 output / $0.80 input = 5x
 	"claude-3-5-haiku-latest":    4.0 / 0.8,   // $4.00 output / $0.80 input = 5x
 	"claude-3-5-sonnet-20240620": 15.0 / 3.0,  // $15.00 output / $3.00 input = 5x
 	"claude-3-5-sonnet-20241022": 15.0 / 3.0,  // $15.00 output / $3.00 input = 5x
 	"claude-3-5-sonnet-latest":   15.0 / 3.0,  // $15.00 output / $3.00 input = 5x
 	"claude-4-opus":              75.0 / 15.0, // $75.00 output / $15.00 input = 5x
 	"claude-4-sonnet":            15.0 / 3.0,  // $15.00 output / $3.00 input = 5x
 	"claude-4.1-opus":            75.0 / 15.0, // $75.00 output / $15.00 input = 5x
 	// aws llama3
 	"llama3-8b-8192(33)":  0.0006 / 0.0003,
 	"llama3-70b-8192(33)": 0.0035 / 0.00265,
 	// Gemini 2.5 series (output/input ratios from official pricing)
 	"gemini-2.5-pro":        10.00 / 1.25, // $10.00 output / $1.25 input = 8x
 	"gemini-2.5-flash":      2.50 / 0.30,  // $2.50 output / $0.30 input = 8.33x
 	"gemini-2.5-flash-lite": 0.40 / 0.10,  // $0.40 output / $0.10 input = 4x
 	// AWS Claude models (output/input ratios, prices per 1K tokens)
 	"claude-instant-1.2(37)":         0.0024 / 0.0008,   // 3x
 	"claude-2.0(37)":                 0.024 / 0.008,     // 3x
 	"claude-2.1(37)":                 0.024 / 0.008,     // 3x
 	"claude-3-haiku-20240307(37)":    0.00125 / 0.00025, // 5x
 	"claude-3-sonnet-20240229(37)":   0.015 / 0.003,     // 5x
 	"claude-3-opus-20240229(37)":     0.075 / 0.015,     // 5x
 	"claude-3-5-sonnet-20240620(37)": 0.015 / 0.003,     // 5x
 	"claude-3-5-sonnet-20241022(37)": 0.015 / 0.003,     // 5x
 	"claude-3-5-sonnet-latest(37)":   0.015 / 0.003,     // 5x
 	// Output priced against the same $0.001 / 1K input figure used for this
 	// model's input ratio, giving 5x like every other Claude 3+ entry.
 	"claude-3-5-haiku-20241022(37)": 0.005 / 0.001, // 5x
 	// whisper
 	"whisper-1": 0, // only count input tokens
 	// deepseek
 	"deepseek-chat":     0.28 / 0.14, // $0.28 output / $0.14 input = 2x
 	"deepseek-reasoner": 2.19 / 0.55, // $2.19 output / $0.55 input = ~4x
 	"deepseek-v3":       0.20 / 0.10, // $0.20 output / $0.10 input = 2x
 	"deepseek-r1":       2.19 / 0.55, // $2.19 output / $0.55 input = ~4x (same as reasoner)
 }
 var (