fix: update model ratios and add completion ratio calculations

This commit is contained in:
Laisky.Cai 2025-03-14 03:07:58 +00:00
parent 2f32545ab0
commit f3300f08e2

View File

@ -272,21 +272,21 @@ var ModelRatio = map[string]float64{
"deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb, "deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb,
"deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb, "deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb,
"deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb, "deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb,
"deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb, // "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb,
"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb, "deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
"deepseek-r1-distill-llama-70b": 0.004 * KiloRmb, // "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens "SparkDesk": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens
"SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens "SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
// https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
"hunyuan-turbo": 0.015 * KiloRmb, "hunyuan-turbo": 0.015 * KiloRmb,
"hunyuan-large": 0.004 * KiloRmb, "hunyuan-large": 0.004 * KiloRmb,
@ -321,22 +321,30 @@ var ModelRatio = map[string]float64{
"mistral-medium-latest": 2.7 * MilliTokensUsd, "mistral-medium-latest": 2.7 * MilliTokensUsd,
"mistral-large-latest": 8.0 * MilliTokensUsd, "mistral-large-latest": 8.0 * MilliTokensUsd,
"mistral-embed": 0.1 * MilliTokensUsd, "mistral-embed": 0.1 * MilliTokensUsd,
// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed // -------------------------------------
"gemma-7b-it": 0.07 / 1000 * MilliTokensUsd, // https://groq.com/pricing/
"gemma2-9b-it": 0.20 / 1000 * MilliTokensUsd, // -------------------------------------
"llama-3.1-70b-versatile": 0.59 / 1000 * MilliTokensUsd, "gemma2-9b-it": 0.20 * MilliTokensUsd,
"llama-3.1-8b-instant": 0.05 / 1000 * MilliTokensUsd, "llama-3.1-8b-instant": 0.05 * MilliTokensUsd,
"llama-3.2-11b-text-preview": 0.05 / 1000 * MilliTokensUsd, "llama-3.2-11b-text-preview": 0.18 * MilliTokensUsd,
"llama-3.2-11b-vision-preview": 0.05 / 1000 * MilliTokensUsd, "llama-3.2-11b-vision-preview": 0.18 * MilliTokensUsd,
"llama-3.2-1b-preview": 0.05 / 1000 * MilliTokensUsd, "llama-3.2-1b-preview": 0.04 * MilliTokensUsd,
"llama-3.2-3b-preview": 0.05 / 1000 * MilliTokensUsd, "llama-3.2-3b-preview": 0.06 * MilliTokensUsd,
"llama-3.2-90b-text-preview": 0.59 / 1000 * MilliTokensUsd, "llama-3.2-90b-text-preview": 0.90 * MilliTokensUsd,
"llama-guard-3-8b": 0.05 / 1000 * MilliTokensUsd, "llama-3.2-90b-vision-preview": 0.90 * MilliTokensUsd,
"llama3-70b-8192": 0.59 / 1000 * MilliTokensUsd, "llama-3.3-70b-versatile": 0.59 * MilliTokensUsd,
"llama3-8b-8192": 0.05 / 1000 * MilliTokensUsd, "llama-guard-3-8b": 0.20 * MilliTokensUsd,
"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000 * MilliTokensUsd, "llama3-70b-8192": 0.59 * MilliTokensUsd,
"llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000 * MilliTokensUsd, "llama3-8b-8192": 0.05 * MilliTokensUsd,
"mixtral-8x7b-32768": 0.24 / 1000 * MilliTokensUsd, "llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd,
"llama3-groq-8b-8192-tool-use-preview": 0.05 * MilliTokensUsd,
"mixtral-8x7b-32768": 0.24 * MilliTokensUsd,
"whisper-large-v3": 0.111 * MilliTokensUsd,
"whisper-large-v3-turbo": 0.04 * MilliTokensUsd,
"distil-whisper-large-v3-en": 0.02 * MilliTokensUsd,
"deepseek-r1-distill-qwen-32b": 0.69 * MilliTokensUsd,
"deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd,
"deepseek-r1-distill-llama-70b": 0.75 * MilliTokensUsd,
// https://platform.lingyiwanwu.com/docs#-计费单元 // https://platform.lingyiwanwu.com/docs#-计费单元
"yi-34b-chat-0205": 2.5 * MilliRmb, "yi-34b-chat-0205": 2.5 * MilliRmb,
"yi-34b-chat-200k": 12.0 * MilliRmb, "yi-34b-chat-200k": 12.0 * MilliRmb,
@ -651,6 +659,45 @@ var ModelRatio = map[string]float64{
"xwin-lm/xwin-lm-70b": 1.875, "xwin-lm/xwin-lm-70b": 1.875,
} }
// CompletionRatio is the price ratio between completion tokens and prompt tokens.
//
// Keys are model names and values are multipliers. Where the two prices
// differ, the value is written as "completion price / prompt price" so the
// source figures stay visible. A value of 1 prices completion tokens the same
// as prompt tokens, and 0 means completion tokens are not billed.
//
// NOTE(review): how a model that is missing from this map is priced is
// decided by the lookup code, which is not visible here.
var CompletionRatio = map[string]float64{
// aws llama3
// NOTE(review): the "(33)" suffix presumably distinguishes these AWS entries
// from the groq models of the same base name listed below; confirm.
"llama3-8b-8192(33)": 0.0006 / 0.0003,
"llama3-70b-8192(33)": 0.0035 / 0.00265,
// whisper
"whisper-1": 0, // only count input tokens
"whisper-large-v3": 0, // only count input tokens
"whisper-large-v3-turbo": 0, // only count input tokens
"distil-whisper-large-v3-en": 0, // only count input tokens
// deepseek
// NOTE(review): presumably both figures of each fraction are in the same
// currency unit; confirm against the provider's price list.
"deepseek-chat": 1.1 / 0.27,
"deepseek-reasoner": 2.19 / 0.55,
// openrouter
"deepseek/deepseek-chat": 1,
"deepseek/deepseek-r1": 1,
// -------------------------------------
// groq
// -------------------------------------
// The denominator of each fraction below equals the coefficient used for the
// same model in ModelRatio, so the two tables must be updated together.
"llama-3.3-70b-versatile": 0.79 / 0.59,
"llama-3.1-8b-instant": 0.08 / 0.05,
"llama3-70b-8192": 0.79 / 0.59,
"llama3-8b-8192": 0.08 / 0.05,
"gemma2-9b-it": 1.0,
"llama-3.2-11b-text-preview": 1.0,
"llama-3.2-11b-vision-preview": 1.0,
"llama-3.2-1b-preview": 1.0,
"llama-3.2-3b-preview": 1.0,
"llama-3.2-90b-text-preview": 1.0,
"llama-3.2-90b-vision-preview": 1.0,
"llama-guard-3-8b": 1.0,
"llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59,
"llama3-groq-8b-8192-tool-use-preview": 0.08 / 0.05,
"mixtral-8x7b-32768": 1.0,
"deepseek-r1-distill-qwen-32b": 1,
"deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75,
"deepseek-r1-distill-llama-70b": 0.99 / 0.75,
}
// AudioRatio represents the price ratio between audio tokens and text tokens // AudioRatio represents the price ratio between audio tokens and text tokens
var AudioRatio = map[string]float64{ var AudioRatio = map[string]float64{
"gpt-4o-audio-preview": 16, "gpt-4o-audio-preview": 16,
@ -721,20 +768,6 @@ func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
return v return v
} }
// CompletionRatio is the price ratio between completion tokens and prompt
// tokens, keyed by model name. Fractions are written as
// "completion price / prompt price"; 1 prices both kinds of token the same.
var CompletionRatio = map[string]float64{
// aws llama3
"llama3-8b-8192(33)": 0.0006 / 0.0003,
"llama3-70b-8192(33)": 0.0035 / 0.00265,
// whisper
"whisper-1": 0, // only count input tokens
// deepseek
"deepseek-chat": 1.1 / 0.27,
"deepseek-reasoner": 2.19 / 0.55,
// openrouter
"deepseek/deepseek-chat": 1,
"deepseek/deepseek-r1": 1,
}
var ( var (
DefaultModelRatio map[string]float64 DefaultModelRatio map[string]float64
DefaultCompletionRatio map[string]float64 DefaultCompletionRatio map[string]float64