mirror of
https://github.com/songquanpeng/one-api.git
synced 2025-09-18 09:36:37 +08:00
fix: update model ratios and add completion ratio calculations
This commit is contained in:
parent
2f32545ab0
commit
f3300f08e2
@ -272,21 +272,21 @@ var ModelRatio = map[string]float64{
|
||||
"deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb,
|
||||
"deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb,
|
||||
"deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb,
|
||||
"deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb,
|
||||
"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
|
||||
"deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
|
||||
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens
|
||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
// "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb,
|
||||
"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
|
||||
// "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
|
||||
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens
|
||||
"SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens
|
||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||
// https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
||||
"hunyuan-turbo": 0.015 * KiloRmb,
|
||||
"hunyuan-large": 0.004 * KiloRmb,
|
||||
@ -321,22 +321,30 @@ var ModelRatio = map[string]float64{
|
||||
"mistral-medium-latest": 2.7 * MilliTokensUsd,
|
||||
"mistral-large-latest": 8.0 * MilliTokensUsd,
|
||||
"mistral-embed": 0.1 * MilliTokensUsd,
|
||||
// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
|
||||
"gemma-7b-it": 0.07 / 1000 * MilliTokensUsd,
|
||||
"gemma2-9b-it": 0.20 / 1000 * MilliTokensUsd,
|
||||
"llama-3.1-70b-versatile": 0.59 / 1000 * MilliTokensUsd,
|
||||
"llama-3.1-8b-instant": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama-3.2-11b-text-preview": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama-3.2-11b-vision-preview": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama-3.2-1b-preview": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama-3.2-3b-preview": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama-3.2-90b-text-preview": 0.59 / 1000 * MilliTokensUsd,
|
||||
"llama-guard-3-8b": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama3-70b-8192": 0.59 / 1000 * MilliTokensUsd,
|
||||
"llama3-8b-8192": 0.05 / 1000 * MilliTokensUsd,
|
||||
"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000 * MilliTokensUsd,
|
||||
"llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000 * MilliTokensUsd,
|
||||
"mixtral-8x7b-32768": 0.24 / 1000 * MilliTokensUsd,
|
||||
// -------------------------------------
|
||||
// https://groq.com/pricing/
|
||||
// -------------------------------------
|
||||
"gemma2-9b-it": 0.20 * MilliTokensUsd,
|
||||
"llama-3.1-8b-instant": 0.05 * MilliTokensUsd,
|
||||
"llama-3.2-11b-text-preview": 0.18 * MilliTokensUsd,
|
||||
"llama-3.2-11b-vision-preview": 0.18 * MilliTokensUsd,
|
||||
"llama-3.2-1b-preview": 0.04 * MilliTokensUsd,
|
||||
"llama-3.2-3b-preview": 0.06 * MilliTokensUsd,
|
||||
"llama-3.2-90b-text-preview": 0.90 * MilliTokensUsd,
|
||||
"llama-3.2-90b-vision-preview": 0.90 * MilliTokensUsd,
|
||||
"llama-3.3-70b-versatile": 0.59 * MilliTokensUsd,
|
||||
"llama-guard-3-8b": 0.20 * MilliTokensUsd,
|
||||
"llama3-70b-8192": 0.59 * MilliTokensUsd,
|
||||
"llama3-8b-8192": 0.05 * MilliTokensUsd,
|
||||
"llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd,
|
||||
"llama3-groq-8b-8192-tool-use-preview": 0.05 * MilliTokensUsd,
|
||||
"mixtral-8x7b-32768": 0.24 * MilliTokensUsd,
|
||||
"whisper-large-v3": 0.111 * MilliTokensUsd,
|
||||
"whisper-large-v3-turbo": 0.04 * MilliTokensUsd,
|
||||
"distil-whisper-large-v3-en": 0.02 * MilliTokensUsd,
|
||||
"deepseek-r1-distill-qwen-32b": 0.69 * MilliTokensUsd,
|
||||
"deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd,
|
||||
"deepseek-r1-distill-llama-70b": 0.75 * MilliTokensUsd,
|
||||
// https://platform.lingyiwanwu.com/docs#-计费单元
|
||||
"yi-34b-chat-0205": 2.5 * MilliRmb,
|
||||
"yi-34b-chat-200k": 12.0 * MilliRmb,
|
||||
@ -651,6 +659,45 @@ var ModelRatio = map[string]float64{
|
||||
"xwin-lm/xwin-lm-70b": 1.875,
|
||||
}
|
||||
|
||||
// CompletionRatio is the price ratio between completion tokens and prompt tokens
|
||||
var CompletionRatio = map[string]float64{
|
||||
// aws llama3
|
||||
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
||||
"llama3-70b-8192(33)": 0.0035 / 0.00265,
|
||||
// whisper
|
||||
"whisper-1": 0, // only count input tokens
|
||||
"whisper-large-v3": 0, // only count input tokens
|
||||
"whisper-large-v3-turbo": 0, // only count input tokens
|
||||
"distil-whisper-large-v3-en": 0, // only count input tokens
|
||||
// deepseek
|
||||
"deepseek-chat": 1.1 / 0.27,
|
||||
"deepseek-reasoner": 2.19 / 0.55,
|
||||
// openrouter
|
||||
"deepseek/deepseek-chat": 1,
|
||||
"deepseek/deepseek-r1": 1,
|
||||
// -------------------------------------
|
||||
// groq
|
||||
// -------------------------------------
|
||||
"llama-3.3-70b-versatile": 0.79 / 0.59,
|
||||
"llama-3.1-8b-instant": 0.08 / 0.05,
|
||||
"llama3-70b-8192": 0.79 / 0.59,
|
||||
"llama3-8b-8192": 0.08 / 0.05,
|
||||
"gemma2-9b-it": 1.0,
|
||||
"llama-3.2-11b-text-preview": 1.0,
|
||||
"llama-3.2-11b-vision-preview": 1.0,
|
||||
"llama-3.2-1b-preview": 1.0,
|
||||
"llama-3.2-3b-preview": 1.0,
|
||||
"llama-3.2-90b-text-preview": 1.0,
|
||||
"llama-3.2-90b-vision-preview": 1.0,
|
||||
"llama-guard-3-8b": 1.0,
|
||||
"llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59,
|
||||
"llama3-groq-8b-8192-tool-use-preview": 0.08 / 0.05,
|
||||
"mixtral-8x7b-32768": 1.0,
|
||||
"deepseek-r1-distill-qwen-32b": 1,
|
||||
"deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75,
|
||||
"deepseek-r1-distill-llama-70b": 0.99 / 0.75,
|
||||
}
|
||||
|
||||
// AudioRatio represents the price ratio between audio tokens and text tokens
|
||||
var AudioRatio = map[string]float64{
|
||||
"gpt-4o-audio-preview": 16,
|
||||
@ -721,20 +768,6 @@ func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
|
||||
return v
|
||||
}
|
||||
|
||||
var CompletionRatio = map[string]float64{
|
||||
// aws llama3
|
||||
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
||||
"llama3-70b-8192(33)": 0.0035 / 0.00265,
|
||||
// whisper
|
||||
"whisper-1": 0, // only count input tokens
|
||||
// deepseek
|
||||
"deepseek-chat": 1.1 / 0.27,
|
||||
"deepseek-reasoner": 2.19 / 0.55,
|
||||
// openrouter
|
||||
"deepseek/deepseek-chat": 1,
|
||||
"deepseek/deepseek-r1": 1,
|
||||
}
|
||||
|
||||
var (
|
||||
DefaultModelRatio map[string]float64
|
||||
DefaultCompletionRatio map[string]float64
|
||||
|
Loading…
Reference in New Issue
Block a user