From f3300f08e25e212f1b32ae1f678eb7ec2dec6a8c Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Fri, 14 Mar 2025 03:07:58 +0000 Subject: [PATCH] fix: update model ratios and add completion ratio calculations --- relay/billing/ratio/model.go | 123 ++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 45 deletions(-) diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index cebe38cc..617a9c76 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -272,21 +272,21 @@ var ModelRatio = map[string]float64{ "deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb, "deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb, "deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb, - "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb, - "deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb, - "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb, - "SparkDesk": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens - "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens - "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens - "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens - "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens + // "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb, + "deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb, + // "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb, + "SparkDesk": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens + "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens + "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens + "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens + "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 "hunyuan-turbo": 0.015 * KiloRmb, "hunyuan-large": 0.004 * KiloRmb, @@ -321,22 +321,30 @@ var ModelRatio = map[string]float64{ "mistral-medium-latest": 2.7 * MilliTokensUsd, "mistral-large-latest": 8.0 * MilliTokensUsd, "mistral-embed": 0.1 * MilliTokensUsd, - // https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed - "gemma-7b-it": 0.07 / 1000 * MilliTokensUsd, - "gemma2-9b-it": 0.20 / 1000 * MilliTokensUsd, - "llama-3.1-70b-versatile": 0.59 / 1000 * MilliTokensUsd, - "llama-3.1-8b-instant": 0.05 / 1000 * MilliTokensUsd, - "llama-3.2-11b-text-preview": 0.05 / 1000 * MilliTokensUsd, - "llama-3.2-11b-vision-preview": 0.05 / 1000 * MilliTokensUsd, - "llama-3.2-1b-preview": 0.05 / 1000 * MilliTokensUsd, - "llama-3.2-3b-preview": 0.05 / 1000 * MilliTokensUsd, - "llama-3.2-90b-text-preview": 0.59 / 1000 * MilliTokensUsd, - "llama-guard-3-8b": 0.05 / 1000 * MilliTokensUsd, - "llama3-70b-8192": 0.59 / 1000 * MilliTokensUsd, - "llama3-8b-8192": 0.05 / 1000 * MilliTokensUsd, - "llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000 * MilliTokensUsd, - "llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000 * MilliTokensUsd, - "mixtral-8x7b-32768": 0.24 / 1000 * MilliTokensUsd, + // ------------------------------------- + // https://groq.com/pricing/ + // ------------------------------------- + "gemma2-9b-it": 0.20 * MilliTokensUsd, + "llama-3.1-8b-instant": 0.05 * MilliTokensUsd, + "llama-3.2-11b-text-preview": 0.18 * MilliTokensUsd, + "llama-3.2-11b-vision-preview": 0.18 * MilliTokensUsd, + "llama-3.2-1b-preview": 0.04 * MilliTokensUsd, + "llama-3.2-3b-preview": 0.06 * MilliTokensUsd, + "llama-3.2-90b-text-preview": 0.90 * MilliTokensUsd, + "llama-3.2-90b-vision-preview": 0.90 * MilliTokensUsd, + "llama-3.3-70b-versatile": 0.59 * MilliTokensUsd, + "llama-guard-3-8b": 0.20 * MilliTokensUsd, + "llama3-70b-8192": 0.59 * MilliTokensUsd, + "llama3-8b-8192": 0.05 * MilliTokensUsd, + "llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd, + "llama3-groq-8b-8192-tool-use-preview": 0.05 * MilliTokensUsd, + "mixtral-8x7b-32768": 0.24 * MilliTokensUsd, + "whisper-large-v3": 0.111 * MilliTokensUsd, + "whisper-large-v3-turbo": 0.04 * MilliTokensUsd, + "distil-whisper-large-v3-en": 0.02 * MilliTokensUsd, + "deepseek-r1-distill-qwen-32b": 0.69 * MilliTokensUsd, + "deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd, + "deepseek-r1-distill-llama-70b": 0.75 * MilliTokensUsd, // https://platform.lingyiwanwu.com/docs#-计费单元 "yi-34b-chat-0205": 2.5 * MilliRmb, "yi-34b-chat-200k": 12.0 * MilliRmb, @@ -651,6 +659,45 @@ var ModelRatio = map[string]float64{ "xwin-lm/xwin-lm-70b": 1.875, } +// CompletionRatio is the price ratio between completion tokens and prompt tokens +var CompletionRatio = map[string]float64{ + // aws llama3 + "llama3-8b-8192(33)": 0.0006 / 0.0003, + "llama3-70b-8192(33)": 0.0035 / 0.00265, + // whisper + "whisper-1": 0, // only count input tokens + "whisper-large-v3": 0, // only count input tokens + "whisper-large-v3-turbo": 0, // only count input tokens + "distil-whisper-large-v3-en": 0, // only count input tokens + // deepseek + "deepseek-chat": 1.1 / 0.27, + "deepseek-reasoner": 2.19 / 0.55, + // openrouter + "deepseek/deepseek-chat": 1, + "deepseek/deepseek-r1": 1, + // ------------------------------------- + // groq + // ------------------------------------- + "llama-3.3-70b-versatile": 0.79 / 0.59, + "llama-3.1-8b-instant": 0.08 / 0.05, + "llama3-70b-8192": 0.79 / 0.59, + "llama3-8b-8192": 0.08 / 0.05, + "gemma2-9b-it": 1.0, + "llama-3.2-11b-text-preview": 1.0, + "llama-3.2-11b-vision-preview": 1.0, + "llama-3.2-1b-preview": 1.0, + "llama-3.2-3b-preview": 1.0, + "llama-3.2-90b-text-preview": 1.0, + "llama-3.2-90b-vision-preview": 1.0, + "llama-guard-3-8b": 1.0, + "llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59, + "llama3-groq-8b-8192-tool-use-preview": 0.08 / 0.05, + "mixtral-8x7b-32768": 1.0, + "deepseek-r1-distill-qwen-32b": 1, + "deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75, + "deepseek-r1-distill-llama-70b": 0.99 / 0.75, +} + // AudioRatio represents the price ratio between audio tokens and text tokens var AudioRatio = map[string]float64{ "gpt-4o-audio-preview": 16, @@ -721,20 +768,6 @@ func GetAudioPromptTokensPerSecond(actualModelName string) float64 { return v } -var CompletionRatio = map[string]float64{ - // aws llama3 - "llama3-8b-8192(33)": 0.0006 / 0.0003, - "llama3-70b-8192(33)": 0.0035 / 0.00265, - // whisper - "whisper-1": 0, // only count input tokens - // deepseek - "deepseek-chat": 1.1 / 0.27, - "deepseek-reasoner": 2.19 / 0.55, - // openrouter - "deepseek/deepseek-chat": 1, - "deepseek/deepseek-r1": 1, -} - var ( DefaultModelRatio map[string]float64 DefaultCompletionRatio map[string]float64