From a5c517c27a831ba5553802125fa6350486502216 Mon Sep 17 00:00:00 2001
From: longkeyy
Date: Sun, 16 Feb 2025 18:01:24 +0800
Subject: [PATCH] feat: update ali models and price 20250213 (#2086)

Register additional models in the Ali adaptor's ModelList
(qvq-72b-preview, the qwen2.5-vl family, qwen-mt-plus/turbo, deepseek-r1,
deepseek-v3 and the deepseek-r1-distill-* variants) and give each of them
an entry in ModelRatio.

Replace the flat 1.4286 ratio shared by the qwen entries with per-model
values written as `<price> * RMB`, and express the text-embedding ratios
the same way (0.0007 * RMB instead of the literal 0.05).
qwen-audio-turbo keeps the old 1.4286 value; the ali-stable-diffusion-*
and wanx-v1 entries keep 8.00.

The SparkDesk, 360GPT_S2_V9, embedding-bert-512-v1, embedding_s1_v1 and
semantic_similarity_s1_v1 lines differ only in whitespace (presumably
gofmt realignment caused by the longer deepseek-r1-distill-* keys).
---
Review note: ModelList originally added qwen2.5-vl-2b-instruct,
qwen2.5-vl-1b-instruct and qwen2.5-vl-0.5b-instruct, none of which has a
ModelRatio entry, while ModelRatio prices qwen2.5-vl-3b-instruct, which
was not listed. The list now names the 3b model so both tables agree.
Hunk line counts are unchanged by this correction.

 relay/adaptor/ali/constants.go |   4 +
 relay/billing/ratio/model.go   | 184 ++++++++++++++++++---------------
 2 files changed, 103 insertions(+), 85 deletions(-)

diff --git a/relay/adaptor/ali/constants.go b/relay/adaptor/ali/constants.go
index f3d99520..7c253250 100644
--- a/relay/adaptor/ali/constants.go
+++ b/relay/adaptor/ali/constants.go
@@ -14,10 +14,14 @@ var ModelList = []string{
 	"qwen2-72b-instruct", "qwen2-57b-a14b-instruct", "qwen2-7b-instruct", "qwen2-1.5b-instruct", "qwen2-0.5b-instruct",
 	"qwen1.5-110b-chat", "qwen1.5-72b-chat", "qwen1.5-32b-chat", "qwen1.5-14b-chat", "qwen1.5-7b-chat", "qwen1.5-1.8b-chat", "qwen1.5-0.5b-chat",
 	"qwen-72b-chat", "qwen-14b-chat", "qwen-7b-chat", "qwen-1.8b-chat", "qwen-1.8b-longcontext-chat",
+	"qvq-72b-preview",
+	"qwen2.5-vl-72b-instruct", "qwen2.5-vl-7b-instruct", "qwen2.5-vl-3b-instruct",
 	"qwen2-vl-7b-instruct", "qwen2-vl-2b-instruct", "qwen-vl-v1", "qwen-vl-chat-v1",
 	"qwen2-audio-instruct", "qwen-audio-chat",
 	"qwen2.5-math-72b-instruct", "qwen2.5-math-7b-instruct", "qwen2.5-math-1.5b-instruct", "qwen2-math-72b-instruct", "qwen2-math-7b-instruct", "qwen2-math-1.5b-instruct",
 	"qwen2.5-coder-32b-instruct", "qwen2.5-coder-14b-instruct", "qwen2.5-coder-7b-instruct", "qwen2.5-coder-3b-instruct", "qwen2.5-coder-1.5b-instruct", "qwen2.5-coder-0.5b-instruct",
 	"text-embedding-v1", "text-embedding-v3", "text-embedding-v2", "text-embedding-async-v2", "text-embedding-async-v1",
 	"ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1",
+	"qwen-mt-plus", "qwen-mt-turbo",
+	"deepseek-r1", "deepseek-v3", "deepseek-r1-distill-qwen-1.5b", "deepseek-r1-distill-qwen-7b", "deepseek-r1-distill-qwen-14b", "deepseek-r1-distill-qwen-32b", "deepseek-r1-distill-llama-8b", "deepseek-r1-distill-llama-70b",
 }
diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go
index 25e4629d..4a4bbdb0 100644
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -159,91 +159,105 @@ var ModelRatio = map[string]float64{
 	"embedding-2": 0.0005 * RMB,
 	"embedding-3": 0.0005 * RMB,
 	// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
-	"qwen-turbo":                  1.4286, // ¥0.02 / 1k tokens
-	"qwen-turbo-latest":           1.4286,
-	"qwen-plus":                   1.4286,
-	"qwen-plus-latest":            1.4286,
-	"qwen-max":                    1.4286,
-	"qwen-max-latest":             1.4286,
-	"qwen-max-longcontext":        1.4286,
-	"qwen-vl-max":                 1.4286,
-	"qwen-vl-max-latest":          1.4286,
-	"qwen-vl-plus":                1.4286,
-	"qwen-vl-plus-latest":         1.4286,
-	"qwen-vl-ocr":                 1.4286,
-	"qwen-vl-ocr-latest":          1.4286,
-	"qwen-audio-turbo":            1.4286,
-	"qwen-math-plus":              1.4286,
-	"qwen-math-plus-latest":       1.4286,
-	"qwen-math-turbo":             1.4286,
-	"qwen-math-turbo-latest":      1.4286,
-	"qwen-coder-plus":             1.4286,
-	"qwen-coder-plus-latest":      1.4286,
-	"qwen-coder-turbo":            1.4286,
-	"qwen-coder-turbo-latest":     1.4286,
-	"qwq-32b-preview":             1.4286,
-	"qwen2.5-72b-instruct":        1.4286,
-	"qwen2.5-32b-instruct":        1.4286,
-	"qwen2.5-14b-instruct":        1.4286,
-	"qwen2.5-7b-instruct":         1.4286,
-	"qwen2.5-3b-instruct":         1.4286,
-	"qwen2.5-1.5b-instruct":       1.4286,
-	"qwen2.5-0.5b-instruct":       1.4286,
-	"qwen2-72b-instruct":          1.4286,
-	"qwen2-57b-a14b-instruct":     1.4286,
-	"qwen2-7b-instruct":           1.4286,
-	"qwen2-1.5b-instruct":         1.4286,
-	"qwen2-0.5b-instruct":         1.4286,
-	"qwen1.5-110b-chat":           1.4286,
-	"qwen1.5-72b-chat":            1.4286,
-	"qwen1.5-32b-chat":            1.4286,
-	"qwen1.5-14b-chat":            1.4286,
-	"qwen1.5-7b-chat":             1.4286,
-	"qwen1.5-1.8b-chat":           1.4286,
-	"qwen1.5-0.5b-chat":           1.4286,
-	"qwen-72b-chat":               1.4286,
-	"qwen-14b-chat":               1.4286,
-	"qwen-7b-chat":                1.4286,
-	"qwen-1.8b-chat":              1.4286,
-	"qwen-1.8b-longcontext-chat":  1.4286,
-	"qwen2-vl-7b-instruct":        1.4286,
-	"qwen2-vl-2b-instruct":        1.4286,
-	"qwen-vl-v1":                  1.4286,
-	"qwen-vl-chat-v1":             1.4286,
-	"qwen2-audio-instruct":        1.4286,
-	"qwen-audio-chat":             1.4286,
-	"qwen2.5-math-72b-instruct":   1.4286,
-	"qwen2.5-math-7b-instruct":    1.4286,
-	"qwen2.5-math-1.5b-instruct":  1.4286,
-	"qwen2-math-72b-instruct":     1.4286,
-	"qwen2-math-7b-instruct":      1.4286,
-	"qwen2-math-1.5b-instruct":    1.4286,
-	"qwen2.5-coder-32b-instruct":  1.4286,
-	"qwen2.5-coder-14b-instruct":  1.4286,
-	"qwen2.5-coder-7b-instruct":   1.4286,
-	"qwen2.5-coder-3b-instruct":   1.4286,
-	"qwen2.5-coder-1.5b-instruct": 1.4286,
-	"qwen2.5-coder-0.5b-instruct": 1.4286,
-	"text-embedding-v1":           0.05, // ¥0.0007 / 1k tokens
-	"text-embedding-v3":           0.05,
-	"text-embedding-v2":           0.05,
-	"text-embedding-async-v2":     0.05,
-	"text-embedding-async-v1":     0.05,
-	"ali-stable-diffusion-xl":     8.00,
-	"ali-stable-diffusion-v1.5":   8.00,
-	"wanx-v1":                     8.00,
-	"SparkDesk":                   1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v1.1":              1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v2.1":              1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.1":              1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.1-128K":         1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.5":              1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v3.5-32K":          1.2858, // ¥0.018 / 1k tokens
-	"SparkDesk-v4.0":              1.2858, // ¥0.018 / 1k tokens
-	"360GPT_S2_V9":                0.8572, // ¥0.012 / 1k tokens
-	"embedding-bert-512-v1":       0.0715, // ¥0.001 / 1k tokens
-	"embedding_s1_v1":             0.0715, // ¥0.001 / 1k tokens
-	"semantic_similarity_s1_v1":   0.0715, // ¥0.001 / 1k tokens
+	"qwen-turbo":                    0.0003 * RMB,
+	"qwen-turbo-latest":             0.0003 * RMB,
+	"qwen-plus":                     0.0008 * RMB,
+	"qwen-plus-latest":              0.0008 * RMB,
+	"qwen-max":                      0.0024 * RMB,
+	"qwen-max-latest":               0.0024 * RMB,
+	"qwen-max-longcontext":          0.0005 * RMB,
+	"qwen-vl-max":                   0.003 * RMB,
+	"qwen-vl-max-latest":            0.003 * RMB,
+	"qwen-vl-plus":                  0.0015 * RMB,
+	"qwen-vl-plus-latest":           0.0015 * RMB,
+	"qwen-vl-ocr":                   0.005 * RMB,
+	"qwen-vl-ocr-latest":            0.005 * RMB,
+	"qwen-audio-turbo":              1.4286, // ¥0.02 / 1k tokens (legacy flat rate, not repriced by this patch)
+	"qwen-math-plus":                0.004 * RMB,
+	"qwen-math-plus-latest":         0.004 * RMB,
+	"qwen-math-turbo":               0.002 * RMB,
+	"qwen-math-turbo-latest":        0.002 * RMB,
+	"qwen-coder-plus":               0.0035 * RMB,
+	"qwen-coder-plus-latest":        0.0035 * RMB,
+	"qwen-coder-turbo":              0.002 * RMB,
+	"qwen-coder-turbo-latest":       0.002 * RMB,
+	"qwen-mt-plus":                  0.015 * RMB,
+	"qwen-mt-turbo":                 0.001 * RMB,
+	"qwq-32b-preview":               0.002 * RMB,
+	"qwen2.5-72b-instruct":          0.004 * RMB,
+	"qwen2.5-32b-instruct":          0.03 * RMB,
+	"qwen2.5-14b-instruct":          0.001 * RMB,
+	"qwen2.5-7b-instruct":           0.0005 * RMB,
+	"qwen2.5-3b-instruct":           0.006 * RMB,
+	"qwen2.5-1.5b-instruct":         0.0003 * RMB,
+	"qwen2.5-0.5b-instruct":         0.0003 * RMB,
+	"qwen2-72b-instruct":            0.004 * RMB,
+	"qwen2-57b-a14b-instruct":       0.0035 * RMB,
+	"qwen2-7b-instruct":             0.001 * RMB,
+	"qwen2-1.5b-instruct":           0.001 * RMB,
+	"qwen2-0.5b-instruct":           0.001 * RMB,
+	"qwen1.5-110b-chat":             0.007 * RMB,
+	"qwen1.5-72b-chat":              0.005 * RMB,
+	"qwen1.5-32b-chat":              0.0035 * RMB,
+	"qwen1.5-14b-chat":              0.002 * RMB,
+	"qwen1.5-7b-chat":               0.001 * RMB,
+	"qwen1.5-1.8b-chat":             0.001 * RMB,
+	"qwen1.5-0.5b-chat":             0.001 * RMB,
+	"qwen-72b-chat":                 0.02 * RMB,
+	"qwen-14b-chat":                 0.008 * RMB,
+	"qwen-7b-chat":                  0.006 * RMB,
+	"qwen-1.8b-chat":                0.006 * RMB,
+	"qwen-1.8b-longcontext-chat":    0.006 * RMB,
+	"qvq-72b-preview":               0.012 * RMB,
+	"qwen2.5-vl-72b-instruct":       0.016 * RMB,
+	"qwen2.5-vl-7b-instruct":        0.002 * RMB,
+	"qwen2.5-vl-3b-instruct":        0.0012 * RMB,
+	"qwen2-vl-7b-instruct":          0.016 * RMB,
+	"qwen2-vl-2b-instruct":          0.002 * RMB,
+	"qwen-vl-v1":                    0.002 * RMB,
+	"qwen-vl-chat-v1":               0.002 * RMB,
+	"qwen2-audio-instruct":          0.002 * RMB,
+	"qwen-audio-chat":               0.002 * RMB,
+	"qwen2.5-math-72b-instruct":     0.004 * RMB,
+	"qwen2.5-math-7b-instruct":      0.001 * RMB,
+	"qwen2.5-math-1.5b-instruct":    0.001 * RMB,
+	"qwen2-math-72b-instruct":       0.004 * RMB,
+	"qwen2-math-7b-instruct":        0.001 * RMB,
+	"qwen2-math-1.5b-instruct":      0.001 * RMB,
+	"qwen2.5-coder-32b-instruct":    0.002 * RMB,
+	"qwen2.5-coder-14b-instruct":    0.002 * RMB,
+	"qwen2.5-coder-7b-instruct":     0.001 * RMB,
+	"qwen2.5-coder-3b-instruct":     0.001 * RMB,
+	"qwen2.5-coder-1.5b-instruct":   0.001 * RMB,
+	"qwen2.5-coder-0.5b-instruct":   0.001 * RMB,
+	"text-embedding-v1":             0.0007 * RMB, // ¥0.0007 / 1k tokens
+	"text-embedding-v3":             0.0007 * RMB,
+	"text-embedding-v2":             0.0007 * RMB,
+	"text-embedding-async-v2":       0.0007 * RMB,
+	"text-embedding-async-v1":       0.0007 * RMB,
+	"ali-stable-diffusion-xl":       8.00,
+	"ali-stable-diffusion-v1.5":     8.00,
+	"wanx-v1":                       8.00,
+	"deepseek-r1":                   0.002 * RMB,
+	"deepseek-v3":                   0.001 * RMB,
+	"deepseek-r1-distill-qwen-1.5b": 0.001 * RMB,
+	"deepseek-r1-distill-qwen-7b":   0.0005 * RMB,
+	"deepseek-r1-distill-qwen-14b":  0.001 * RMB,
+	"deepseek-r1-distill-qwen-32b":  0.002 * RMB,
+	"deepseek-r1-distill-llama-8b":  0.0005 * RMB,
+	"deepseek-r1-distill-llama-70b": 0.004 * RMB,
+	"SparkDesk":                     1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v1.1":                1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v2.1":                1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.1":                1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.1-128K":           1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.5":                1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v3.5-32K":            1.2858, // ¥0.018 / 1k tokens
+	"SparkDesk-v4.0":                1.2858, // ¥0.018 / 1k tokens
+	"360GPT_S2_V9":                  0.8572, // ¥0.012 / 1k tokens
+	"embedding-bert-512-v1":         0.0715, // ¥0.001 / 1k tokens
+	"embedding_s1_v1":               0.0715, // ¥0.001 / 1k tokens
+	"semantic_similarity_s1_v1":     0.0715, // ¥0.001 / 1k tokens
 	// https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
 	"hunyuan-turbo": 0.015 * RMB,
 	"hunyuan-large": 0.004 * RMB,