From 0ad609ade6dad16bb9ec45421083c73b64ede7d1 Mon Sep 17 00:00:00 2001 From: WqyJh <781345688@qq.com> Date: Tue, 14 Jan 2025 14:38:26 +0800 Subject: [PATCH 1/6] refactor: add GetRatio to Adaptor --- controller/model.go | 4 +- model/option.go | 14 +- relay/adaptor/ai360/constants.go | 12 +- relay/adaptor/aiproxy/adaptor.go | 16 +- relay/adaptor/aiproxy/constants.go | 6 +- relay/adaptor/ali/adaptor.go | 12 +- relay/adaptor/ali/constants.go | 152 +++++++++++++++--- relay/adaptor/ali/main.go | 11 +- relay/adaptor/anthropic/adaptor.go | 7 +- relay/adaptor/anthropic/constants.go | 23 +-- relay/adaptor/aws/adaptor.go | 24 ++- relay/adaptor/aws/claude/adapter.go | 6 + relay/adaptor/aws/claude/main.go | 2 + relay/adaptor/aws/llama3/adapter.go | 6 + relay/adaptor/aws/llama3/main.go | 6 + relay/adaptor/aws/utils/adaptor.go | 2 + relay/adaptor/baichuan/constants.go | 11 +- relay/adaptor/baidu/adaptor.go | 11 +- relay/adaptor/baidu/constants.go | 52 ++++-- relay/adaptor/cloudflare/adaptor.go | 5 + relay/adaptor/cohere/adaptor.go | 7 +- relay/adaptor/cohere/constant.go | 25 +-- relay/adaptor/common.go | 22 +++ relay/adaptor/coze/adaptor.go | 12 +- relay/adaptor/coze/constants.go | 4 +- relay/adaptor/deepl/adaptor.go | 16 +- relay/adaptor/deepl/constants.go | 11 +- relay/adaptor/deepseek/constants.go | 8 +- relay/adaptor/doubao/constants.go | 20 +-- relay/adaptor/gemini/adaptor.go | 7 +- relay/adaptor/gemini/constants.go | 54 ++++++- relay/adaptor/groq/constants.go | 37 ++--- relay/adaptor/interface.go | 2 + relay/adaptor/lingyiwanwu/constants.go | 10 +- relay/adaptor/minimax/constants.go | 18 ++- relay/adaptor/mistral/constants.go | 24 ++- relay/adaptor/moonshot/constants.go | 11 +- relay/adaptor/novita/constants.go | 47 ++++-- relay/adaptor/ollama/adaptor.go | 12 +- relay/adaptor/ollama/constants.go | 18 ++- relay/adaptor/openai/adaptor.go | 10 +- relay/adaptor/openai/compatible.go | 35 ++-- relay/adaptor/openai/constants.go | 82 +++++++--- relay/adaptor/palm/adaptor.go | 12 +- 
relay/adaptor/palm/constants.go | 6 +- relay/adaptor/proxy/adaptor.go | 5 + relay/adaptor/replicate/adaptor.go | 10 +- relay/adaptor/replicate/constant.go | 99 ++++++------ relay/adaptor/siliconflow/constants.go | 68 ++++---- relay/adaptor/stepfun/constants.go | 24 +-- relay/adaptor/tencent/adaptor.go | 20 ++- relay/adaptor/tencent/constants.go | 22 ++- relay/adaptor/togetherai/constants.go | 14 +- relay/adaptor/vertexai/adaptor.go | 26 ++- relay/adaptor/vertexai/claude/adapter.go | 22 ++- relay/adaptor/vertexai/gemini/adapter.go | 16 +- relay/adaptor/vertexai/registry.go | 9 +- relay/adaptor/xai/constants.go | 6 +- relay/adaptor/xunfei/adaptor.go | 18 ++- relay/adaptor/xunfei/constants.go | 20 +-- relay/adaptor/zhipu/adaptor.go | 14 +- relay/adaptor/zhipu/constants.go | 25 ++- relay/adaptor/zhipu/main.go | 12 +- relay/billing/billing.go | 15 +- relay/billing/ratio/model.go | 52 +++++- relay/controller/audio.go | 14 +- relay/controller/helper.go | 38 ++++- relay/controller/image.go | 7 +- relay/controller/text.go | 22 +-- .../src/components/OperationSetting.js | 35 +++- 70 files changed, 1038 insertions(+), 467 deletions(-) diff --git a/controller/model.go b/controller/model.go index dcbe709e..b9bf6281 100644 --- a/controller/model.go +++ b/controller/model.go @@ -6,6 +6,7 @@ import ( "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/model" relay "github.com/songquanpeng/one-api/relay" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" "github.com/songquanpeng/one-api/relay/apitype" "github.com/songquanpeng/one-api/relay/channeltype" @@ -86,7 +87,8 @@ func init() { if channelType == channeltype.Azure { continue } - channelName, channelModelList := openai.GetCompatibleChannelMeta(channelType) + channelName, channelRatioMap := openai.GetCompatibleChannelMeta(channelType) + channelModelList := adaptor.GetModelListHelper(channelRatioMap) for _, modelName := range channelModelList { 
models = append(models, OpenAIModels{ Id: modelName, diff --git a/model/option.go b/model/option.go index 8fd30aee..c749b231 100644 --- a/model/option.go +++ b/model/option.go @@ -1,12 +1,13 @@ package model import ( - "github.com/songquanpeng/one-api/common/config" - "github.com/songquanpeng/one-api/common/logger" - billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" "strconv" "strings" "time" + + "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/logger" + billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" ) type Option struct { @@ -70,6 +71,7 @@ func InitOptionMap() { config.OptionMap["ModelRatio"] = billingratio.ModelRatio2JSONString() config.OptionMap["GroupRatio"] = billingratio.GroupRatio2JSONString() config.OptionMap["CompletionRatio"] = billingratio.CompletionRatio2JSONString() + config.OptionMap["Ratio"] = billingratio.Ratio2JSONString() config.OptionMap["TopUpLink"] = config.TopUpLink config.OptionMap["ChatLink"] = config.ChatLink config.OptionMap["QuotaPerUnit"] = strconv.FormatFloat(config.QuotaPerUnit, 'f', -1, 64) @@ -223,12 +225,14 @@ func updateOptionMap(key string, value string) (err error) { config.PreConsumedQuota, _ = strconv.ParseInt(value, 10, 64) case "RetryTimes": config.RetryTimes, _ = strconv.Atoi(value) - case "ModelRatio": + case "ModelRatio": // Deprecated err = billingratio.UpdateModelRatioByJSONString(value) case "GroupRatio": err = billingratio.UpdateGroupRatioByJSONString(value) - case "CompletionRatio": + case "CompletionRatio": // Deprecated err = billingratio.UpdateCompletionRatioByJSONString(value) + case "Ratio": + err = billingratio.UpdateRatioByJSONString(value) case "TopUpLink": config.TopUpLink = value case "ChatLink": diff --git a/relay/adaptor/ai360/constants.go b/relay/adaptor/ai360/constants.go index cfc3cb28..dd3e6e5e 100644 --- a/relay/adaptor/ai360/constants.go +++ b/relay/adaptor/ai360/constants.go @@ -1,8 +1,10 @@ package ai360 -var ModelList = 
[]string{ - "360GPT_S2_V9", - "embedding-bert-512-v1", - "embedding_s1_v1", - "semantic_similarity_s1_v1", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "360GPT_S2_V9": {Input: 0.012 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "embedding-bert-512-v1": {Input: 0.0001 * ratio.RMB, Output: 0}, + "embedding_s1_v1": {Input: 0.0001 * ratio.RMB, Output: 0}, + "semantic_similarity_s1_v1": {Input: 0.0001 * ratio.RMB, Output: 0}, } diff --git a/relay/adaptor/aiproxy/adaptor.go b/relay/adaptor/aiproxy/adaptor.go index 42d49c0a..30317f3a 100644 --- a/relay/adaptor/aiproxy/adaptor.go +++ b/relay/adaptor/aiproxy/adaptor.go @@ -3,12 +3,14 @@ package aiproxy import ( "errors" "fmt" - "github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/relay/adaptor" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/model" "io" "net/http" + + "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/model" ) type Adaptor struct { @@ -58,8 +60,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/aiproxy/constants.go b/relay/adaptor/aiproxy/constants.go index 818d2709..aa684c90 100644 --- a/relay/adaptor/aiproxy/constants.go +++ b/relay/adaptor/aiproxy/constants.go @@ -2,8 +2,4 @@ package aiproxy import "github.com/songquanpeng/one-api/relay/adaptor/openai" -var ModelList = []string{""} - -func init() { - ModelList = openai.ModelList -} +var RatioMap = openai.RatioMap diff --git 
a/relay/adaptor/ali/adaptor.go b/relay/adaptor/ali/adaptor.go index 4aa8a11a..f8ee1d78 100644 --- a/relay/adaptor/ali/adaptor.go +++ b/relay/adaptor/ali/adaptor.go @@ -3,13 +3,15 @@ package ali import ( "errors" "fmt" + "io" + "net/http" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" - "io" - "net/http" ) // https://help.aliyun.com/zh/dashscope/developer-reference/api-details @@ -96,8 +98,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/ali/constants.go b/relay/adaptor/ali/constants.go index f3d99520..ab4be887 100644 --- a/relay/adaptor/ali/constants.go +++ b/relay/adaptor/ali/constants.go @@ -1,23 +1,135 @@ package ali -var ModelList = []string{ - "qwen-turbo", "qwen-turbo-latest", - "qwen-plus", "qwen-plus-latest", - "qwen-max", "qwen-max-latest", - "qwen-max-longcontext", - "qwen-vl-max", "qwen-vl-max-latest", "qwen-vl-plus", "qwen-vl-plus-latest", - "qwen-vl-ocr", "qwen-vl-ocr-latest", - "qwen-audio-turbo", - "qwen-math-plus", "qwen-math-plus-latest", "qwen-math-turbo", "qwen-math-turbo-latest", - "qwen-coder-plus", "qwen-coder-plus-latest", "qwen-coder-turbo", "qwen-coder-turbo-latest", - "qwq-32b-preview", "qwen2.5-72b-instruct", "qwen2.5-32b-instruct", "qwen2.5-14b-instruct", "qwen2.5-7b-instruct", "qwen2.5-3b-instruct", "qwen2.5-1.5b-instruct", "qwen2.5-0.5b-instruct", - "qwen2-72b-instruct", "qwen2-57b-a14b-instruct", "qwen2-7b-instruct", "qwen2-1.5b-instruct", "qwen2-0.5b-instruct", - 
"qwen1.5-110b-chat", "qwen1.5-72b-chat", "qwen1.5-32b-chat", "qwen1.5-14b-chat", "qwen1.5-7b-chat", "qwen1.5-1.8b-chat", "qwen1.5-0.5b-chat", - "qwen-72b-chat", "qwen-14b-chat", "qwen-7b-chat", "qwen-1.8b-chat", "qwen-1.8b-longcontext-chat", - "qwen2-vl-7b-instruct", "qwen2-vl-2b-instruct", "qwen-vl-v1", "qwen-vl-chat-v1", - "qwen2-audio-instruct", "qwen-audio-chat", - "qwen2.5-math-72b-instruct", "qwen2.5-math-7b-instruct", "qwen2.5-math-1.5b-instruct", "qwen2-math-72b-instruct", "qwen2-math-7b-instruct", "qwen2-math-1.5b-instruct", - "qwen2.5-coder-32b-instruct", "qwen2.5-coder-14b-instruct", "qwen2.5-coder-7b-instruct", "qwen2.5-coder-3b-instruct", "qwen2.5-coder-1.5b-instruct", "qwen2.5-coder-0.5b-instruct", - "text-embedding-v1", "text-embedding-v3", "text-embedding-v2", "text-embedding-async-v2", "text-embedding-async-v1", - "ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://help.aliyun.com/zh/model-studio/getting-started/models +// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing +var RatioMap = map[string]ratio.Ratio{ + "qwen-long": {Input: 0.0005 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen-turbo": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-latest": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-2024-09-19": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-0919": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-2024-06-24": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-0624": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "qwen-turbo-2024-02-06": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-turbo-0206": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-plus": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen-plus-latest": {Input: 
0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen-plus-2024-09-19": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen-plus-0919": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen-plus-2024-08-06": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-0806": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-2024-07-23": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-0723": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-2024-06-24": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-0624": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-2024-02-06": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-plus-0206": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-max": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "qwen-max-latest": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "qwen-max-2024-09-19": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "qwen-max-0919": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "qwen-max-2024-04-28": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-max-0428": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-max-2024-04-03": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-max-0403": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-max-2024-01-07": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-max-0107": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "qwen-vl-max": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-latest": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-2024-12-30": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-1230": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-2024-11-19": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-1119": {Input: 
0.003 * ratio.RMB, Output: 0.009 * ratio.RMB}, + "qwen-vl-max-2024-10-30": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-max-1030": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-max-2024-08-09": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-max-0809": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-max-2024-02-01": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-max-0201": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-vl-plus": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB}, + "qwen-vl-plus-latest": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB}, + "qwen-vl-plus-2024-08-09": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB}, + "qwen-vl-plus-0809": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB}, + "qwen-vl-plus-2023-12-01": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "qwen-vl-ocr": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + "qwen-vl-ocr-latest": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + "qwen-vl-ocr-2024-10-28": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + "qwen-audio-turbo": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-turbo-latest": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-turbo-2024-12-04": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-turbo-1204": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-turbo-2024-08-07": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-turbo-0807": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-math-plus": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-plus-latest": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-plus-2024-09-19": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-plus-0919": {Input: 0.004 * ratio.RMB, Output: 
0.012 * ratio.RMB}, + "qwen-math-plus-2024-08-16": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-plus-0816": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-turbo": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-turbo-latest": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-turbo-2024-09-19": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-math-turbo-0919": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen-coder-plus": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen-coder-plus-latest": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen-coder-plus-2024-11-06": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen-coder-plus-1106": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen-coder-turbo": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-coder-turbo-latest": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-coder-turbo-2024-09-19": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-coder-turbo-0919": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwq-32b-preview": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen2.5-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen2.5-32b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen2.5-14b-instruct": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen2.5-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen2.5-3b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2.5-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2.5-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen2-57b-a14b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen2-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * 
ratio.RMB}, + "qwen2-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen1.5-110b-chat": {Input: 0.007 * ratio.RMB, Output: 0.014 * ratio.RMB}, + "qwen1.5-72b-chat": {Input: 0.005 * ratio.RMB, Output: 0.01 * ratio.RMB}, + "qwen1.5-32b-chat": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen1.5-14b-chat": {Input: 0.002 * ratio.RMB, Output: 0.004 * ratio.RMB}, + "qwen1.5-7b-chat": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen1.5-1.8b-chat": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen1.5-0.5b-chat": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen-72b-chat": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB}, + "qwen-14b-chat": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "qwen-7b-chat": {Input: 0.006 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen-1.8b-chat": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen-1.8b-longcontext-chat": {Input: 0.1, Output: 0.1}, // 限时免费(需申请) + "qwen2-vl-72b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2-vl-7b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2-vl-2b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen-vl-v1": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-vl-chat-v1": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2-audio-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen-audio-chat": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2.5-math-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen2.5-math-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen2.5-math-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2-math-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "qwen2-math-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen2-math-1.5b-instruct": {Input: 0.1, Output: 
0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2.5-coder-32b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB}, + "qwen2.5-coder-14b-instruct": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB}, + "qwen2.5-coder-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "qwen2.5-coder-3b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "qwen2.5-coder-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。 + "qwen2.5-coder-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费 + "text-embedding-v3": {Input: 0.0007 * ratio.RMB, Output: 0}, + "text-embedding-v2": {Input: 0.0007 * ratio.RMB, Output: 0}, + "text-embedding-v1": {Input: 0.0007 * ratio.RMB, Output: 0}, + "text-embedding-async-v2": {Input: 0.0007 * ratio.RMB, Output: 0}, + "text-embedding-async-v1": {Input: 0.0007 * ratio.RMB, Output: 0}, + "ali-stable-diffusion-xl": {Input: 8.00 * ratio.RMB, Output: 0}, + "ali-stable-diffusion-v1.5": {Input: 8.00 * ratio.RMB, Output: 0}, + "wanx-v1": {Input: 8.00 * ratio.RMB, Output: 0}, } diff --git a/relay/adaptor/ali/main.go b/relay/adaptor/ali/main.go index 6a73c707..c67c150a 100644 --- a/relay/adaptor/ali/main.go +++ b/relay/adaptor/ali/main.go @@ -3,12 +3,13 @@ package ali import ( "bufio" "encoding/json" - "github.com/songquanpeng/one-api/common/ctxkey" - "github.com/songquanpeng/one-api/common/render" "io" "net/http" "strings" + "github.com/songquanpeng/one-api/common/ctxkey" + "github.com/songquanpeng/one-api/common/render" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/helper" @@ -119,7 +120,11 @@ func embeddingResponseAli2OpenAI(response *EmbeddingResponse) *openai.EmbeddingR Object: "list", Data: make([]openai.EmbeddingResponseItem, 0, len(response.Output.Embeddings)), Model: "text-embedding-v1", - Usage: model.Usage{TotalTokens: response.Usage.TotalTokens}, + Usage: model.Usage{ + PromptTokens: response.Usage.InputTokens, + CompletionTokens: 
response.Usage.OutputTokens, + TotalTokens: response.Usage.TotalTokens, + }, } for _, item := range response.Output.Embeddings { diff --git a/relay/adaptor/anthropic/adaptor.go b/relay/adaptor/anthropic/adaptor.go index bd0949be..6cef3571 100644 --- a/relay/adaptor/anthropic/adaptor.go +++ b/relay/adaptor/anthropic/adaptor.go @@ -9,6 +9,7 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -70,8 +71,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/anthropic/constants.go b/relay/adaptor/anthropic/constants.go index 8ea7c4d8..efaab72b 100644 --- a/relay/adaptor/anthropic/constants.go +++ b/relay/adaptor/anthropic/constants.go @@ -1,12 +1,17 @@ package anthropic -var ModelList = []string{ - "claude-instant-1.2", "claude-2.0", "claude-2.1", - "claude-3-haiku-20240307", - "claude-3-5-haiku-20241022", - "claude-3-sonnet-20240229", - "claude-3-opus-20240229", - "claude-3-5-sonnet-20240620", - "claude-3-5-sonnet-20241022", - "claude-3-5-sonnet-latest", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://www.anthropic.com/api#pricing +var RatioMap = map[string]ratio.Ratio{ + "claude-instant-1.2": {Input: 0.8 * ratio.MILLI_USD, Output: 2.4 * ratio.MILLI_USD}, + "claude-2.0": {Input: 8.0 * ratio.MILLI_USD, Output: 24 * ratio.MILLI_USD}, + "claude-2.1": {Input: 8.0 * ratio.MILLI_USD, Output: 24 * ratio.MILLI_USD}, + "claude-3-haiku-20240307": {Input: 0.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD}, + 
"claude-3-5-haiku-20241022": {Input: 0.8 * ratio.MILLI_USD, Output: 4 * ratio.MILLI_USD}, + "claude-3-sonnet-20240229": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, + "claude-3-opus-20240229": {Input: 15 * ratio.MILLI_USD, Output: 75 * ratio.MILLI_USD}, + "claude-3-5-sonnet-20240620": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, + "claude-3-5-sonnet-20241022": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, + "claude-3-5-sonnet-latest": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/aws/adaptor.go b/relay/adaptor/aws/adaptor.go index 62221346..125e0f3f 100644 --- a/relay/adaptor/aws/adaptor.go +++ b/relay/adaptor/aws/adaptor.go @@ -10,7 +10,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" - "github.com/songquanpeng/one-api/relay/adaptor/aws/utils" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -18,8 +18,6 @@ import ( var _ adaptor.Adaptor = new(Adaptor) type Adaptor struct { - awsAdapter utils.AwsAdapter - Meta *meta.Meta AwsClient *bedrockruntime.Client } @@ -42,15 +40,27 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G return nil, errors.New("adaptor not found") } - a.awsAdapter = adaptor return adaptor.ConvertRequest(c, relayMode, request) } func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { - if a.awsAdapter == nil { - return nil, utils.WrapErr(errors.New("awsAdapter is nil")) + adaptor := GetAdaptor(meta.ActualModelName) + if adaptor == nil { + return nil, &model.ErrorWithStatusCode{ + StatusCode: http.StatusInternalServerError, + Error: model.Error{Message: "adaptor not found"}, + } } - return a.awsAdapter.DoResponse(c, a.AwsClient, meta) + + return 
adaptor.DoResponse(c, a.AwsClient, meta) +} + +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + adaptor := GetAdaptor(meta.ActualModelName) + if adaptor == nil { + return nil + } + return adaptor.GetRatio(meta) } func (a *Adaptor) GetModelList() (models []string) { diff --git a/relay/adaptor/aws/claude/adapter.go b/relay/adaptor/aws/claude/adapter.go index eb3c9fb8..1edbecfa 100644 --- a/relay/adaptor/aws/claude/adapter.go +++ b/relay/adaptor/aws/claude/adapter.go @@ -5,8 +5,10 @@ import ( "github.com/gin-gonic/gin" "github.com/pkg/errors" "github.com/songquanpeng/one-api/common/ctxkey" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/anthropic" "github.com/songquanpeng/one-api/relay/adaptor/aws/utils" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -35,3 +37,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta } return } + +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, anthropic.RatioMap) +} diff --git a/relay/adaptor/aws/claude/main.go b/relay/adaptor/aws/claude/main.go index 3fe3dfd8..d3b6f059 100644 --- a/relay/adaptor/aws/claude/main.go +++ b/relay/adaptor/aws/claude/main.go @@ -38,6 +38,8 @@ var AwsModelIDMap = map[string]string{ "claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0", } +var RatioMap = anthropic.RatioMap + func awsModelID(requestModel string) (string, error) { if awsModelID, ok := AwsModelIDMap[requestModel]; ok { return awsModelID, nil diff --git a/relay/adaptor/aws/llama3/adapter.go b/relay/adaptor/aws/llama3/adapter.go index 83edbc9d..a58b5035 100644 --- a/relay/adaptor/aws/llama3/adapter.go +++ b/relay/adaptor/aws/llama3/adapter.go @@ -6,7 +6,9 @@ import ( "github.com/gin-gonic/gin" "github.com/pkg/errors" + "github.com/songquanpeng/one-api/relay/adaptor" 
"github.com/songquanpeng/one-api/relay/adaptor/aws/utils" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -35,3 +37,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta } return } + +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} diff --git a/relay/adaptor/aws/llama3/main.go b/relay/adaptor/aws/llama3/main.go index e5fcd89f..a9531ddf 100644 --- a/relay/adaptor/aws/llama3/main.go +++ b/relay/adaptor/aws/llama3/main.go @@ -22,6 +22,7 @@ import ( "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay/adaptor/aws/utils" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" relaymodel "github.com/songquanpeng/one-api/relay/model" ) @@ -32,6 +33,11 @@ var AwsModelIDMap = map[string]string{ "llama3-70b-8192": "meta.llama3-70b-instruct-v1:0", } +var RatioMap = map[string]ratio.Ratio{ + "llama3-8b-8192": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "llama3-70b-8192": {Input: 2.65 * ratio.MILLI_USD, Output: 3.5 * ratio.MILLI_USD}, +} + func awsModelID(requestModel string) (string, error) { if awsModelID, ok := AwsModelIDMap[requestModel]; ok { return awsModelID, nil diff --git a/relay/adaptor/aws/utils/adaptor.go b/relay/adaptor/aws/utils/adaptor.go index 4cb880f2..70072b79 100644 --- a/relay/adaptor/aws/utils/adaptor.go +++ b/relay/adaptor/aws/utils/adaptor.go @@ -9,6 +9,7 @@ import ( "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -16,6 +17,7 @@ import ( type AwsAdapter interface { ConvertRequest(c *gin.Context, relayMode int, request 
*model.GeneralOpenAIRequest) (any, error) DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) + GetRatio(meta *meta.Meta) *ratio.Ratio } type Adaptor struct { diff --git a/relay/adaptor/baichuan/constants.go b/relay/adaptor/baichuan/constants.go index cb20a1ff..4ad41d30 100644 --- a/relay/adaptor/baichuan/constants.go +++ b/relay/adaptor/baichuan/constants.go @@ -1,7 +1,10 @@ package baichuan -var ModelList = []string{ - "Baichuan2-Turbo", - "Baichuan2-Turbo-192k", - "Baichuan-Text-Embedding", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://platform.baichuan-ai.com/price +var RatioMap = map[string]ratio.Ratio{ + "Baichuan2-Turbo": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "Baichuan2-Turbo-192k": {Input: 0.016 * ratio.RMB, Output: 0.016 * ratio.RMB}, + "Baichuan-Text-Embedding": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB}, } diff --git a/relay/adaptor/baidu/adaptor.go b/relay/adaptor/baidu/adaptor.go index 15306b95..135313bc 100644 --- a/relay/adaptor/baidu/adaptor.go +++ b/relay/adaptor/baidu/adaptor.go @@ -3,15 +3,16 @@ package baidu import ( "errors" "fmt" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/relaymode" "io" "net/http" "strings" "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" + "github.com/songquanpeng/one-api/relay/relaymode" ) type Adaptor struct { @@ -120,6 +121,10 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Read return adaptor.DoRequestHelper(a, c, meta, requestBody) } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage 
*model.Usage, err *model.ErrorWithStatusCode) { if meta.IsStream { err, usage = StreamHandler(c, resp) @@ -135,7 +140,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met } func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/baidu/constants.go b/relay/adaptor/baidu/constants.go index f952adc6..38083397 100644 --- a/relay/adaptor/baidu/constants.go +++ b/relay/adaptor/baidu/constants.go @@ -1,20 +1,38 @@ package baidu -var ModelList = []string{ - "ERNIE-4.0-8K", - "ERNIE-3.5-8K", - "ERNIE-3.5-8K-0205", - "ERNIE-3.5-8K-1222", - "ERNIE-Bot-8K", - "ERNIE-3.5-4K-0205", - "ERNIE-Speed-8K", - "ERNIE-Speed-128K", - "ERNIE-Lite-8K-0922", - "ERNIE-Lite-8K-0308", - "ERNIE-Tiny-8K", - "BLOOMZ-7B", - "Embedding-V1", - "bge-large-zh", - "bge-large-en", - "tao-8k", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 +var RatioMap = map[string]ratio.Ratio{ + "ERNIE-4.0-Turbo-128K": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "ERNIE-4.0-Turbo-8K": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "ERNIE-4.0-Turbo-8K-Preview": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "ERNIE-4.0-Turbo-8K-0628": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB}, + "ERNIE-4.0-8K": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB}, + "ERNIE-4.0-8K-0613": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB}, + "ERNIE-4.0-8K-Latest": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB}, + "ERNIE-4.0-8K-Preview": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB}, + "ERNIE-3.5-128K": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "ERNIE-3.5-8K": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "ERNIE-3.5-8K-0701": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "ERNIE-3.5-8K-Preview": {Input: 0.0008 * ratio.RMB, 
Output: 0.002 * ratio.RMB}, + "ERNIE-3.5-8K-0613": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "ERNIE-Speed-Pro-128K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "ERNIE-Novel-8K": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB}, + "ERNIE-Speed-128K": {Input: 0.1, Output: 0.1}, // free of charge + "ERNIE-Speed-8K": {Input: 0.1, Output: 0.1}, // free of charge + "ERNIE-Lite-8K": {Input: 0.1, Output: 0.1}, // free of charge + "ERNIE-Tiny-8K": {Input: 0.1, Output: 0.1}, // free of charge + "ERNIE-Functions-8K": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "ERNIE-Character-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "ERNIE-Character-Fiction-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "ERNIE-Character-Fiction-8K-Preview": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "ERNIE-Lite-Pro-128K": {Input: 0.0002 * ratio.RMB, Output: 0.0004 * ratio.RMB}, + "Qianfan-Agent-Speed-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "Qianfan-Agent-Lite-8K": {Input: 0.0002 * ratio.RMB, Output: 0.0004 * ratio.RMB}, + "BLOOMZ-7B": {Input: 0.004 * ratio.RMB, Output: 0.004 * ratio.RMB}, + "Embedding-V1": {Input: 0.0005 * ratio.RMB, Output: 0}, + "bge-large-zh": {Input: 0.0005 * ratio.RMB, Output: 0}, + "bge-large-en": {Input: 0.0005 * ratio.RMB, Output: 0}, + "tao-8k": {Input: 0.0005 * ratio.RMB, Output: 0}, } diff --git a/relay/adaptor/cloudflare/adaptor.go b/relay/adaptor/cloudflare/adaptor.go index 97e3dbb2..4985d5b3 100644 --- a/relay/adaptor/cloudflare/adaptor.go +++ b/relay/adaptor/cloudflare/adaptor.go @@ -9,6 +9,7 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" @@ -91,6 +92,10 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) 
GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, nil) +} + func (a *Adaptor) GetModelList() []string { return ModelList } diff --git a/relay/adaptor/cohere/adaptor.go b/relay/adaptor/cohere/adaptor.go index 6fdb1b04..5d6aa2f4 100644 --- a/relay/adaptor/cohere/adaptor.go +++ b/relay/adaptor/cohere/adaptor.go @@ -8,6 +8,7 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -55,8 +56,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/cohere/constant.go b/relay/adaptor/cohere/constant.go index 9e70652c..be56429d 100644 --- a/relay/adaptor/cohere/constant.go +++ b/relay/adaptor/cohere/constant.go @@ -1,14 +1,19 @@ package cohere -var ModelList = []string{ - "command", "command-nightly", - "command-light", "command-light-nightly", - "command-r", "command-r-plus", -} +import "github.com/songquanpeng/one-api/relay/billing/ratio" -func init() { - num := len(ModelList) - for i := 0; i < num; i++ { - ModelList = append(ModelList, ModelList[i]+"-internet") - } +// https://cohere.com/pricing +var RatioMap = map[string]ratio.Ratio{ + "command": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD}, + "command-internet": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD}, + "command-nightly": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD}, + "command-nightly-internet": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD}, + "command-light": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + 
"command-light-internet": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "command-light-nightly": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "command-light-nightly-internet": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "command-r": {Input: 0.15 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "command-r-internet": {Input: 0.15 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "command-r-plus": {Input: 2.5 * ratio.MILLI_USD, Output: 10 * ratio.MILLI_USD}, + "command-r-plus-internet": {Input: 2.5 * ratio.MILLI_USD, Output: 10 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/common.go b/relay/adaptor/common.go index 8953d7a3..766cc3b5 100644 --- a/relay/adaptor/common.go +++ b/relay/adaptor/common.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common/client" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "io" "net/http" @@ -50,3 +51,24 @@ func DoRequest(c *gin.Context, req *http.Request) (*http.Response, error) { _ = c.Request.Body.Close() return resp, nil } + +// GetRatioHelper returns a copy of the ratio that ratioMap holds for meta.OriginModelName, +// falling back to meta.ActualModelName; it returns nil when neither name is a key. +func GetRatioHelper(meta *meta.Meta, ratioMap map[string]ratio.Ratio) *ratio.Ratio { + if r, ok := ratioMap[meta.OriginModelName]; ok { + return &r + } + if r, ok := ratioMap[meta.ActualModelName]; ok { + return &r + } + return nil +} + +// GetModelListHelper returns the keys of ratioMap as a non-nil slice; the order is unspecified (map iteration). +func GetModelListHelper(ratioMap map[string]ratio.Ratio) []string { + modelList := make([]string, 0, len(ratioMap)) + for model := range ratioMap { + modelList = append(modelList, model) + } + return modelList +} diff --git a/relay/adaptor/coze/adaptor.go b/relay/adaptor/coze/adaptor.go index 44f560e8..51d25531 100644 --- a/relay/adaptor/coze/adaptor.go +++ b/relay/adaptor/coze/adaptor.go @@ -3,13 +3,15 @@ package coze import ( "errors" "fmt" + "io" + "net/http" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" 
"github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" - "io" - "net/http" ) type Adaptor struct { @@ -66,8 +68,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/coze/constants.go b/relay/adaptor/coze/constants.go index d20fd875..e98da846 100644 --- a/relay/adaptor/coze/constants.go +++ b/relay/adaptor/coze/constants.go @@ -1,3 +1,5 @@ package coze -var ModelList = []string{} +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{} diff --git a/relay/adaptor/deepl/adaptor.go b/relay/adaptor/deepl/adaptor.go index d018a096..8e413672 100644 --- a/relay/adaptor/deepl/adaptor.go +++ b/relay/adaptor/deepl/adaptor.go @@ -3,12 +3,14 @@ package deepl import ( "errors" "fmt" - "github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/relay/adaptor" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/model" "io" "net/http" + + "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/model" ) type Adaptor struct { @@ -64,8 +66,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a 
*Adaptor) GetChannelName() string { diff --git a/relay/adaptor/deepl/constants.go b/relay/adaptor/deepl/constants.go index 6a4f2545..f98b8c84 100644 --- a/relay/adaptor/deepl/constants.go +++ b/relay/adaptor/deepl/constants.go @@ -1,9 +1,10 @@ package deepl -// https://developers.deepl.com/docs/api-reference/glossaries +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "deepl-zh", - "deepl-en", - "deepl-ja", +// https://developers.deepl.com/docs/api-reference/glossaries +var RatioMap = map[string]ratio.Ratio{ + "deepl-zh": {Input: 25.0 * ratio.MILLI_USD, Output: 0}, + "deepl-en": {Input: 25.0 * ratio.MILLI_USD, Output: 0}, + "deepl-ja": {Input: 25.0 * ratio.MILLI_USD, Output: 0}, } diff --git a/relay/adaptor/deepseek/constants.go b/relay/adaptor/deepseek/constants.go index ad840bc2..57cc24ad 100644 --- a/relay/adaptor/deepseek/constants.go +++ b/relay/adaptor/deepseek/constants.go @@ -1,6 +1,8 @@ package deepseek -var ModelList = []string{ - "deepseek-chat", - "deepseek-coder", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "deepseek-chat": {Input: 1 * ratio.MILLI_RMB, Output: 2 * ratio.MILLI_RMB}, + "deepseek-coder": {Input: 1 * ratio.MILLI_RMB, Output: 2 * ratio.MILLI_RMB}, } diff --git a/relay/adaptor/doubao/constants.go b/relay/adaptor/doubao/constants.go index dbe819dd..78b81f86 100644 --- a/relay/adaptor/doubao/constants.go +++ b/relay/adaptor/doubao/constants.go @@ -1,13 +1,15 @@ package doubao -// https://console.volcengine.com/ark/region:ark+cn-beijing/model +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "Doubao-pro-128k", - "Doubao-pro-32k", - "Doubao-pro-4k", - "Doubao-lite-128k", - "Doubao-lite-32k", - "Doubao-lite-4k", - "Doubao-embedding", +// https://www.volcengine.com/product/doubao +var RatioMap = map[string]ratio.Ratio{ + "Doubao-vision-pro-32k": {Input: 0.0030 * ratio.RMB, Output: 0.0090 * ratio.RMB}, + 
"Doubao-vision-lite-32k": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB}, + "Doubao-pro-256k": {Input: 0.0050 * ratio.RMB, Output: 0.0090 * ratio.RMB}, + "Doubao-pro-128k": {Input: 0.0050 * ratio.RMB, Output: 0.0090 * ratio.RMB}, + "Doubao-pro-32k": {Input: 0.0008 * ratio.RMB, Output: 0.0020 * ratio.RMB}, + "Doubao-lite-128k": {Input: 0.0008 * ratio.RMB, Output: 0.0010 * ratio.RMB}, + "Doubao-lite-32k": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB}, + "Doubao-embedding": {Input: 0.0005 * ratio.RMB, Output: 0}, } diff --git a/relay/adaptor/gemini/adaptor.go b/relay/adaptor/gemini/adaptor.go index a86fde40..dbe70a03 100644 --- a/relay/adaptor/gemini/adaptor.go +++ b/relay/adaptor/gemini/adaptor.go @@ -11,6 +11,7 @@ import ( "github.com/songquanpeng/one-api/common/helper" channelhelper "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" @@ -92,8 +93,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return channelhelper.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return channelhelper.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/gemini/constants.go b/relay/adaptor/gemini/constants.go index 9d1cbc4a..6e429ada 100644 --- a/relay/adaptor/gemini/constants.go +++ b/relay/adaptor/gemini/constants.go @@ -1,11 +1,53 @@ package gemini +import "github.com/songquanpeng/one-api/relay/billing/ratio" + // https://ai.google.dev/models/gemini -var ModelList = []string{ - "gemini-pro", "gemini-1.0-pro", - "gemini-1.5-flash", "gemini-1.5-pro", - "text-embedding-004", "aqa", - "gemini-2.0-flash-exp", - 
"gemini-2.0-flash-thinking-exp", +var gemini15FlashRatio = ratio.Ratio{ + Input: 0.075 * ratio.MILLI_USD, + Output: 0.30 * ratio.MILLI_USD, + LongThreshold: 128000, + LongInput: 0.15 * ratio.MILLI_USD, + LongOutput: 0.60 * ratio.MILLI_USD, +} + +var gemini15ProRatio = ratio.Ratio{ + Input: 1.25 * ratio.MILLI_USD, + Output: 5.00 * ratio.MILLI_USD, + LongThreshold: 128000, + LongInput: 2.50 * ratio.MILLI_USD, + LongOutput: 10.00 * ratio.MILLI_USD, +} + +var gemini10ProRatio = ratio.Ratio{ + Input: 0.50 * ratio.MILLI_USD, + Output: 1.50 * ratio.MILLI_USD, +} + +var gemini15Flash8bRatio = ratio.Ratio{ + Input: 0.0375 * ratio.MILLI_USD, + Output: 0.15 * ratio.MILLI_USD, + LongThreshold: 128000, + LongInput: 0.075 * ratio.MILLI_USD, + LongOutput: 0.30 * ratio.MILLI_USD, +} + +// https://ai.google.dev/pricing +// https://ai.google.dev/gemini-api/docs/models/gemini +// https://cloud.google.com/vertex-ai/generative-ai/pricing?hl=zh-cn#google_models +var RatioMap = map[string]ratio.Ratio{ + "gemini-2.0-flash-exp": {Input: 0.1, Output: 0.1}, // currently free of charge + "gemini-2.0-flash-thinking-exp": {Input: 0.1, Output: 0.1}, // currently free of charge + "gemini-1.5-flash": gemini15FlashRatio, + "gemini-1.5-flash-001": gemini15FlashRatio, + "gemini-1.5-flash-002": gemini15FlashRatio, + "gemini-1.5-pro": gemini15ProRatio, + "gemini-1.5-pro-001": gemini15ProRatio, + "gemini-1.5-pro-002": gemini15ProRatio, + "gemini-1.0-pro": gemini10ProRatio, + "gemini-1.0-pro-001": gemini10ProRatio, + "gemini-1.5-flash-8b": gemini15Flash8bRatio, + "gemini-1.5-flash-8b-001": gemini15Flash8bRatio, + "text-embedding-004": {Input: 0.1, Output: 0.1}, // free of charge } diff --git a/relay/adaptor/groq/constants.go b/relay/adaptor/groq/constants.go index 0864ebe7..7e395b7a 100644 --- a/relay/adaptor/groq/constants.go +++ b/relay/adaptor/groq/constants.go @@ -1,27 +1,18 @@ package groq -// https://console.groq.com/docs/models +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var 
ModelList = []string{ - "gemma-7b-it", - "gemma2-9b-it", - "llama-3.1-70b-versatile", - "llama-3.1-8b-instant", - "llama-3.2-11b-text-preview", - "llama-3.2-11b-vision-preview", - "llama-3.2-1b-preview", - "llama-3.2-3b-preview", - "llama-3.2-11b-vision-preview", - "llama-3.2-90b-text-preview", - "llama-3.2-90b-vision-preview", - "llama-guard-3-8b", - "llama3-70b-8192", - "llama3-8b-8192", - "llama3-groq-70b-8192-tool-use-preview", - "llama3-groq-8b-8192-tool-use-preview", - "llava-v1.5-7b-4096-preview", - "mixtral-8x7b-32768", - "distil-whisper-large-v3-en", - "whisper-large-v3", - "whisper-large-v3-turbo", +// https://groq.com/pricing/ +// https://console.groq.com/docs/models +var RatioMap = map[string]ratio.Ratio{ + "distil-whisper-large-v3-en": {Input: 0.02 / 3600 * 20 * ratio.USD, Output: 0}, + "gemma2-9b-it": {Input: 0.20 * ratio.MILLI_USD, Output: 0.20 * ratio.MILLI_USD}, + "llama-3.3-70b-versatile": {Input: 0.59 * ratio.MILLI_USD, Output: 0.79 * ratio.MILLI_USD}, + "llama-3.1-8b-instant": {Input: 0.05 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD}, + "llama-guard-3-8b": {Input: 0.20 * ratio.MILLI_USD, Output: 0.20 * ratio.MILLI_USD}, + "llama3-70b-8192": {Input: 0.59 * ratio.MILLI_USD, Output: 0.79 * ratio.MILLI_USD}, + "llama3-8b-8192": {Input: 0.05 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD}, + "mixtral-8x7b-32768": {Input: 0.24 * ratio.MILLI_USD, Output: 0.24 * ratio.MILLI_USD}, + "whisper-large-v3": {Input: 0.111 / 3600 * 20 * ratio.USD, Output: 0}, + "whisper-large-v3-turbo": {Input: 0.04 / 3600 * 20 * ratio.USD, Output: 0}, } diff --git a/relay/adaptor/interface.go b/relay/adaptor/interface.go index 01b2e2cb..cdd89bf9 100644 --- a/relay/adaptor/interface.go +++ b/relay/adaptor/interface.go @@ -2,6 +2,7 @@ package adaptor import ( "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "io" @@ -16,6 +17,7 @@ type Adaptor 
interface { ConvertImageRequest(request *model.ImageRequest) (any, error) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) + GetRatio(meta *meta.Meta) *ratio.Ratio GetModelList() []string GetChannelName() string } diff --git a/relay/adaptor/lingyiwanwu/constants.go b/relay/adaptor/lingyiwanwu/constants.go index 30000e9d..6185614f 100644 --- a/relay/adaptor/lingyiwanwu/constants.go +++ b/relay/adaptor/lingyiwanwu/constants.go @@ -1,9 +1,9 @@ package lingyiwanwu -// https://platform.lingyiwanwu.com/docs +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "yi-34b-chat-0205", - "yi-34b-chat-200k", - "yi-vl-plus", +// https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 +var RatioMap = map[string]ratio.Ratio{ + "yi-lightning": {Input: 0.99 * ratio.MILLI_RMB, Output: 0.99 * ratio.MILLI_RMB}, + "yi-vision-v2": {Input: 6 * ratio.MILLI_RMB, Output: 6 * ratio.MILLI_RMB}, } diff --git a/relay/adaptor/minimax/constants.go b/relay/adaptor/minimax/constants.go index 1b2fc104..0e780e1e 100644 --- a/relay/adaptor/minimax/constants.go +++ b/relay/adaptor/minimax/constants.go @@ -1,11 +1,15 @@ package minimax -// https://www.minimaxi.com/document/guides/chat-model/V2?id=65e0736ab2845de20908e2dd +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "abab6.5-chat", - "abab6.5s-chat", - "abab6-chat", - "abab5.5-chat", - "abab5.5s-chat", +// https://platform.minimaxi.com/document/Price +// https://platform.minimaxi.com/document/ChatCompletion%20v2 + +var RatioMap = map[string]ratio.Ratio{ + "abab7-chat-preview": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB}, + "abab6.5s-chat": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB}, + "abab6.5g-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + 
"abab6.5t-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + "abab5.5s-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB}, + "abab5.5-chat": {Input: 0.015 * ratio.RMB, Output: 0.015 * ratio.RMB}, } diff --git a/relay/adaptor/mistral/constants.go b/relay/adaptor/mistral/constants.go index cdb157f5..2d243267 100644 --- a/relay/adaptor/mistral/constants.go +++ b/relay/adaptor/mistral/constants.go @@ -1,10 +1,20 @@ package mistral -var ModelList = []string{ - "open-mistral-7b", - "open-mixtral-8x7b", - "mistral-small-latest", - "mistral-medium-latest", - "mistral-large-latest", - "mistral-embed", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://mistral.ai/technology/#pricing +var RatioMap = map[string]ratio.Ratio{ + "mistral-large-latest": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD}, + "pixtral-large-latest": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD}, + "mistral-small-latest": {Input: 0.2 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "codestral-latest": {Input: 0.2 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD}, + "ministral-8b-latest": {Input: 0.1 * ratio.MILLI_USD, Output: 0.1 * ratio.MILLI_USD}, + "ministral-3b-latest": {Input: 0.04 * ratio.MILLI_USD, Output: 0.04 * ratio.MILLI_USD}, + "mistral-embed": {Input: 0.1 * ratio.MILLI_USD, Output: 0}, + "mistral-moderation-latest": {Input: 0.1 * ratio.MILLI_USD, Output: 0}, + "pixtral-12b": {Input: 0.15 * ratio.MILLI_USD, Output: 0.15 * ratio.MILLI_USD}, + "mistral-nemo": {Input: 0.15 * ratio.MILLI_USD, Output: 0.15 * ratio.MILLI_USD}, + "open-mistral-7b": {Input: 0.25 * ratio.MILLI_USD, Output: 0.25 * ratio.MILLI_USD}, + "open-mixtral-8x7b": {Input: 0.7 * ratio.MILLI_USD, Output: 0.7 * ratio.MILLI_USD}, + "open-mixtral-8x22b": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/moonshot/constants.go b/relay/adaptor/moonshot/constants.go index 1b86f0fa..6f97e1ca 100644 --- 
a/relay/adaptor/moonshot/constants.go +++ b/relay/adaptor/moonshot/constants.go @@ -1,7 +1,10 @@ package moonshot -var ModelList = []string{ - "moonshot-v1-8k", - "moonshot-v1-32k", - "moonshot-v1-128k", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://platform.moonshot.cn/docs/pricing/chat#%E4%BA%A7%E5%93%81%E5%AE%9A%E4%BB%B7 +var RatioMap = map[string]ratio.Ratio{ + "moonshot-v1-8k": {Input: 12 * ratio.MILLI_RMB, Output: 12 * ratio.MILLI_RMB}, + "moonshot-v1-32k": {Input: 24 * ratio.MILLI_RMB, Output: 24 * ratio.MILLI_RMB}, + "moonshot-v1-128k": {Input: 60 * ratio.MILLI_RMB, Output: 60 * ratio.MILLI_RMB}, } diff --git a/relay/adaptor/novita/constants.go b/relay/adaptor/novita/constants.go index c6618308..cf5c6210 100644 --- a/relay/adaptor/novita/constants.go +++ b/relay/adaptor/novita/constants.go @@ -1,19 +1,38 @@ package novita +import "github.com/songquanpeng/one-api/relay/billing/ratio" + // https://novita.ai/llm-api -var ModelList = []string{ - "meta-llama/llama-3-8b-instruct", - "meta-llama/llama-3-70b-instruct", - "nousresearch/hermes-2-pro-llama-3-8b", - "nousresearch/nous-hermes-llama2-13b", - "mistralai/mistral-7b-instruct", - "cognitivecomputations/dolphin-mixtral-8x22b", - "sao10k/l3-70b-euryale-v2.1", - "sophosympatheia/midnight-rose-70b", - "gryphe/mythomax-l2-13b", - "Nous-Hermes-2-Mixtral-8x7B-DPO", - "lzlv_70b", - "teknium/openhermes-2.5-mistral-7b", - "microsoft/wizardlm-2-8x22b", +var RatioMap = map[string]ratio.Ratio{ + "meta-llama/llama-3.3-70b-instruct": {Input: 0.39 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD}, + "meta-llama/llama-3.1-8b-instruct": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD}, + "meta-llama/llama-3.1-8b-instruct-max": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD}, + "meta-llama/llama-3.1-70b-instruct": {Input: 0.34 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD}, + "meta-llama/llama-3-8b-instruct": {Input: 0.04 * ratio.MILLI_USD, Output: 0.04 * 
ratio.MILLI_USD}, + "meta-llama/llama-3-70b-instruct": {Input: 0.51 * ratio.MILLI_USD, Output: 0.74 * ratio.MILLI_USD}, + "gryphe/mythomax-l2-13b": {Input: 0.09 * ratio.MILLI_USD, Output: 0.09 * ratio.MILLI_USD}, + "google/gemma-2-9b-it": {Input: 0.08 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD}, + "mistralai/mistral-nemo": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD}, + "microsoft/wizardlm-2-8x22b": {Input: 0.62 * ratio.MILLI_USD, Output: 0.62 * ratio.MILLI_USD}, + "mistralai/mistral-7b-instruct": {Input: 0.059 * ratio.MILLI_USD, Output: 0.059 * ratio.MILLI_USD}, + "openchat/openchat-7b": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD}, + "nousresearch/hermes-2-pro-llama-3-8b": {Input: 0.14 * ratio.MILLI_USD, Output: 0.14 * ratio.MILLI_USD}, + "sao10k/l3-70b-euryale-v2.1": {Input: 1.48 * ratio.MILLI_USD, Output: 1.48 * ratio.MILLI_USD}, + "cognitivecomputations/dolphin-mixtral-8x22b": {Input: 0.9 * ratio.MILLI_USD, Output: 0.9 * ratio.MILLI_USD}, + "jondurbin/airoboros-l2-70b": {Input: 0.5 * ratio.MILLI_USD, Output: 0.5 * ratio.MILLI_USD}, + "nousresearch/nous-hermes-llama2-13b": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD}, + "teknium/openhermes-2.5-mistral-7b": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD}, + "sophosympatheia/midnight-rose-70b": {Input: 0.8 * ratio.MILLI_USD, Output: 0.8 * ratio.MILLI_USD}, + "Sao10K/L3-8B-Stheno-v3.2": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD}, + "sao10k/l3-8b-lunaris": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD}, + "qwen/qwen-2-vl-72b-instruct": {Input: 0.45 * ratio.MILLI_USD, Output: 0.45 * ratio.MILLI_USD}, + "meta-llama/llama-3.2-1b-instruct": {Input: 0.02 * ratio.MILLI_USD, Output: 0.02 * ratio.MILLI_USD}, + "meta-llama/llama-3.2-11b-vision-instruct": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD}, + "meta-llama/llama-3.2-3b-instruct": {Input: 0.03 * ratio.MILLI_USD, Output: 0.05 * 
ratio.MILLI_USD}, + "meta-llama/llama-3.1-8b-instruct-bf16": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD}, + "qwen/qwen-2.5-72b-instruct": {Input: 0.38 * ratio.MILLI_USD, Output: 0.4 * ratio.MILLI_USD}, + "sao10k/l31-70b-euryale-v2.2": {Input: 1.48 * ratio.MILLI_USD, Output: 1.48 * ratio.MILLI_USD}, + "qwen/qwen-2-7b-instruct": {Input: 0.054 * ratio.MILLI_USD, Output: 0.054 * ratio.MILLI_USD}, + "qwen/qwen-2-72b-instruct": {Input: 0.34 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/ollama/adaptor.go b/relay/adaptor/ollama/adaptor.go index ad1f8983..cad86216 100644 --- a/relay/adaptor/ollama/adaptor.go +++ b/relay/adaptor/ollama/adaptor.go @@ -3,11 +3,13 @@ package ollama import ( "errors" "fmt" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/relaymode" "io" "net/http" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/relaymode" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/model" @@ -73,8 +75,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/ollama/constants.go b/relay/adaptor/ollama/constants.go index d9dc72a8..97ee4f47 100644 --- a/relay/adaptor/ollama/constants.go +++ b/relay/adaptor/ollama/constants.go @@ -1,11 +1,13 @@ package ollama -var ModelList = []string{ - "codellama:7b-instruct", - "llama2:7b", - "llama2:latest", - "llama3:latest", - "phi3:latest", - "qwen:0.5b-chat", - "qwen:7b", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = 
map[string]ratio.Ratio{ + "codellama:7b-instruct": {Input: 0, Output: 0}, + "llama2:7b": {Input: 0, Output: 0}, + "llama2:latest": {Input: 0, Output: 0}, + "llama3:latest": {Input: 0, Output: 0}, + "phi3:latest": {Input: 0, Output: 0}, + "qwen:0.5b-chat": {Input: 0, Output: 0}, + "qwen:7b": {Input: 0, Output: 0}, } diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go index 6946e402..88b2579d 100644 --- a/relay/adaptor/openai/adaptor.go +++ b/relay/adaptor/openai/adaptor.go @@ -12,6 +12,7 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor/doubao" "github.com/songquanpeng/one-api/relay/adaptor/minimax" "github.com/songquanpeng/one-api/relay/adaptor/novita" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" @@ -118,9 +119,14 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + _, ratioMap := GetCompatibleChannelMeta(a.ChannelType) + return adaptor.GetRatioHelper(meta, ratioMap) +} + func (a *Adaptor) GetModelList() []string { - _, modelList := GetCompatibleChannelMeta(a.ChannelType) - return modelList + _, ratioMap := GetCompatibleChannelMeta(a.ChannelType) + return adaptor.GetModelListHelper(ratioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/openai/compatible.go b/relay/adaptor/openai/compatible.go index 15b4dcc0..050b97d2 100644 --- a/relay/adaptor/openai/compatible.go +++ b/relay/adaptor/openai/compatible.go @@ -15,6 +15,7 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor/stepfun" "github.com/songquanpeng/one-api/relay/adaptor/togetherai" "github.com/songquanpeng/one-api/relay/adaptor/xai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" ) @@ -36,39 +37,39 @@ var CompatibleChannels = 
[]int{ channeltype.XAI, } -func GetCompatibleChannelMeta(channelType int) (string, []string) { +func GetCompatibleChannelMeta(channelType int) (string, map[string]ratio.Ratio) { switch channelType { case channeltype.Azure: - return "azure", ModelList + return "azure", RatioMap case channeltype.AI360: - return "360", ai360.ModelList + return "360", ai360.RatioMap case channeltype.Moonshot: - return "moonshot", moonshot.ModelList + return "moonshot", moonshot.RatioMap case channeltype.Baichuan: - return "baichuan", baichuan.ModelList + return "baichuan", baichuan.RatioMap case channeltype.Minimax: - return "minimax", minimax.ModelList + return "minimax", minimax.RatioMap case channeltype.Mistral: - return "mistralai", mistral.ModelList + return "mistralai", mistral.RatioMap case channeltype.Groq: - return "groq", groq.ModelList + return "groq", groq.RatioMap case channeltype.LingYiWanWu: - return "lingyiwanwu", lingyiwanwu.ModelList + return "lingyiwanwu", lingyiwanwu.RatioMap case channeltype.StepFun: - return "stepfun", stepfun.ModelList + return "stepfun", stepfun.RatioMap case channeltype.DeepSeek: - return "deepseek", deepseek.ModelList + return "deepseek", deepseek.RatioMap case channeltype.TogetherAI: - return "together.ai", togetherai.ModelList + return "together.ai", togetherai.RatioMap case channeltype.Doubao: - return "doubao", doubao.ModelList + return "doubao", doubao.RatioMap case channeltype.Novita: - return "novita", novita.ModelList + return "novita", novita.RatioMap case channeltype.SiliconFlow: - return "siliconflow", siliconflow.ModelList + return "siliconflow", siliconflow.RatioMap case channeltype.XAI: - return "xai", xai.ModelList + return "xai", xai.RatioMap default: - return "openai", ModelList + return "openai", RatioMap } } diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go index 8a643bc6..f749e1e6 100644 --- a/relay/adaptor/openai/constants.go +++ b/relay/adaptor/openai/constants.go @@ -1,27 +1,61 @@ package 
openai -var ModelList = []string{ - "gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-instruct", - "gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-0125-preview", - "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", - "gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", - "gpt-4o", "gpt-4o-2024-05-13", - "gpt-4o-2024-08-06", - "gpt-4o-2024-11-20", - "chatgpt-4o-latest", - "gpt-4o-mini", "gpt-4o-mini-2024-07-18", - "gpt-4-vision-preview", - "text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large", - "text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003", - "text-moderation-latest", "text-moderation-stable", - "text-davinci-edit-001", - "davinci-002", "babbage-002", - "dall-e-2", "dall-e-3", - "whisper-1", - "tts-1", "tts-1-1106", "tts-1-hd", "tts-1-hd-1106", - "o1", "o1-2024-12-17", - "o1-preview", "o1-preview-2024-09-12", - "o1-mini", "o1-mini-2024-09-12", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "gpt-4": {Input: 15, Output: 30}, + "gpt-4-0314": {Input: 15, Output: 30}, + "gpt-4-0613": {Input: 15, Output: 30}, + "gpt-4-32k": {Input: 30, Output: 60}, + "gpt-4-32k-0314": {Input: 30, Output: 60}, + "gpt-4-32k-0613": {Input: 30, Output: 60}, + "gpt-4-1106-preview": {Input: 5, Output: 15}, + "gpt-4-0125-preview": {Input: 5, Output: 15}, + "gpt-4-turbo-preview": {Input: 5, Output: 15}, // $0.01 / 1K tokens + "gpt-4-turbo": {Input: 5, Output: 15}, // $0.01 / 1K tokens + "gpt-4-turbo-2024-04-09": {Input: 5, Output: 15}, // $0.01 / 1K tokens + "gpt-4o": {Input: 1.25, Output: 5}, // $0.0025 / 1K tokens + "chatgpt-4o-latest": {Input: 2.5, Output: 7.5}, // $0.005 / 1K tokens + "gpt-4o-2024-05-13": {Input: 2.5, Output: 7.5}, // $0.005 / 1K tokens + "gpt-4o-2024-08-06": {Input: 1.25, Output: 5}, // $0.0025 / 1K 
tokens + "gpt-4o-2024-11-20": {Input: 1.25, Output: 5}, // $0.0025 / 1K tokens + "gpt-4o-mini": {Input: 0.075, Output: 0.3}, // $0.00015 / 1K tokens + "gpt-4o-mini-2024-07-18": {Input: 0.075, Output: 0.3}, // $0.00015 / 1K tokens + "gpt-4-vision-preview": {Input: 5, Output: 15}, // $0.01 / 1K tokens + "gpt-3.5-turbo": {Input: 0.25, Output: 0.75}, // $0.0005 / 1K tokens + "gpt-3.5-turbo-0301": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens + "gpt-3.5-turbo-0613": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens + "gpt-3.5-turbo-16k": {Input: 1.5, Output: 2}, // $0.003 / 1K tokens + "gpt-3.5-turbo-16k-0613": {Input: 1.5, Output: 2}, // $0.003 / 1K tokens + "gpt-3.5-turbo-instruct": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens + "gpt-3.5-turbo-1106": {Input: 0.5, Output: 1}, // $0.001 / 1K tokens + "gpt-3.5-turbo-0125": {Input: 0.25, Output: 0.75}, // $0.0005 / 1K tokens + "davinci-002": {Input: 1, Output: 1}, // $0.002 / 1K tokens + "babbage-002": {Input: 0.2, Output: 0.2}, // $0.0004 / 1K tokens + "text-ada-001": {Input: 0.2, Output: 0.2}, // $0.0004 / 1K tokens + "text-babbage-001": {Input: 0.25, Output: 0.25}, // $0.0005 / 1K tokens + "text-curie-001": {Input: 1, Output: 1}, // $0.002 / 1K tokens + "text-davinci-002": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "text-davinci-003": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "text-davinci-edit-001": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "code-davinci-edit-001": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "whisper-1": {Input: 1, Output: 1}, // $0.006 / minute -> $0.002 / 20 seconds -> $0.002 / 1K tokens -> 20 seconds / 1K tokens + "tts-1": {Input: 7.5, Output: 7.5}, // $0.015 / 1K characters + "tts-1-1106": {Input: 7.5, Output: 7.5}, // $0.015 / 1K characters + "tts-1-hd": {Input: 15, Output: 15}, // $0.030 / 1K characters + "tts-1-hd-1106": {Input: 15, Output: 15}, // $0.030 / 1K characters + "davinci": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "curie": {Input: 10, Output: 10}, // 
$0.02 / 1K tokens + "babbage": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "ada": {Input: 10, Output: 10}, // $0.02 / 1K tokens + "text-embedding-ada-002": {Input: 0.05, Output: 0}, // $0.0001 / 1K tokens + "text-embedding-3-small": {Input: 0.01, Output: 0}, // $0.00002 / 1K tokens + "text-embedding-3-large": {Input: 0.065, Output: 0}, // $0.00013 / 1K tokens + "text-search-ada-doc-001": {Input: 10, Output: 0}, // $0.02 / 1K tokens + "text-moderation-stable": {Input: 0.1, Output: 0}, // $0.0002 / 1K tokens + "text-moderation-latest": {Input: 0.1, Output: 0}, // $0.0002 / 1K tokens + "omni-moderation-latest": {Input: 0.1, Output: 0}, // $0.0002 / 1K tokens + "omni-moderation-2024-09-26": {Input: 0.1, Output: 0}, // $0.0002 / 1K tokens + "dall-e-2": {Input: 0.02 * ratio.USD, Output: 0}, // $0.016 - $0.020 / image + "dall-e-3": {Input: 0.04 * ratio.USD, Output: 0}, // $0.040 - $0.120 / image } diff --git a/relay/adaptor/palm/adaptor.go b/relay/adaptor/palm/adaptor.go index 98aa3e18..51a615d2 100644 --- a/relay/adaptor/palm/adaptor.go +++ b/relay/adaptor/palm/adaptor.go @@ -3,13 +3,15 @@ package palm import ( "errors" "fmt" + "io" + "net/http" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" - "io" - "net/http" ) type Adaptor struct { @@ -58,8 +60,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/palm/constants.go b/relay/adaptor/palm/constants.go index a8349362..70421002 100644 ---
a/relay/adaptor/palm/constants.go +++ b/relay/adaptor/palm/constants.go @@ -1,5 +1,7 @@ package palm -var ModelList = []string{ - "PaLM-2", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "PaLM-2": {Input: 0, Output: 0}, } diff --git a/relay/adaptor/proxy/adaptor.go b/relay/adaptor/proxy/adaptor.go index 670c7628..798060cf 100644 --- a/relay/adaptor/proxy/adaptor.go +++ b/relay/adaptor/proxy/adaptor.go @@ -10,6 +10,7 @@ import ( "github.com/pkg/errors" "github.com/songquanpeng/one-api/relay/adaptor" channelhelper "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" relaymodel "github.com/songquanpeng/one-api/relay/model" @@ -48,6 +49,10 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return nil, nil } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, nil) +} + func (a *Adaptor) GetModelList() (models []string) { return nil } diff --git a/relay/adaptor/replicate/adaptor.go b/relay/adaptor/replicate/adaptor.go index a60a7de3..a54780a0 100644 --- a/relay/adaptor/replicate/adaptor.go +++ b/relay/adaptor/replicate/adaptor.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "net/http" - "slices" "strings" "time" @@ -13,6 +12,7 @@ import ( "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" @@ -96,7 +96,7 @@ func (a *Adaptor) Init(meta *meta.Meta) { } func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) { - if !slices.Contains(ModelList, meta.OriginModelName) { + if _, ok := 
RatioMap[meta.OriginModelName]; !ok { return "", errors.Errorf("model %s not supported", meta.OriginModelName) } @@ -127,8 +127,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/replicate/constant.go b/relay/adaptor/replicate/constant.go index 989142c9..fddeb3f6 100644 --- a/relay/adaptor/replicate/constant.go +++ b/relay/adaptor/replicate/constant.go @@ -1,58 +1,49 @@ package replicate -// ModelList is a list of models that can be used with Replicate. -// +import "github.com/songquanpeng/one-api/relay/billing/ratio" + // https://replicate.com/pricing -var ModelList = []string{ - // ------------------------------------- - // image model - // ------------------------------------- - "black-forest-labs/flux-1.1-pro", - "black-forest-labs/flux-1.1-pro-ultra", - "black-forest-labs/flux-canny-dev", - "black-forest-labs/flux-canny-pro", - "black-forest-labs/flux-depth-dev", - "black-forest-labs/flux-depth-pro", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-dev-lora", - "black-forest-labs/flux-fill-dev", - "black-forest-labs/flux-fill-pro", - "black-forest-labs/flux-pro", - "black-forest-labs/flux-redux-dev", - "black-forest-labs/flux-redux-schnell", - "black-forest-labs/flux-schnell", - "black-forest-labs/flux-schnell-lora", - "ideogram-ai/ideogram-v2", - "ideogram-ai/ideogram-v2-turbo", - "recraft-ai/recraft-v3", - "recraft-ai/recraft-v3-svg", - "stability-ai/stable-diffusion-3", - "stability-ai/stable-diffusion-3.5-large", - "stability-ai/stable-diffusion-3.5-large-turbo", - "stability-ai/stable-diffusion-3.5-medium", - // ------------------------------------- - // language model - // ------------------------------------- - 
"ibm-granite/granite-20b-code-instruct-8k", - "ibm-granite/granite-3.0-2b-instruct", - "ibm-granite/granite-3.0-8b-instruct", - "ibm-granite/granite-8b-code-instruct-128k", - "meta/llama-2-13b", - "meta/llama-2-13b-chat", - "meta/llama-2-70b", - "meta/llama-2-70b-chat", - "meta/llama-2-7b", - "meta/llama-2-7b-chat", - "meta/meta-llama-3.1-405b-instruct", - "meta/meta-llama-3-70b", - "meta/meta-llama-3-70b-instruct", - "meta/meta-llama-3-8b", - "meta/meta-llama-3-8b-instruct", - "mistralai/mistral-7b-instruct-v0.2", - "mistralai/mistral-7b-v0.1", - "mistralai/mixtral-8x7b-instruct-v0.1", - // ------------------------------------- - // video model - // ------------------------------------- - // "minimax/video-01", // TODO: implement the adaptor +var RatioMap = map[string]ratio.Ratio{ + "black-forest-labs/flux-1.1-pro": {Input: 0.04 * ratio.USD, Output: 0}, + "black-forest-labs/flux-1.1-pro-ultra": {Input: 0.06 * ratio.USD, Output: 0}, + "black-forest-labs/flux-canny-dev": {Input: 0.025 * ratio.USD, Output: 0}, + "black-forest-labs/flux-canny-pro": {Input: 0.05 * ratio.USD, Output: 0}, + "black-forest-labs/flux-depth-dev": {Input: 0.025 * ratio.USD, Output: 0}, + "black-forest-labs/flux-depth-pro": {Input: 0.05 * ratio.USD, Output: 0}, + "black-forest-labs/flux-dev": {Input: 0.025 * ratio.USD, Output: 0}, + "black-forest-labs/flux-dev-lora": {Input: 0.032 * ratio.USD, Output: 0}, + "black-forest-labs/flux-fill-dev": {Input: 0.04 * ratio.USD, Output: 0}, + "black-forest-labs/flux-fill-pro": {Input: 0.05 * ratio.USD, Output: 0}, + "black-forest-labs/flux-pro": {Input: 0.055 * ratio.USD, Output: 0}, + "black-forest-labs/flux-redux-dev": {Input: 0.025 * ratio.USD, Output: 0}, + "black-forest-labs/flux-redux-schnell": {Input: 0.003 * ratio.USD, Output: 0}, + "black-forest-labs/flux-schnell": {Input: 0.003 * ratio.USD, Output: 0}, + "black-forest-labs/flux-schnell-lora": {Input: 0.02 * ratio.USD, Output: 0}, + "ideogram-ai/ideogram-v2": {Input: 0.08 * ratio.USD, Output: 0}, 
+ "ideogram-ai/ideogram-v2-turbo": {Input: 0.05 * ratio.USD, Output: 0}, + "recraft-ai/recraft-v3": {Input: 0.04 * ratio.USD, Output: 0}, + "recraft-ai/recraft-v3-svg": {Input: 0.08 * ratio.USD, Output: 0}, + "stability-ai/stable-diffusion-3": {Input: 0.035 * ratio.USD, Output: 0}, + "stability-ai/stable-diffusion-3.5-large": {Input: 0.065 * ratio.USD, Output: 0}, + "stability-ai/stable-diffusion-3.5-large-turbo": {Input: 0.04 * ratio.USD, Output: 0}, + "stability-ai/stable-diffusion-3.5-medium": {Input: 0.035 * ratio.USD, Output: 0}, + // replicate chat models + "ibm-granite/granite-20b-code-instruct-8k": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD}, + "ibm-granite/granite-3.0-2b-instruct": {Input: 0.030 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "ibm-granite/granite-3.0-8b-instruct": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "ibm-granite/granite-8b-code-instruct-128k": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "meta/llama-2-13b": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD}, + "meta/llama-2-13b-chat": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD}, + "meta/llama-2-70b": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD}, + "meta/llama-2-70b-chat": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD}, + "meta/llama-2-7b": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "meta/llama-2-7b-chat": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "meta/meta-llama-3.1-405b-instruct": {Input: 9.500 * ratio.MILLI_USD, Output: 9.500 * ratio.MILLI_USD}, + "meta/meta-llama-3-70b": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD}, + "meta/meta-llama-3-70b-instruct": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD}, + "meta/meta-llama-3-8b": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "meta/meta-llama-3-8b-instruct": {Input: 
0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "mistralai/mistral-7b-instruct-v0.2": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "mistralai/mistral-7b-v0.1": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD}, + "mistralai/mixtral-8x7b-instruct-v0.1": {Input: 0.300 * ratio.MILLI_USD, Output: 1.000 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/siliconflow/constants.go b/relay/adaptor/siliconflow/constants.go index 0bf54761..59fb4e85 100644 --- a/relay/adaptor/siliconflow/constants.go +++ b/relay/adaptor/siliconflow/constants.go @@ -1,36 +1,40 @@ package siliconflow -// https://docs.siliconflow.cn/docs/getting-started +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "deepseek-ai/deepseek-llm-67b-chat", - "Qwen/Qwen1.5-14B-Chat", - "Qwen/Qwen1.5-7B-Chat", - "Qwen/Qwen1.5-110B-Chat", - "Qwen/Qwen1.5-32B-Chat", - "01-ai/Yi-1.5-6B-Chat", - "01-ai/Yi-1.5-9B-Chat-16K", - "01-ai/Yi-1.5-34B-Chat-16K", - "THUDM/chatglm3-6b", - "deepseek-ai/DeepSeek-V2-Chat", - "THUDM/glm-4-9b-chat", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "Qwen/Qwen2-57B-A14B-Instruct", - "deepseek-ai/DeepSeek-Coder-V2-Instruct", - "Qwen/Qwen2-1.5B-Instruct", - "internlm/internlm2_5-7b-chat", - "BAAI/bge-large-en-v1.5", - "BAAI/bge-large-zh-v1.5", - "Pro/Qwen/Qwen2-7B-Instruct", - "Pro/Qwen/Qwen2-1.5B-Instruct", - "Pro/Qwen/Qwen1.5-7B-Chat", - "Pro/THUDM/glm-4-9b-chat", - "Pro/THUDM/chatglm3-6b", - "Pro/01-ai/Yi-1.5-9B-Chat-16K", - "Pro/01-ai/Yi-1.5-6B-Chat", - "Pro/google/gemma-2-9b-it", - "Pro/internlm/internlm2_5-7b-chat", - "Pro/meta-llama/Meta-Llama-3-8B-Instruct", - "Pro/mistralai/Mistral-7B-Instruct-v0.2", +// https://siliconflow.cn/zh-cn/models +// https://siliconflow.cn/zh-cn/pricing +var RatioMap = map[string]ratio.Ratio{ + "Qwen/Qwen2.5-72B-Instruct": {Input: 41.3 * ratio.MILLI_RMB, Output: 41.3 * ratio.MILLI_RMB}, + "Qwen/Qwen2.5-7B-Instruct": {Input: 3.5 * ratio.MILLI_RMB, Output: 
3.5 * ratio.MILLI_RMB}, + "deepseek-ai/deepseek-llm-67b-chat": {Input: 0, Output: 0}, + "Qwen/Qwen1.5-14B-Chat": {Input: 0, Output: 0}, + "Qwen/Qwen1.5-7B-Chat": {Input: 0, Output: 0}, + "Qwen/Qwen1.5-110B-Chat": {Input: 0, Output: 0}, + "Qwen/Qwen1.5-32B-Chat": {Input: 0, Output: 0}, + "01-ai/Yi-1.5-6B-Chat": {Input: 0, Output: 0}, + "01-ai/Yi-1.5-9B-Chat-16K": {Input: 0, Output: 0}, + "01-ai/Yi-1.5-34B-Chat-16K": {Input: 0, Output: 0}, + "THUDM/chatglm3-6b": {Input: 0, Output: 0}, + "deepseek-ai/DeepSeek-V2-Chat": {Input: 0, Output: 0}, + "THUDM/glm-4-9b-chat": {Input: 0, Output: 0}, + "Qwen/Qwen2-72B-Instruct": {Input: 0, Output: 0}, + "Qwen/Qwen2-7B-Instruct": {Input: 0, Output: 0}, + "Qwen/Qwen2-57B-A14B-Instruct": {Input: 0, Output: 0}, + "deepseek-ai/DeepSeek-Coder-V2-Instruct": {Input: 0, Output: 0}, + "Qwen/Qwen2-1.5B-Instruct": {Input: 0, Output: 0}, + "internlm/internlm2_5-7b-chat": {Input: 0, Output: 0}, + "BAAI/bge-large-en-v1.5": {Input: 0, Output: 0}, + "BAAI/bge-large-zh-v1.5": {Input: 0, Output: 0}, + "Pro/Qwen/Qwen2-7B-Instruct": {Input: 0, Output: 0}, + "Pro/Qwen/Qwen2-1.5B-Instruct": {Input: 0, Output: 0}, + "Pro/Qwen/Qwen1.5-7B-Chat": {Input: 0, Output: 0}, + "Pro/THUDM/glm-4-9b-chat": {Input: 0, Output: 0}, + "Pro/THUDM/chatglm3-6b": {Input: 0, Output: 0}, + "Pro/01-ai/Yi-1.5-9B-Chat-16K": {Input: 0, Output: 0}, + "Pro/01-ai/Yi-1.5-6B-Chat": {Input: 0, Output: 0}, + "Pro/google/gemma-2-9b-it": {Input: 0, Output: 0}, + "Pro/internlm/internlm2_5-7b-chat": {Input: 0, Output: 0}, + "Pro/meta-llama/Meta-Llama-3-8B-Instruct": {Input: 0, Output: 0}, + "Pro/mistralai/Mistral-7B-Instruct-v0.2": {Input: 0, Output: 0}, } diff --git a/relay/adaptor/stepfun/constants.go b/relay/adaptor/stepfun/constants.go index 6a2346ca..b4b0aeda 100644 --- a/relay/adaptor/stepfun/constants.go +++ b/relay/adaptor/stepfun/constants.go @@ -1,13 +1,17 @@ package stepfun -var ModelList = []string{ - "step-1-8k", - "step-1-32k", - "step-1-128k", - "step-1-256k", - 
"step-1-flash", - "step-2-16k", - "step-1v-8k", - "step-1v-32k", - "step-1x-medium", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://platform.stepfun.com/docs/pricing/details +var RatioMap = map[string]ratio.Ratio{ + "step-1-8k": {Input: 5 * ratio.MILLI_RMB, Output: 20 * ratio.MILLI_RMB}, + "step-1-32k": {Input: 15 * ratio.MILLI_RMB, Output: 70 * ratio.MILLI_RMB}, + "step-1-128k": {Input: 40 * ratio.MILLI_RMB, Output: 200 * ratio.MILLI_RMB}, + "step-1-256k": {Input: 95 * ratio.MILLI_RMB, Output: 300 * ratio.MILLI_RMB}, + "step-1-flash": {Input: 1 * ratio.MILLI_RMB, Output: 4 * ratio.MILLI_RMB}, + "step-2-16k": {Input: 38 * ratio.MILLI_RMB, Output: 120 * ratio.MILLI_RMB}, + "step-1v-8k": {Input: 5 * ratio.MILLI_RMB, Output: 20 * ratio.MILLI_RMB}, + "step-1v-32k": {Input: 15 * ratio.MILLI_RMB, Output: 70 * ratio.MILLI_RMB}, + "step-1.5v-mini": {Input: 8 * ratio.MILLI_RMB, Output: 35 * ratio.MILLI_RMB}, + "step-1x-medium": {Input: 0.1 * ratio.RMB, Output: 0}, } diff --git a/relay/adaptor/tencent/adaptor.go b/relay/adaptor/tencent/adaptor.go index 0de92d4a..8c7eb507 100644 --- a/relay/adaptor/tencent/adaptor.go +++ b/relay/adaptor/tencent/adaptor.go @@ -2,16 +2,18 @@ package tencent import ( "errors" - "github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/common/helper" - "github.com/songquanpeng/one-api/relay/adaptor" - "github.com/songquanpeng/one-api/relay/adaptor/openai" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/model" "io" "net/http" "strconv" "strings" + + "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/common/helper" + "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/model" ) // https://cloud.tencent.com/document/api/1729/101837 @@ -80,8 +82,12 @@ func (a *Adaptor) 
DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/tencent/constants.go b/relay/adaptor/tencent/constants.go index e8631e5f..ede8697d 100644 --- a/relay/adaptor/tencent/constants.go +++ b/relay/adaptor/tencent/constants.go @@ -1,9 +1,19 @@ package tencent -var ModelList = []string{ - "hunyuan-lite", - "hunyuan-standard", - "hunyuan-standard-256K", - "hunyuan-pro", - "hunyuan-vision", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +// https://cloud.tencent.com/document/product/1729/97731 +var RatioMap = map[string]ratio.Ratio{ + "hunyuan-turbo": {Input: 0.015 * ratio.RMB, Output: 0.05 * ratio.RMB}, + "hunyuan-large": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB}, + "hunyuan-large-longcontext": {Input: 0.006 * ratio.RMB, Output: 0.018 * ratio.RMB}, + "hunyuan-standard": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "hunyuan-standard-256K": {Input: 0.0005 * ratio.RMB, Output: 0.002 * ratio.RMB}, + "hunyuan-translation-lite": {Input: 0.005 * ratio.RMB, Output: 0.015 * ratio.RMB}, + "hunyuan-role": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "hunyuan-functioncall": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "hunyuan-code": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB}, + "hunyuan-turbo-vision": {Input: 0.08 * ratio.RMB, Output: 0.08 * ratio.RMB}, + "hunyuan-vision": {Input: 0.018 * ratio.RMB, Output: 0.018 * ratio.RMB}, + "hunyuan-embedding": {Input: 0.0007 * ratio.RMB, Output: 0.0007 * ratio.RMB}, } diff --git a/relay/adaptor/togetherai/constants.go b/relay/adaptor/togetherai/constants.go index 0a79fbdc..5291122d 100644 --- a/relay/adaptor/togetherai/constants.go +++ 
b/relay/adaptor/togetherai/constants.go @@ -1,10 +1,12 @@ package togetherai -// https://docs.together.ai/docs/inference-models +import "github.com/songquanpeng/one-api/relay/billing/ratio" -var ModelList = []string{ - "meta-llama/Llama-3-70b-chat-hf", - "deepseek-ai/deepseek-coder-33b-instruct", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "Qwen/Qwen1.5-72B-Chat", +// https://www.together.ai/pricing +// https://docs.together.ai/docs/inference-models +var RatioMap = map[string]ratio.Ratio{ + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {Input: 0.88 * ratio.MILLI_USD, Output: 0.88 * ratio.MILLI_USD}, + "deepseek-ai/deepseek-coder-33b-instruct": {Input: 1.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD}, + "mistralai/Mixtral-8x22B-Instruct-v0.1": {Input: 1.20 * ratio.MILLI_USD, Output: 1.20 * ratio.MILLI_USD}, + "Qwen/Qwen2-72B-Instruct": {Input: 0.90 * ratio.MILLI_USD, Output: 0.90 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/vertexai/adaptor.go b/relay/adaptor/vertexai/adaptor.go index 3fab4a45..b635a229 100644 --- a/relay/adaptor/vertexai/adaptor.go +++ b/relay/adaptor/vertexai/adaptor.go @@ -10,9 +10,9 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/relay/adaptor" channelhelper "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" - relaymodel "github.com/songquanpeng/one-api/relay/model" ) var _ adaptor.Adaptor = new(Adaptor) @@ -40,19 +40,29 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { adaptor := GetAdaptor(meta.ActualModelName) if adaptor == nil { - return nil, &relaymodel.ErrorWithStatusCode{ + return nil, &model.ErrorWithStatusCode{ StatusCode: http.StatusInternalServerError, - Error: relaymodel.Error{ - 
Message: "adaptor not found", - }, + Error: model.Error{Message: "adaptor not found"}, } } + return adaptor.DoResponse(c, resp, meta) } -func (a *Adaptor) GetModelList() (models []string) { - models = modelList - return +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + adaptor := GetAdaptor(meta.ActualModelName) + if adaptor == nil { + return nil + } + return adaptor.GetRatio(meta) +} + +func (a *Adaptor) GetModelList() []string { + var resp []string + for model := range modelMapping { + resp = append(resp, model) + } + return resp } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/vertexai/claude/adapter.go b/relay/adaptor/vertexai/claude/adapter.go index cb911cfe..8f8288ff 100644 --- a/relay/adaptor/vertexai/claude/adapter.go +++ b/relay/adaptor/vertexai/claude/adapter.go @@ -6,19 +6,21 @@ import ( "github.com/gin-gonic/gin" "github.com/pkg/errors" "github.com/songquanpeng/one-api/common/ctxkey" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/anthropic" - + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) -var ModelList = []string{ - "claude-3-haiku@20240307", - "claude-3-sonnet@20240229", - "claude-3-opus@20240229", - "claude-3-5-sonnet@20240620", - "claude-3-5-sonnet-v2@20241022", - "claude-3-5-haiku@20241022", +// https://cloud.google.com/vertex-ai/generative-ai/pricing?hl=zh-cn#claude-models +var RatioMap = map[string]ratio.Ratio{ + "claude-3-haiku@20240307": {Input: 0.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD}, + "claude-3-sonnet@20240229": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, + "claude-3-opus@20240229": {Input: 15 * ratio.MILLI_USD, Output: 75 * ratio.MILLI_USD}, + "claude-3-5-sonnet@20240620": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, + "claude-3-5-sonnet-v2@20241022": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD}, 
+ "claude-3-5-haiku@20241022": {Input: 0.80 * ratio.MILLI_USD, Output: 4 * ratio.MILLI_USD}, } const anthropicVersion = "vertex-2023-10-16" @@ -58,3 +60,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met } return } + +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} diff --git a/relay/adaptor/vertexai/gemini/adapter.go b/relay/adaptor/vertexai/gemini/adapter.go index b5377875..27ac0d1b 100644 --- a/relay/adaptor/vertexai/gemini/adapter.go +++ b/relay/adaptor/vertexai/gemini/adapter.go @@ -6,20 +6,16 @@ import ( "github.com/gin-gonic/gin" "github.com/pkg/errors" "github.com/songquanpeng/one-api/common/ctxkey" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/gemini" "github.com/songquanpeng/one-api/relay/adaptor/openai" - "github.com/songquanpeng/one-api/relay/relaymode" - + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" + "github.com/songquanpeng/one-api/relay/relaymode" ) -var ModelList = []string{ - "gemini-pro", "gemini-pro-vision", - "gemini-1.5-pro-001", "gemini-1.5-flash-001", - "gemini-1.5-pro-002", "gemini-1.5-flash-002", - "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp", -} +var RatioMap = gemini.RatioMap type Adaptor struct { } @@ -50,3 +46,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met } return } + +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} diff --git a/relay/adaptor/vertexai/registry.go b/relay/adaptor/vertexai/registry.go index 41099f02..966b91d5 100644 --- a/relay/adaptor/vertexai/registry.go +++ b/relay/adaptor/vertexai/registry.go @@ -6,6 +6,7 @@ import ( "github.com/gin-gonic/gin" claude "github.com/songquanpeng/one-api/relay/adaptor/vertexai/claude" gemini 
"github.com/songquanpeng/one-api/relay/adaptor/vertexai/gemini" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" ) @@ -18,16 +19,13 @@ const ( ) var modelMapping = map[string]VertexAIModelType{} -var modelList = []string{} func init() { - modelList = append(modelList, claude.ModelList...) - for _, model := range claude.ModelList { + for model := range claude.RatioMap { modelMapping[model] = VerterAIClaude } - modelList = append(modelList, gemini.ModelList...) - for _, model := range gemini.ModelList { + for model := range gemini.RatioMap { modelMapping[model] = VerterAIGemini } } @@ -35,6 +33,7 @@ func init() { type innerAIAdapter interface { ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) + GetRatio(meta *meta.Meta) *ratio.Ratio } func GetAdaptor(model string) innerAIAdapter { diff --git a/relay/adaptor/xai/constants.go b/relay/adaptor/xai/constants.go index 9082b999..ff720e44 100644 --- a/relay/adaptor/xai/constants.go +++ b/relay/adaptor/xai/constants.go @@ -1,5 +1,7 @@ package xai -var ModelList = []string{ - "grok-beta", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "grok-beta": {Input: 5.0 * ratio.MILLI_USD, Output: 15.0 * ratio.MILLI_USD}, } diff --git a/relay/adaptor/xunfei/adaptor.go b/relay/adaptor/xunfei/adaptor.go index b5967f26..ab45ecf2 100644 --- a/relay/adaptor/xunfei/adaptor.go +++ b/relay/adaptor/xunfei/adaptor.go @@ -2,14 +2,16 @@ package xunfei import ( "errors" - "github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/relay/adaptor" - "github.com/songquanpeng/one-api/relay/adaptor/openai" - "github.com/songquanpeng/one-api/relay/meta" - "github.com/songquanpeng/one-api/relay/model" "io" "net/http" "strings" + + 
"github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/relay/adaptor" + "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/model" ) type Adaptor struct { @@ -77,8 +79,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met return } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/xunfei/constants.go b/relay/adaptor/xunfei/constants.go index 5b82ac29..2ac2253e 100644 --- a/relay/adaptor/xunfei/constants.go +++ b/relay/adaptor/xunfei/constants.go @@ -1,12 +1,14 @@ package xunfei -var ModelList = []string{ - "SparkDesk", - "SparkDesk-v1.1", - "SparkDesk-v2.1", - "SparkDesk-v3.1", - "SparkDesk-v3.1-128K", - "SparkDesk-v3.5", - "SparkDesk-v3.5-32K", - "SparkDesk-v4.0", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "SparkDesk": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v1.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v2.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v3.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v3.1-128K": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v3.5": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v3.5-32K": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens + "SparkDesk-v4.0": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens } diff --git a/relay/adaptor/zhipu/adaptor.go b/relay/adaptor/zhipu/adaptor.go index 660bd379..863a491f 100644 --- a/relay/adaptor/zhipu/adaptor.go +++ b/relay/adaptor/zhipu/adaptor.go 
@@ -3,16 +3,18 @@ package zhipu import ( "errors" "fmt" + "io" + "net/http" + "strings" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common/helper" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" - "io" - "net/http" - "strings" ) type Adaptor struct { @@ -140,8 +142,12 @@ func ConvertEmbeddingRequest(request model.GeneralOpenAIRequest) (*EmbeddingRequ }, nil } +func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio { + return adaptor.GetRatioHelper(meta, RatioMap) +} + func (a *Adaptor) GetModelList() []string { - return ModelList + return adaptor.GetModelListHelper(RatioMap) } func (a *Adaptor) GetChannelName() string { diff --git a/relay/adaptor/zhipu/constants.go b/relay/adaptor/zhipu/constants.go index e1192123..b8fa39f9 100644 --- a/relay/adaptor/zhipu/constants.go +++ b/relay/adaptor/zhipu/constants.go @@ -1,7 +1,24 @@ package zhipu -var ModelList = []string{ - "chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite", - "glm-4", "glm-4v", "glm-3-turbo", "embedding-2", - "cogview-3", +import "github.com/songquanpeng/one-api/relay/billing/ratio" + +var RatioMap = map[string]ratio.Ratio{ + "glm-zero-preview": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB}, + "glm-4-plus": {Input: 0.05 * ratio.RMB, Output: 0.05 * ratio.RMB}, + "glm-4-0520": {Input: 0.1 * ratio.RMB, Output: 0.1 * ratio.RMB}, + "glm-4-airx": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB}, + "glm-4-air": {Input: 0.0005 * ratio.RMB, Output: 0.0005 * ratio.RMB}, + "glm-4-long": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB}, + "glm-4-flashx": {Input: 0.0001 * ratio.RMB, Output: 0.0001 * ratio.RMB}, + "glm-4v-plus": {Input: 0.004 * ratio.RMB, Output: 0.004 * ratio.RMB}, + "glm-4v": {Input: 0.05 * ratio.RMB, Output: 
0}, + "cogview-3-plus": {Input: 0.06 * ratio.RMB, Output: 0}, + "cogview-3": {Input: 0.1 * ratio.RMB, Output: 0}, + "cogvideox": {Input: 0.5 * ratio.RMB, Output: 0}, + "embedding-3": {Input: 0.0005 * ratio.RMB, Output: 0}, + "embedding-2": {Input: 0.0005 * ratio.RMB, Output: 0}, + "glm-4-flash": {Input: 0, Output: 0}, // free + "glm-4v-flash": {Input: 0, Output: 0}, // free + "cogview-3-flash": {Input: 0, Output: 0}, // free + "cogvideox-flash": {Input: 0, Output: 0}, // free } diff --git a/relay/adaptor/zhipu/main.go b/relay/adaptor/zhipu/main.go index ab3a5678..488a0df1 100644 --- a/relay/adaptor/zhipu/main.go +++ b/relay/adaptor/zhipu/main.go @@ -3,13 +3,14 @@ package zhipu import ( "bufio" "encoding/json" - "github.com/songquanpeng/one-api/common/render" "io" "net/http" "strings" "sync" "time" + "github.com/songquanpeng/one-api/common/render" + "github.com/gin-gonic/gin" "github.com/golang-jwt/jwt" "github.com/songquanpeng/one-api/common" @@ -268,6 +269,9 @@ func EmbeddingsHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithSta c.Writer.Header().Set("Content-Type", "application/json") c.Writer.WriteHeader(resp.StatusCode) _, err = c.Writer.Write(jsonResponse) + if err != nil { + return openai.ErrorWrapper(err, "write_response_body_failed", http.StatusInternalServerError), nil + } return nil, &fullTextResponse.Usage } @@ -276,11 +280,7 @@ func embeddingResponseZhipu2OpenAI(response *EmbeddingResponse) *openai.Embeddin Object: "list", Data: make([]openai.EmbeddingResponseItem, 0, len(response.Embeddings)), Model: response.Model, - Usage: model.Usage{ - PromptTokens: response.PromptTokens, - CompletionTokens: response.CompletionTokens, - TotalTokens: response.Usage.TotalTokens, - }, + Usage: response.Usage, } for _, item := range response.Embeddings { diff --git a/relay/billing/billing.go b/relay/billing/billing.go index a99d37ee..0a66bacd 100644 --- a/relay/billing/billing.go +++ b/relay/billing/billing.go @@ -3,6 +3,7 @@ package billing import ( "context"
"fmt" + "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/model" ) @@ -19,20 +20,22 @@ func ReturnPreConsumedQuota(ctx context.Context, preConsumedQuota int64, tokenId } } -func PostConsumeQuota(ctx context.Context, tokenId int, quotaDelta int64, totalQuota int64, userId int, channelId int, modelRatio float64, groupRatio float64, modelName string, tokenName string) { +func PostConsumeQuota(ctx context.Context, tokenId int, quotaDelta int64, totalQuota int64, userId int, channelId int, modelRatio float64, groupRatio float64, modelName string, tokenName string, promptTokens int, completionTokens int) { // quotaDelta is remaining quota to be consumed - err := model.PostConsumeTokenQuota(tokenId, quotaDelta) - if err != nil { - logger.SysError("error consuming token remain quota: " + err.Error()) + if quotaDelta != 0 { + err := model.PostConsumeTokenQuota(tokenId, quotaDelta) + if err != nil { + logger.SysError("error consuming token remain quota: " + err.Error()) + } } - err = model.CacheUpdateUserQuota(ctx, userId) + err := model.CacheUpdateUserQuota(ctx, userId) if err != nil { logger.SysError("error update user quota cache: " + err.Error()) } // totalQuota is total quota consumed if totalQuota != 0 { logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio) - model.RecordConsumeLog(ctx, userId, channelId, int(totalQuota), 0, modelName, tokenName, totalQuota, logContent) + model.RecordConsumeLog(ctx, userId, channelId, promptTokens, completionTokens, modelName, tokenName, totalQuota, logContent) model.UpdateUserUsedQuotaAndRequestCount(userId, totalQuota) model.UpdateChannelUsedQuota(channelId, totalQuota) } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index f83aa70c..80dad948 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -9,17 +9,35 @@ import ( ) const ( - USD2RMB = 7 - USD = 500 // $0.002 = 1 -> $1 = 500 - RMB = USD / USD2RMB + USD2RMB = 7 + USD = 500 // 
$0.002 = 1 -> $1 = 500 + RMB = USD / USD2RMB // 1 RMB = 1/7 USD; NOTE: untyped integer constant division, so this evaluates to 71, not 71.43 + MILLI_USD = 1.0 / 1000 * USD + MILLI_RMB = 1.0 / 1000 * RMB + TokensPerSecond = 1000 / 20 // $0.006 / minute -> $0.002 / 20 seconds -> $0.002 / 1K tokens ) +type Ratio struct { + Input float64 `json:"input,omitempty"` // input ratio + Output float64 `json:"output,omitempty"` // output ratio + LongThreshold int `json:"long_threshold,omitempty"` // for gemini like models, prompt longer than threshold will be charged as long input + LongInput float64 `json:"long_input,omitempty"` // long input ratio + LongOutput float64 `json:"long_output,omitempty"` // long output ratio +} + +var ( + FallbackRatio = Ratio{Input: 30, Output: 30} +) + +// Deprecated: superseded by the Ratio type and DefaultRatio. +// TODO: remove this // ModelRatio // https://platform.openai.com/docs/models/model-endpoint-compatibility // https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf // https://openai.com/pricing // 1 === $0.002 / 1K tokens // 1 === ¥0.014 / 1k tokens +// 1 === $0.002 / 20 seconds (50 tokens per second) var ModelRatio = map[string]float64{ // https://openai.com/pricing "gpt-4": 15, @@ -342,6 +360,7 @@ var CompletionRatio = map[string]float64{ var ( DefaultModelRatio map[string]float64 DefaultCompletionRatio map[string]float64 + DefaultRatio = make(map[string]Ratio) ) func init() { @@ -536,3 +555,30 @@ func GetCompletionRatio(name string, channelType int) float64 { return 1 } + +func Ratio2JSONString() string { + jsonBytes, err := json.Marshal(DefaultRatio) + if err != nil { + logger.SysError("error marshalling ratio: " + err.Error()) + } + return string(jsonBytes) +} + +func UpdateRatioByJSONString(jsonStr string) error { + DefaultRatio = make(map[string]Ratio) + return json.Unmarshal([]byte(jsonStr), &DefaultRatio) +} + +func GetRatio(name string, channelType int) *Ratio { + var result Ratio + model := fmt.Sprintf("%s(%d)", name, channelType) + if ratio, ok := DefaultRatio[model]; ok { + result = ratio + return &result + } + if ratio, ok := DefaultRatio[name];
ok { + result = ratio + return &result + } + return nil +} diff --git a/relay/controller/audio.go b/relay/controller/audio.go index e3d57b1e..7d076279 100644 --- a/relay/controller/audio.go +++ b/relay/controller/audio.go @@ -18,6 +18,7 @@ import ( "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/model" + "github.com/songquanpeng/one-api/relay" "github.com/songquanpeng/one-api/relay/adaptor/openai" "github.com/songquanpeng/one-api/relay/billing" billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" @@ -54,9 +55,16 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus } } - modelRatio := billingratio.GetModelRatio(audioModel, channelType) + adaptor := relay.GetAdaptor(meta.APIType) + if adaptor == nil { + return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest) + } + adaptor.Init(meta) + groupRatio := billingratio.GetGroupRatio(group) - ratio := modelRatio * groupRatio + adaptorRatio := GetRatio(meta, adaptor) + ratio := adaptorRatio.Input * groupRatio + var quota int64 var preConsumedQuota int64 switch relayMode { @@ -216,7 +224,7 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus succeed = true quotaDelta := quota - preConsumedQuota defer func(ctx context.Context) { - go billing.PostConsumeQuota(ctx, tokenId, quotaDelta, quota, userId, channelId, modelRatio, groupRatio, audioModel, tokenName) + go billing.PostConsumeQuota(ctx, tokenId, quotaDelta, quota, userId, channelId, adaptorRatio.Input, groupRatio, audioModel, tokenName, 0, 0) }(c.Request.Context()) for k, v := range resp.Header { diff --git a/relay/controller/helper.go b/relay/controller/helper.go index 5f5fc90c..1a76baa7 100644 --- a/relay/controller/helper.go +++ b/relay/controller/helper.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" -
"github.com/songquanpeng/one-api/relay/constant/role" "math" "net/http" "strings" @@ -14,9 +13,12 @@ import ( "github.com/songquanpeng/one-api/common/config" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/model" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing/ratio" billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" + "github.com/songquanpeng/one-api/relay/constant/role" "github.com/songquanpeng/one-api/relay/controller/validator" "github.com/songquanpeng/one-api/relay/meta" relaymodel "github.com/songquanpeng/one-api/relay/model" @@ -91,17 +93,26 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR return preConsumedQuota, nil } -func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) { +func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio billingratio.Ratio, preConsumedQuota int64, groupRatio float64, systemPromptReset bool) { if usage == nil { logger.Error(ctx, "usage is nil, which is unexpected") return } var quota int64 - completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType) + // use meta.OriginModelName instead of the mapped model name, which may be named arbitrarily in azure promptTokens := usage.PromptTokens completionTokens := usage.CompletionTokens - quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio)) - if ratio != 0 && quota <= 0 { + promptRatio := ratio.Input + completionRatio := ratio.Output + + // for gemini-like models, prompts longer than ratio.LongThreshold (e.g. 128k) are charged at the long input/output ratios + if ratio.LongInput >
0 && promptTokens > ratio.LongThreshold { + promptRatio = ratio.LongInput + completionRatio = ratio.LongOutput + } + quota = int64(math.Ceil(groupRatio * (float64(promptTokens)*promptRatio + float64(completionTokens)*completionRatio))) + + if quota <= 0 && (ratio.Input > 0 || ratio.Output > 0) { quota = 1 } totalTokens := promptTokens + completionTokens @@ -123,8 +134,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M if systemPromptReset { extraLog = " (注意系统提示词已被重置)" } - logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f%s", modelRatio, groupRatio, completionRatio, extraLog) - model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, textRequest.Model, meta.TokenName, quota, logContent) + logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f%s", promptRatio, groupRatio, completionRatio/promptRatio, extraLog) + model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, meta.OriginModelName, meta.TokenName, quota, logContent) model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota) model.UpdateChannelUsedQuota(meta.ChannelId, quota) } @@ -185,3 +196,16 @@ func setSystemPrompt(ctx context.Context, request *relaymodel.GeneralOpenAIReque logger.Infof(ctx, "add system prompt") return true } + +func GetRatio(meta *meta.Meta, adaptor adaptor.Adaptor) ratio.Ratio { + result := billingratio.GetRatio(meta.OriginModelName, meta.ChannelType) + if result != nil { + return *result + } + ratio := adaptor.GetRatio(meta) + if ratio != nil { + return *ratio + } + logger.SysError("model ratio not found: " + meta.OriginModelName) + return billingratio.FallbackRatio +} diff --git a/relay/controller/image.go b/relay/controller/image.go index 1b69d97d..94d182bb 100644 --- a/relay/controller/image.go +++ b/relay/controller/image.go @@ -128,7 +128,6 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus return openai.ErrorWrapper(err, 
"get_image_cost_ratio_failed", http.StatusInternalServerError) } - imageModel := imageRequest.Model // Convert the original image model imageRequest.Model, _ = getMappedModelName(imageRequest.Model, billingratio.ImageOriginModelName) c.Set("response_format", imageRequest.ResponseFormat) @@ -167,9 +166,9 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus requestBody = bytes.NewBuffer(jsonStr) } - modelRatio := billingratio.GetModelRatio(imageModel, meta.ChannelType) groupRatio := billingratio.GetGroupRatio(meta.Group) - ratio := modelRatio * groupRatio + adaptorRatio := GetRatio(meta, adaptor) + ratio := adaptorRatio.Input * groupRatio userQuota, err := model.CacheGetUserQuota(ctx, meta.UserId) var quota int64 @@ -209,7 +208,7 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus } if quota != 0 { tokenName := c.GetString(ctxkey.TokenName) - logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio) + logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", adaptorRatio.Input, groupRatio) model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, 0, 0, imageRequest.Model, tokenName, quota, logContent) model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota) channelId := c.GetInt(ctxkey.ChannelId) diff --git a/relay/controller/text.go b/relay/controller/text.go index 9a47c58b..9e8a6f9d 100644 --- a/relay/controller/text.go +++ b/relay/controller/text.go @@ -4,10 +4,11 @@ import ( "bytes" "encoding/json" "fmt" - "github.com/songquanpeng/one-api/common/config" "io" "net/http" + "github.com/songquanpeng/one-api/common/config" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay" @@ -32,6 +33,12 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { } meta.IsStream = textRequest.Stream + adaptor := relay.GetAdaptor(meta.APIType) + if adaptor == nil { + return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", 
meta.APIType), "invalid_api_type", http.StatusBadRequest) + } + adaptor.Init(meta) + // map model name meta.OriginModelName = textRequest.Model textRequest.Model, _ = getMappedModelName(textRequest.Model, meta.ModelMapping) @@ -39,9 +46,10 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { // set system prompt if not empty systemPromptReset := setSystemPrompt(ctx, textRequest, meta.SystemPrompt) // get model ratio & group ratio - modelRatio := billingratio.GetModelRatio(textRequest.Model, meta.ChannelType) groupRatio := billingratio.GetGroupRatio(meta.Group) - ratio := modelRatio * groupRatio + adaptorRatio := GetRatio(meta, adaptor) + ratio := adaptorRatio.Input * groupRatio + // pre-consume quota promptTokens := getPromptTokens(textRequest, meta.Mode) meta.PromptTokens = promptTokens @@ -51,12 +59,6 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { return bizErr } - adaptor := relay.GetAdaptor(meta.APIType) - if adaptor == nil { - return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest) - } - adaptor.Init(meta) - // get request body requestBody, err := getRequestBody(c, meta, textRequest, adaptor) if err != nil { @@ -82,7 +84,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { return respErr } // post-consume quota - go postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset) + go postConsumeQuota(ctx, usage, meta, textRequest, adaptorRatio, preConsumedQuota, groupRatio, systemPromptReset) return nil } diff --git a/web/default/src/components/OperationSetting.js b/web/default/src/components/OperationSetting.js index 6356ac66..88cbf2d0 100644 --- a/web/default/src/components/OperationSetting.js +++ b/web/default/src/components/OperationSetting.js @@ -2,6 +2,12 @@ import React, { useEffect, useState } from 'react'; import { Divider, Form, Grid, Header } from 'semantic-ui-react'; import { 
API, showError, showSuccess, timestamp2string, verifyJSON } from '../helpers'; +const RATIO_MAPPING_EXAMPLE = { + 'gpt-4o-mini': {'input': 0.075, 'output': 0.3}, + 'llama3-8b-8192(33)': {'input': 0.15, 'output': 0.3}, + 'llama3-70b-8192(33)': {'input': 1.325, 'output': 1.749}, +}; + const OperationSetting = () => { let now = new Date(); let [inputs, setInputs] = useState({ @@ -10,9 +16,10 @@ const OperationSetting = () => { QuotaForInvitee: 0, QuotaRemindThreshold: 0, PreConsumedQuota: 0, - ModelRatio: '', - CompletionRatio: '', + ModelRatio: '', // Deprecated + CompletionRatio: '', // Deprecated GroupRatio: '', + Ratio: '', TopUpLink: '', ChatLink: '', QuotaPerUnit: 0, @@ -35,7 +42,7 @@ const OperationSetting = () => { if (success) { let newInputs = {}; data.forEach((item) => { - if (item.key === 'ModelRatio' || item.key === 'GroupRatio' || item.key === 'CompletionRatio') { + if (item.key === 'ModelRatio' || item.key === 'GroupRatio' || item.key === 'CompletionRatio' || item.key === 'Ratio') { item.value = JSON.stringify(JSON.parse(item.value), null, 2); } if (item.value === '{}') { @@ -112,6 +119,13 @@ const OperationSetting = () => { } await updateOption('CompletionRatio', inputs.CompletionRatio); } + if (originInputs['Ratio'] !== inputs.Ratio) { + if (!verifyJSON(inputs.Ratio)) { + showError('倍率不是合法的 JSON 字符串'); + return; + } + await updateOption('Ratio', inputs.Ratio); + } break; case 'quota': if (originInputs['QuotaForNewUser'] !== inputs.QuotaForNewUser) { @@ -346,7 +360,18 @@ const OperationSetting = () => {