From 97c18d0c7fcc275ca077d5ed82f7368ce3ae0844 Mon Sep 17 00:00:00 2001 From: Nanami Date: Sat, 31 Aug 2024 10:20:22 +0800 Subject: [PATCH 1/3] feat: support more zhipu models --- common/model-ratio.go | 7 +++++++ relay/channel/zhipu_4v/constants.go | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/common/model-ratio.go b/common/model-ratio.go index 946ee46..889796c 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -119,6 +119,13 @@ var defaultModelRatio = map[string]float64{ "glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens "glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens "glm-3-turbo": 0.3572, + "glm-4-plus": 0.05 * RMB, + "glm-4-0520": 0.1 * RMB, + "glm-4-air": 0.001 * RMB, + "glm-4-airx": 0.01 * RMB, + "glm-4-long": 0.001 * RMB, + "glm-4-flash": 0, + "glm-4v-plus": 0.01 * RMB, "qwen-turbo": 0.8572, // ¥0.012 / 1k tokens "qwen-plus": 10, // ¥0.14 / 1k tokens "text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens diff --git a/relay/channel/zhipu_4v/constants.go b/relay/channel/zhipu_4v/constants.go index 3383eb3..816fa53 100644 --- a/relay/channel/zhipu_4v/constants.go +++ b/relay/channel/zhipu_4v/constants.go @@ -1,7 +1,7 @@ package zhipu_4v var ModelList = []string{ - "glm-4", "glm-4v", "glm-3-turbo", "glm-4-alltools", + "glm-4", "glm-4v", "glm-3-turbo", "glm-4-alltools", "glm-4-plus", "glm-4-0520", "glm-4-air", "glm-4-airx", "glm-4-long", "glm-4-flash", "glm-4v-plus", } var ChannelName = "zhipu_4v" From 722cc174b72a38be0c5e39bed6aea55b0ca4dc43 Mon Sep 17 00:00:00 2001 From: HynoR <20227709+HynoR@users.noreply.github.com> Date: Sun, 1 Sep 2024 15:21:05 +0700 Subject: [PATCH 2/3] Cohere Update --- common/model-ratio.go | 46 ++++++++++++++++++-------------- relay/channel/cohere/constant.go | 5 +++- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/common/model-ratio.go b/common/model-ratio.go index 946ee46..eda5504 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -137,26 +137,28 @@ var defaultModelRatio = map[string]float64{ "hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 // https://platform.lingyiwanwu.com/docs#-计费单元 // 已经按照 7.2 来换算美元价格 - "yi-34b-chat-0205": 0.18, - "yi-34b-chat-200k": 0.864, - "yi-vl-plus": 0.432, - "yi-large": 20.0 / 1000 * RMB, - "yi-medium": 2.5 / 1000 * RMB, - "yi-vision": 6.0 / 1000 * RMB, - "yi-medium-200k": 12.0 / 1000 * RMB, - "yi-spark": 1.0 / 1000 * RMB, - "yi-large-rag": 25.0 / 1000 * RMB, - "yi-large-turbo": 12.0 / 1000 * RMB, - "yi-large-preview": 20.0 / 1000 * RMB, - "yi-large-rag-preview": 25.0 / 1000 * RMB, - "command": 0.5, - "command-nightly": 0.5, - "command-light": 0.5, - "command-light-nightly": 0.5, - "command-r": 0.25, - "command-r-plus ": 1.5, - "deepseek-chat": 0.07, - "deepseek-coder": 0.07, + "yi-34b-chat-0205": 0.18, + "yi-34b-chat-200k": 0.864, + "yi-vl-plus": 0.432, + "yi-large": 20.0 / 1000 * RMB, + "yi-medium": 2.5 / 1000 * RMB, + "yi-vision": 6.0 / 1000 * RMB, + "yi-medium-200k": 12.0 / 1000 * RMB, + "yi-spark": 1.0 / 1000 * RMB, + "yi-large-rag": 25.0 / 1000 * RMB, + "yi-large-turbo": 12.0 / 1000 * RMB, + "yi-large-preview": 20.0 / 1000 * RMB, + "yi-large-rag-preview": 25.0 / 1000 * RMB, + "command": 0.5, + "command-nightly": 0.5, + "command-light": 0.5, + "command-light-nightly": 0.5, + "command-r": 0.25, + "command-r-plus": 1.5, + "command-r-08-2024": 0.075, + "command-r-plus-08-2024": 1.25, + "deepseek-chat": 0.07, + "deepseek-coder": 0.07, // Perplexity online 模型对搜索额外收费,有需要应自行调整,此处不计入搜索费用 "llama-3-sonar-small-32k-chat": 0.2 / 1000 * USD, "llama-3-sonar-small-32k-online": 0.2 / 1000 * USD, @@ -365,6 +367,10 @@ func GetCompletionRatio(name string) float64 { return 3 case "command-r-plus": return 5 + case "command-r-08-2024": + return 4 + case "command-r-plus-08-2024": + return 4 default: return 2 } diff --git a/relay/channel/cohere/constant.go b/relay/channel/cohere/constant.go index 8f34e4f..734620a 100644 --- a/relay/channel/cohere/constant.go +++ b/relay/channel/cohere/constant.go @@ -1,7 +1,10 @@ package cohere var ModelList = []string{ - "command-r", "command-r-plus", "command-light", "command-light-nightly", "command", "command-nightly", + "command-r", "command-r-plus", + "command-r-08-2024", "command-r-plus-08-2024", + "c4ai-aya-23-35b", "c4ai-aya-23-8b", + "command-light", "command-light-nightly", "command", "command-nightly", "rerank-english-v3.0", "rerank-multilingual-v3.0", "rerank-english-v2.0", "rerank-multilingual-v2.0", } From 0830ef33053ed0ef2fbf0d4e1784775a74fb1492 Mon Sep 17 00:00:00 2001 From: CalciumIon <1808837298@qq.com> Date: Mon, 2 Sep 2024 01:11:19 +0800 Subject: [PATCH 3/3] feat: support jina embedding --- relay/channel/jina/adaptor.go | 4 +++- relay/channel/jina/relay-jina.go | 25 +++++++++++++++++++++++++ relay/relay-text.go | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/relay/channel/jina/adaptor.go b/relay/channel/jina/adaptor.go index 6a04d08..f296ed0 100644 --- a/relay/channel/jina/adaptor.go +++ b/relay/channel/jina/adaptor.go @@ -32,7 +32,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { if info.RelayMode == constant.RelayModeRerank { return fmt.Sprintf("%s/v1/rerank", info.BaseUrl), nil } else if info.RelayMode == constant.RelayModeEmbeddings { - return fmt.Sprintf("%s/v1/embeddings ", info.BaseUrl), nil + return fmt.Sprintf("%s/v1/embeddings", info.BaseUrl), nil } return "", errors.New("invalid relay mode") } @@ -58,6 +58,8 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) { if info.RelayMode == constant.RelayModeRerank { err, usage = jinaRerankHandler(c, resp) + } else if info.RelayMode == constant.RelayModeEmbeddings { + err, usage = jinaEmbeddingHandler(c, resp) } return } diff --git a/relay/channel/jina/relay-jina.go b/relay/channel/jina/relay-jina.go index 5fdd44f..6c339ae 100644 --- a/relay/channel/jina/relay-jina.go +++ b/relay/channel/jina/relay-jina.go @@ -33,3 +33,28 @@ func jinaRerankHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWit _, err = c.Writer.Write(jsonResponse) return nil, &jinaResp.Usage } + +func jinaEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) { + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil + } + err = resp.Body.Close() + if err != nil { + return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil + } + var jinaResp dto.OpenAIEmbeddingResponse + err = json.Unmarshal(responseBody, &jinaResp) + if err != nil { + return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil + } + + jsonResponse, err := json.Marshal(jinaResp) + if err != nil { + return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil + } + c.Writer.Header().Set("Content-Type", "application/json") + c.Writer.WriteHeader(resp.StatusCode) + _, err = c.Writer.Write(jsonResponse) + return nil, &jinaResp.Usage +} diff --git a/relay/relay-text.go b/relay/relay-text.go index 3c5393a..14e82f1 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -52,7 +52,7 @@ func getAndValidateTextRequest(c *gin.Context, relayInfo *relaycommon.RelayInfo) } case relayconstant.RelayModeEmbeddings: case relayconstant.RelayModeModerations: - if textRequest.Input == "" { + if textRequest.Input == "" || textRequest.Input == nil { return nil, errors.New("field input is required") } case relayconstant.RelayModeEdits: