Merge branch 'upstream/main'

Laisky.Cai committed 2025-02-03 12:25:00 +00:00
70 changed files with 4032 additions and 2781 deletions

@@ -4,6 +4,7 @@ var ModelList = []string{
"claude-instant-1.2", "claude-2.0", "claude-2.1",
"claude-3-haiku-20240307",
"claude-3-5-haiku-20241022",
"claude-3-5-haiku-latest",
"claude-3-sonnet-20240229",
"claude-3-opus-20240229",
"claude-3-5-sonnet-20240620",

@@ -11,7 +11,6 @@ var ModelList = []string{
"llama-3.2-11b-vision-preview",
"llama-3.2-1b-preview",
"llama-3.2-3b-preview",
"llama-3.2-11b-vision-preview",
"llama-3.2-90b-text-preview",
"llama-3.2-90b-vision-preview",
"llama-guard-3-8b",

@@ -27,7 +27,8 @@ func InitTokenEncoders() {
 logger.SysLog("initializing token encoders")
 gpt35TokenEncoder, err := tiktoken.EncodingForModel("gpt-3.5-turbo")
 if err != nil {
-logger.FatalLog(fmt.Sprintf("failed to get gpt-3.5-turbo token encoder: %s", err.Error()))
+logger.FatalLog(fmt.Sprintf("failed to get gpt-3.5-turbo token encoder: %s, "+
+"if you are running in an offline environment, set TIKTOKEN_CACHE_DIR to use existing cache files; see https://stackoverflow.com/questions/76106366/how-to-use-tiktoken-in-offline-mode-computer for more information", err.Error()))
 }
 defaultTokenEncoder = gpt35TokenEncoder
 gpt4oTokenEncoder, err := tiktoken.EncodingForModel("gpt-4o")
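The new error message points at running the token encoders without network access. A minimal sketch of that setup, assuming the tiktoken package used here is pkoukk/tiktoken-go, whose default BPE loader reads the TIKTOKEN_CACHE_DIR variable named in the message; the cache path below is only illustrative:

package main

import (
	"fmt"
	"os"

	"github.com/pkoukk/tiktoken-go"
)

func main() {
	// Point the BPE loader at a directory that already contains the
	// downloaded encoding files, so startup needs no network access.
	// The path is an example, not taken from this diff.
	os.Setenv("TIKTOKEN_CACHE_DIR", "/opt/one-api/tiktoken-cache")

	enc, err := tiktoken.EncodingForModel("gpt-3.5-turbo")
	if err != nil {
		fmt.Println("token encoder init failed:", err)
		return
	}
	fmt.Println(len(enc.Encode("hello world", nil, nil)), "tokens")
}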

@@ -1,7 +1,14 @@
 package zhipu
 // https://open.bigmodel.cn/pricing
 var ModelList = []string{
-"chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite",
-"glm-4", "glm-4v", "glm-3-turbo", "embedding-2",
-"cogview-3",
+"glm-zero-preview", "glm-4-plus", "glm-4-0520", "glm-4-airx",
+"glm-4-air", "glm-4-long", "glm-4-flashx", "glm-4-flash",
+"glm-4", "glm-3-turbo",
+"glm-4v-plus", "glm-4v", "glm-4v-flash",
+"cogview-3-plus", "cogview-3", "cogview-3-flash",
+"cogviewx", "cogviewx-flash",
+"charglm-4", "emohaa", "codegeex-4",
+"embedding-2", "embedding-3",
 }

@@ -98,6 +98,7 @@ var ModelRatio = map[string]float64{
"claude-2.1": 8.0 / 1000 * USD,
"claude-3-haiku-20240307": 0.25 / 1000 * USD,
"claude-3-5-haiku-20241022": 1.0 / 1000 * USD,
"claude-3-5-haiku-latest": 1.0 / 1000 * USD,
"claude-3-sonnet-20240229": 3.0 / 1000 * USD,
"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,
"claude-3-5-sonnet-20241022": 3.0 / 1000 * USD,
@@ -131,15 +132,29 @@ var ModelRatio = map[string]float64{
"gemini-2.0-flash-thinking-exp-01-21": 0.075 * MILLI_USD,
"aqa": 1,
// https://open.bigmodel.cn/pricing
"glm-4": 0.1 * RMB,
"glm-4v": 0.1 * RMB,
"glm-3-turbo": 0.005 * RMB,
"embedding-2": 0.0005 * RMB,
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
"cogview-3": 0.25 * RMB,
"glm-zero-preview": 0.01 * RMB,
"glm-4-plus": 0.05 * RMB,
"glm-4-0520": 0.1 * RMB,
"glm-4-airx": 0.01 * RMB,
"glm-4-air": 0.0005 * RMB,
"glm-4-long": 0.001 * RMB,
"glm-4-flashx": 0.0001 * RMB,
"glm-4-flash": 0,
"glm-4": 0.1 * RMB, // deprecated model, available until 2025/06
"glm-3-turbo": 0.001 * RMB, // deprecated model, available until 2025/06
"glm-4v-plus": 0.004 * RMB,
"glm-4v": 0.05 * RMB,
"glm-4v-flash": 0,
"cogview-3-plus": 0.06 * RMB,
"cogview-3": 0.1 * RMB,
"cogview-3-flash": 0,
"cogviewx": 0.5 * RMB,
"cogviewx-flash": 0,
"charglm-4": 0.001 * RMB,
"emohaa": 0.015 * RMB,
"codegeex-4": 0.0001 * RMB,
"embedding-2": 0.0005 * RMB,
"embedding-3": 0.0005 * RMB,
// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
"qwen-turbo": 1.4286, // ¥0.02 / 1k tokens
"qwen-turbo-latest": 1.4286,
@@ -226,9 +241,19 @@ var ModelRatio = map[string]float64{
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
"hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
"ChatStd": 0.01 * RMB,
"ChatPro": 0.1 * RMB,
// https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
"hunyuan-turbo": 0.015 * RMB,
"hunyuan-large": 0.004 * RMB,
"hunyuan-large-longcontext": 0.006 * RMB,
"hunyuan-standard": 0.0008 * RMB,
"hunyuan-standard-256K": 0.0005 * RMB,
"hunyuan-translation-lite": 0.005 * RMB,
"hunyuan-role": 0.004 * RMB,
"hunyuan-functioncall": 0.004 * RMB,
"hunyuan-code": 0.004 * RMB,
"hunyuan-turbo-vision": 0.08 * RMB,
"hunyuan-vision": 0.018 * RMB,
"hunyuan-embedding": 0.0007 * RMB,
// https://platform.moonshot.cn/pricing
"moonshot-v1-8k": 0.012 * RMB,
"moonshot-v1-32k": 0.024 * RMB,

@@ -5,7 +5,7 @@ import (
"os"
"time"
gutils "github.com/Laisky/go-utils/v4"
gutils "github.com/Laisky/go-utils/v5"
"github.com/songquanpeng/one-api/common/config"
)