Merge remote-tracking branch 'origin/upstream/main'

2025-12-28 02:35:56 +08:00 · 2024-03-05 01:02:35 +00:00
parent f9df8eaa63 4fb22ad4ce
commit cb6cb66646
28 changed files with 536 additions and 149 deletions
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -7,29 +7,6 @@ import (
 	"time"
 )

-var DalleSizeRatios = map[string]map[string]float64{
-	"dall-e-2": {
-		"256x256":   1,
-		"512x512":   1.125,
-		"1024x1024": 1.25,
-	},
-	"dall-e-3": {
-		"1024x1024": 1,
-		"1024x1792": 2,
-		"1792x1024": 2,
-	},
-}
-
-var DalleGenerationImageAmounts = map[string][2]int{
-	"dall-e-2": {1, 10},
-	"dall-e-3": {1, 1}, // OpenAI allows n=1 currently.
-}
-
-var DalleImagePromptLengthLimitations = map[string]int{
-	"dall-e-2": 1000,
-	"dall-e-3": 4000,
-}
-
 const (
 	USD2RMB = 7
 	USD     = 500 // $0.002 = 1 -> $1 = 500
@@ -40,7 +17,6 @@ const (
 // https://platform.openai.com/docs/models/model-endpoint-compatibility
 // https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf
 // https://openai.com/pricing
-// TODO: when a new api is enabled, check the pricing here
 // 1 === $0.002 / 1K tokens
 // 1 === ￥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
@@ -94,14 +70,18 @@ var ModelRatio = map[string]float64{
 	"claude-2.0":              5.51,  // $11.02 / 1M tokens
 	"claude-2.1":              5.51,  // $11.02 / 1M tokens
 	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
-	"ERNIE-Bot":                 0.8572,     // ￥0.012 / 1k tokens
-	"ERNIE-Bot-turbo":           0.5715,     // ￥0.008 / 1k tokens
-	"ERNIE-Bot-4":               0.12 * RMB, // ￥0.12 / 1k tokens
-	"ERNIE-Bot-8k":              0.024 * RMB,
-	"Embedding-V1":              0.1429, // ￥0.002 / 1k tokens
-	"PaLM-2":                    1,
-	"gemini-pro":                1,      // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision":         1,      // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"ERNIE-Bot":         0.8572,     // ￥0.012 / 1k tokens
+	"ERNIE-Bot-turbo":   0.5715,     // ￥0.008 / 1k tokens
+	"ERNIE-Bot-4":       0.12 * RMB, // ￥0.12 / 1k tokens
+	"ERNIE-Bot-8k":      0.024 * RMB,
+	"Embedding-V1":      0.1429, // ￥0.002 / 1k tokens
+	"PaLM-2":            1,
+	"gemini-pro":        1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	// https://open.bigmodel.cn/pricing
+	"glm-4":                     0.1 * RMB,
+	"glm-4v":                    0.1 * RMB,
+	"glm-3-turbo":               0.005 * RMB,
 	"chatglm_turbo":             0.3572, // ￥0.005 / 1k tokens
 	"chatglm_pro":               0.7143, // ￥0.01 / 1k tokens
 	"chatglm_std":               0.3572, // ￥0.005 / 1k tokens
@@ -127,6 +107,37 @@ var ModelRatio = map[string]float64{
 	"moonshot-v1-8k":   0.012 * RMB,
 	"moonshot-v1-32k":  0.024 * RMB,
 	"moonshot-v1-128k": 0.06 * RMB,
+	// https://platform.baichuan-ai.com/price
+	"Baichuan2-Turbo":      0.008 * RMB,
+	"Baichuan2-Turbo-192k": 0.016 * RMB,
+	"Baichuan2-53B":        0.02 * RMB,
+	// https://api.minimax.chat/document/price
+	"abab6-chat":    0.1 * RMB,
+	"abab5.5-chat":  0.015 * RMB,
+	"abab5.5s-chat": 0.005 * RMB,
+	// https://docs.mistral.ai/platform/pricing/
+	"open-mistral-7b":       0.25 / 1000 * USD,
+	"open-mixtral-8x7b":     0.7 / 1000 * USD,
+	"mistral-small-latest":  2.0 / 1000 * USD,
+	"mistral-medium-latest": 2.7 / 1000 * USD,
+	"mistral-large-latest":  8.0 / 1000 * USD,
+	"mistral-embed":         0.1 / 1000 * USD,
+}
+
+var CompletionRatio = map[string]float64{} // per-model completion ratio table; starts empty here and is the first map GetCompletionRatio consults
+
+var DefaultModelRatio map[string]float64      // filled in init() with a copy of ModelRatio; GetModelRatio falls back to it when ModelRatio has no entry
+var DefaultCompletionRatio map[string]float64 // filled in init() with a copy of CompletionRatio; GetCompletionRatio falls back to it on a miss
+
+func init() { // snapshot the ratio tables, as they stand when init runs, into the Default* maps
+	DefaultModelRatio = make(map[string]float64)
+	for k, v := range ModelRatio { // entry-by-entry copy, so DefaultModelRatio is a separate map rather than an alias of ModelRatio
+		DefaultModelRatio[k] = v
+	}
+	DefaultCompletionRatio = make(map[string]float64)
+	for k, v := range CompletionRatio { // copies whatever CompletionRatio holds at this point; its declaration is an empty literal
+		DefaultCompletionRatio[k] = v
+	}
 }

 func ModelRatio2JSONString() string {
@@ -147,6 +158,9 @@ func GetModelRatio(name string) float64 {
 		name = strings.TrimSuffix(name, "-internet")
 	}
 	ratio, ok := ModelRatio[name]
+	if !ok {
+		ratio, ok = DefaultModelRatio[name]
+	}
 	if !ok {
 		logger.SysError("model ratio not found: " + name)
 		return 30
@@ -154,8 +168,6 @@ func GetModelRatio(name string) float64 {
 	return ratio
 }

-var CompletionRatio = map[string]float64{}
-
 func CompletionRatio2JSONString() string {
 	jsonBytes, err := json.Marshal(CompletionRatio)
 	if err != nil {
@@ -176,6 +188,9 @@ func GetCompletionRatio(name string) float64 {
 	if ratio, ok := CompletionRatio[name]; ok {
 		return ratio
 	}
+	if ratio, ok := DefaultCompletionRatio[name]; ok {
+		return ratio
+	}
 	if strings.HasPrefix(name, "gpt-3.5") {
 		if strings.HasSuffix(name, "0125") {
 			// https://openai.com/blog/new-embedding-models-and-api-updates
@@ -208,5 +223,8 @@ func GetCompletionRatio(name string) float64 {
 	if strings.HasPrefix(name, "claude-2") {
 		return 2.965517
 	}
+	if strings.HasPrefix(name, "mistral-") {
+		return 3
+	}
 	return 1
 }