Merge remote-tracking branch 'remotes/origin_songquanpeng/main'

# Conflicts:
#	relay/adaptor/openai/adaptor.go
#	relay/controller/text.go
2026-04-07 10:54:27 +08:00 · 2024-08-07 06:24:20 +08:00
parent c885953c6d f9774698e9
commit 04aee84485
114 changed files with 3392 additions and 1700 deletions
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -2,6 +2,7 @@ package ratio

 import (
 	"encoding/json"
+	"fmt"
 	"strings"

 	"github.com/songquanpeng/one-api/common/logger"
@@ -27,15 +28,17 @@ var ModelRatio = map[string]float64{
 	"gpt-4-32k":               30,
 	"gpt-4-32k-0314":          30,
 	"gpt-4-32k-0613":          30,
-	"gpt-4-1106-preview":      5,    // $0.01 / 1K tokens
-	"gpt-4-0125-preview":      5,    // $0.01 / 1K tokens
-	"gpt-4-turbo-preview":     5,    // $0.01 / 1K tokens
-	"gpt-4-turbo":             5,    // $0.01 / 1K tokens
-	"gpt-4-turbo-2024-04-09":  5,    // $0.01 / 1K tokens
-	"gpt-4o":                  2.5,  // $0.005 / 1K tokens
-	"gpt-4o-2024-05-13":       2.5,  // $0.005 / 1K tokens
-	"gpt-4-vision-preview":    5,    // $0.01 / 1K tokens
-	"gpt-3.5-turbo":           0.25, // $0.0005 / 1K tokens
+	"gpt-4-1106-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4-0125-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-preview":     5,     // $0.01 / 1K tokens
+	"gpt-4-turbo":             5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-2024-04-09":  5,     // $0.01 / 1K tokens
+	"gpt-4o":                  2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-05-13":       2.5,   // $0.005 / 1K tokens
+	"gpt-4o-mini":             0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini-2024-07-18":  0.075, // $0.00015 / 1K tokens
+	"gpt-4-vision-preview":    5,     // $0.01 / 1K tokens
+	"gpt-3.5-turbo":           0.25,  // $0.0005 / 1K tokens
 	"gpt-3.5-turbo-0301":      0.75,
 	"gpt-3.5-turbo-0613":      0.75,
 	"gpt-3.5-turbo-16k":       1.5, // $0.003 / 1K tokens
@@ -70,12 +73,13 @@ var ModelRatio = map[string]float64{
 	"dall-e-2":                0.02 * USD, // $0.016 - $0.020 / image
 	"dall-e-3":                0.04 * USD, // $0.040 - $0.120 / image
 	// https://www.anthropic.com/api#pricing
-	"claude-instant-1.2":       0.8 / 1000 * USD,
-	"claude-2.0":               8.0 / 1000 * USD,
-	"claude-2.1":               8.0 / 1000 * USD,
-	"claude-3-haiku-20240307":  0.25 / 1000 * USD,
-	"claude-3-sonnet-20240229": 3.0 / 1000 * USD,
-	"claude-3-opus-20240229":   15.0 / 1000 * USD,
+	"claude-instant-1.2":         0.8 / 1000 * USD,
+	"claude-2.0":                 8.0 / 1000 * USD,
+	"claude-2.1":                 8.0 / 1000 * USD,
+	"claude-3-haiku-20240307":    0.25 / 1000 * USD,
+	"claude-3-sonnet-20240229":   3.0 / 1000 * USD,
+	"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,
+	"claude-3-opus-20240229":     15.0 / 1000 * USD,
 	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
 	"ERNIE-4.0-8K":       0.120 * RMB,
 	"ERNIE-3.5-8K":       0.012 * RMB,
@@ -94,12 +98,11 @@ var ModelRatio = map[string]float64{
 	"bge-large-en":       0.002 * RMB,
 	"tao-8k":             0.002 * RMB,
 	// https://ai.google.dev/pricing
-	"PaLM-2":                    1,
-	"gemini-pro":                1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision":         1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-1.0-pro-vision-001": 1,
-	"gemini-1.0-pro-001":        1,
-	"gemini-1.5-pro":            1,
+	"gemini-pro":       1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-1.0-pro":   1,
+	"gemini-1.5-flash": 1,
+	"gemini-1.5-pro":   1,
+	"aqa":              1,
 	// https://open.bigmodel.cn/pricing
 	"glm-4":         0.1 * RMB,
 	"glm-4v":        0.1 * RMB,
@@ -124,6 +127,7 @@ var ModelRatio = map[string]float64{
 	"SparkDesk-v2.1":            1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v3.1":            1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v3.5":            1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v4.0":            1.2858, // ￥0.018 / 1k tokens
 	"360GPT_S2_V9":              0.8572, // ¥0.012 / 1k tokens
 	"embedding-bert-512-v1":     0.0715, // ¥0.001 / 1k tokens
 	"embedding_s1_v1":           0.0715, // ¥0.001 / 1k tokens
@@ -153,12 +157,16 @@ var ModelRatio = map[string]float64{
 	"mistral-large-latest":  8.0 / 1000 * USD,
 	"mistral-embed":         0.1 / 1000 * USD,
 	// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
-	"llama3-70b-8192":    0.59 / 1000 * USD,
-	"mixtral-8x7b-32768": 0.27 / 1000 * USD,
-	"llama3-8b-8192":     0.05 / 1000 * USD,
-	"gemma-7b-it":        0.1 / 1000 * USD,
-	"llama2-70b-4096":    0.64 / 1000 * USD,
-	"llama2-7b-2048":     0.1 / 1000 * USD,
+	"gemma-7b-it":                           0.07 / 1000000 * USD,
+	"mixtral-8x7b-32768":                    0.24 / 1000000 * USD,
+	"llama3-8b-8192":                        0.05 / 1000000 * USD,
+	"llama3-70b-8192":                       0.59 / 1000000 * USD,
+	"gemma2-9b-it":                          0.20 / 1000000 * USD,
+	"llama-3.1-405b-reasoning":              0.89 / 1000000 * USD,
+	"llama-3.1-70b-versatile":               0.59 / 1000000 * USD,
+	"llama-3.1-8b-instant":                  0.05 / 1000000 * USD,
+	"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD,
+	"llama3-groq-8b-8192-tool-use-preview":  0.19 / 1000000 * USD,
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	"yi-34b-chat-0205": 2.5 / 1000 * RMB,
 	"yi-34b-chat-200k": 12.0 / 1000 * RMB,
@@ -167,6 +175,9 @@ var ModelRatio = map[string]float64{
 	"step-1v-32k": 0.024 * RMB,
 	"step-1-32k":  0.024 * RMB,
 	"step-1-200k": 0.15 * RMB,
+	// aws llama3 https://aws.amazon.com/cn/bedrock/pricing/
+	"llama3-8b-8192(33)":  0.0003 / 0.002,  // $0.0003 / 1K tokens
+	"llama3-70b-8192(33)": 0.00265 / 0.002, // $0.00265 / 1K tokens
 	// https://cohere.com/pricing
 	"command":               0.5,
 	"command-nightly":       0.5,
@@ -183,7 +194,11 @@ var ModelRatio = map[string]float64{
 	"deepl-ja": 25.0 / 1000 * USD,
 }

-var CompletionRatio = map[string]float64{}
+var CompletionRatio = map[string]float64{
+	// aws llama3
+	"llama3-8b-8192(33)":  0.0006 / 0.0003,
+	"llama3-70b-8192(33)": 0.0035 / 0.00265,
+}

 var DefaultModelRatio map[string]float64
 var DefaultCompletionRatio map[string]float64
@@ -232,22 +247,28 @@ func UpdateModelRatioByJSONString(jsonStr string) error {
 	return json.Unmarshal([]byte(jsonStr), &ModelRatio)
 }

-func GetModelRatio(name string) float64 {
+func GetModelRatio(name string, channelType int) float64 {
 	if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
 	if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
-	ratio, ok := ModelRatio[name]
-	if !ok {
-		ratio, ok = DefaultModelRatio[name]
+	model := fmt.Sprintf("%s(%d)", name, channelType)
+	if ratio, ok := ModelRatio[model]; ok {
+		return ratio
 	}
-	if !ok {
-		logger.SysError("model ratio not found: " + name)
-		return 30
+	if ratio, ok := DefaultModelRatio[model]; ok {
+		return ratio
 	}
-	return ratio
+	if ratio, ok := ModelRatio[name]; ok {
+		return ratio
+	}
+	if ratio, ok := DefaultModelRatio[name]; ok {
+		return ratio
+	}
+	logger.SysError("model ratio not found: " + name)
+	return 30
 }

 func CompletionRatio2JSONString() string {
@@ -263,7 +284,17 @@ func UpdateCompletionRatioByJSONString(jsonStr string) error {
 	return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
 }

-func GetCompletionRatio(name string) float64 {
+func GetCompletionRatio(name string, channelType int) float64 {
+	if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
+		name = strings.TrimSuffix(name, "-internet")
+	}
+	model := fmt.Sprintf("%s(%d)", name, channelType)
+	if ratio, ok := CompletionRatio[model]; ok {
+		return ratio
+	}
+	if ratio, ok := DefaultCompletionRatio[model]; ok {
+		return ratio
+	}
 	if ratio, ok := CompletionRatio[name]; ok {
 		return ratio
 	}
@@ -282,6 +313,9 @@ func GetCompletionRatio(name string) float64 {
 		return 4.0 / 3.0
 	}
 	if strings.HasPrefix(name, "gpt-4") {
+		if strings.HasPrefix(name, "gpt-4o-mini") {
+			return 4
+		}
 		if strings.HasPrefix(name, "gpt-4-turbo") ||
 			strings.HasPrefix(name, "gpt-4o") ||
 			strings.HasSuffix(name, "preview") {