fix: refactor pricing models and enhance completion ratio logic

- Update pricing ratios and calculations for AI models in the billing system.
- Introduce new constants and enhance error handling for audio token rates.
- Comment out outdated pricing entries and include additional models in calculations.
2026-02-19 12:24:25 +08:00 · 2025-03-14 03:10:24 +00:00
parent 969fdca9ef
commit adcf4712e6
1 changed files with 204 additions and 77 deletions
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -272,9 +272,9 @@ var ModelRatio = map[string]float64{
 	"deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb,
 	"deepseek-r1-distill-qwen-7b":   0.0005 * KiloRmb,
 	"deepseek-r1-distill-qwen-14b":  0.001 * KiloRmb,
-	"deepseek-r1-distill-qwen-32b":  0.002 * KiloRmb,
+	// "deepseek-r1-distill-qwen-32b":  0.002 * KiloRmb,
 	"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
-	"deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
+	// "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
 	"SparkDesk":                 1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v1.1":            1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v2.1":            1.2858, // ￥0.018 / 1k tokens
@@ -321,22 +321,30 @@ var ModelRatio = map[string]float64{
 	"mistral-medium-latest": 2.7 * MilliTokensUsd,
 	"mistral-large-latest":  8.0 * MilliTokensUsd,
 	"mistral-embed":         0.1 * MilliTokensUsd,
-	// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
+	// -------------------------------------
-	"gemma-7b-it":                           0.07 / 1000 * MilliTokensUsd,
+	// https://groq.com/pricing/
-	"gemma2-9b-it":                          0.20 / 1000 * MilliTokensUsd,
+	// -------------------------------------
-	"llama-3.1-70b-versatile":               0.59 / 1000 * MilliTokensUsd,
+	"gemma2-9b-it":                          0.20 * MilliTokensUsd,
-	"llama-3.1-8b-instant":                  0.05 / 1000 * MilliTokensUsd,
+	"llama-3.1-8b-instant":                  0.05 * MilliTokensUsd,
-	"llama-3.2-11b-text-preview":            0.05 / 1000 * MilliTokensUsd,
+	"llama-3.2-11b-text-preview":            0.18 * MilliTokensUsd,
-	"llama-3.2-11b-vision-preview":          0.05 / 1000 * MilliTokensUsd,
+	"llama-3.2-11b-vision-preview":          0.18 * MilliTokensUsd,
-	"llama-3.2-1b-preview":                  0.05 / 1000 * MilliTokensUsd,
+	"llama-3.2-1b-preview":                  0.04 * MilliTokensUsd,
-	"llama-3.2-3b-preview":                  0.05 / 1000 * MilliTokensUsd,
+	"llama-3.2-3b-preview":                  0.06 * MilliTokensUsd,
-	"llama-3.2-90b-text-preview":            0.59 / 1000 * MilliTokensUsd,
+	"llama-3.2-90b-text-preview":            0.90 * MilliTokensUsd,
-	"llama-guard-3-8b":                      0.05 / 1000 * MilliTokensUsd,
+	"llama-3.2-90b-vision-preview":          0.90 * MilliTokensUsd,
-	"llama3-70b-8192":                       0.59 / 1000 * MilliTokensUsd,
+	"llama-3.3-70b-versatile":               0.59 * MilliTokensUsd,
-	"llama3-8b-8192":                        0.05 / 1000 * MilliTokensUsd,
+	"llama-guard-3-8b":                      0.20 * MilliTokensUsd,
-	"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000 * MilliTokensUsd,
+	"llama3-70b-8192":                       0.59 * MilliTokensUsd,
-	"llama3-groq-8b-8192-tool-use-preview":  0.19 / 1000 * MilliTokensUsd,
+	"llama3-8b-8192":                        0.05 * MilliTokensUsd,
-	"mixtral-8x7b-32768":                    0.24 / 1000 * MilliTokensUsd,
+	"llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd,
 	"llama3-groq-8b-8192-tool-use-preview":  0.05 * MilliTokensUsd,
 	"mixtral-8x7b-32768":                    0.24 * MilliTokensUsd,
 	"whisper-large-v3":                      0.111 * MilliTokensUsd,
 	"whisper-large-v3-turbo":                0.04 * MilliTokensUsd,
 	"distil-whisper-large-v3-en":            0.02 * MilliTokensUsd,
 	"deepseek-r1-distill-qwen-32b":          0.69 * MilliTokensUsd,
 	"deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd,
 	"deepseek-r1-distill-llama-70b":         0.75 * MilliTokensUsd,
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	"yi-34b-chat-0205": 2.5 * MilliRmb,
 	"yi-34b-chat-200k": 12.0 * MilliRmb,
@@ -651,15 +659,113 @@ var ModelRatio = map[string]float64{
 	"xwin-lm/xwin-lm-70b":                             1.875,
 }
 // CompletionRatio is the price ratio between completion tokens and prompt
 // tokens, keyed by model.
 //
 // A key is either a bare model name or a channel-qualified name of the form
 // "name(channelType)"; GetCompletionRatio builds the qualified form with
 // fmt.Sprintf("%s(%d)", name, channelType) and tries it before the bare name.
 // A value of 0 means completion tokens are not charged at all.
 //
 // The whole map is replaced at runtime by UpdateCompletionRatioByJSONString.
 var CompletionRatio = map[string]float64{
 	// aws llama3 (channel-qualified keys)
 	"llama3-8b-8192(33)":  0.0006 / 0.0003,
 	"llama3-70b-8192(33)": 0.0035 / 0.00265,
 	// whisper
 	"whisper-1":                  0, // only count input tokens
 	"whisper-large-v3":           0, // only count input tokens
 	"whisper-large-v3-turbo":     0, // only count input tokens
 	"distil-whisper-large-v3-en": 0, // only count input tokens
 	// deepseek
-	"deepseek-chat":     0.28 / 0.14,
+	"deepseek-chat":     1.1 / 0.27,
 	"deepseek-reasoner": 2.19 / 0.55,
 	// openrouter
 	"deepseek/deepseek-chat": 1,
 	"deepseek/deepseek-r1":   1,
 	// -------------------------------------
 	// groq
 	// -------------------------------------
 	"llama-3.3-70b-versatile":               0.79 / 0.59,
 	"llama-3.1-8b-instant":                  0.08 / 0.05,
 	"llama3-70b-8192":                       0.79 / 0.59,
 	"llama3-8b-8192":                        0.08 / 0.05,
 	"gemma2-9b-it":                          1.0,
 	"llama-3.2-11b-text-preview":            1.0,
 	"llama-3.2-11b-vision-preview":          1.0,
 	"llama-3.2-1b-preview":                  1.0,
 	"llama-3.2-3b-preview":                  1.0,
 	"llama-3.2-90b-text-preview":            1.0,
 	"llama-3.2-90b-vision-preview":          1.0,
 	"llama-guard-3-8b":                      1.0,
 	"llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59,
 	"llama3-groq-8b-8192-tool-use-preview":  0.08 / 0.05,
 	"mixtral-8x7b-32768":                    1.0,
 	"deepseek-r1-distill-qwen-32b":          1,
 	"deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75,
 	"deepseek-r1-distill-llama-70b":         0.99 / 0.75,
 }
 // AudioRatio maps an audio-capable model name to the price ratio between its
 // audio tokens and its text tokens.
 var AudioRatio = map[string]float64{
 	// gpt-4o audio previews
 	"gpt-4o-audio-preview-2024-10-01": 40,
 	"gpt-4o-audio-preview-2024-12-17": 16,
 	"gpt-4o-audio-preview":            16,
 	// gpt-4o-mini audio previews
 	"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
 	"gpt-4o-mini-audio-preview":            10 / 0.15,
 }
 // GetAudioPromptRatio looks actualModelName up in AudioRatio and returns the
 // stored ratio. Models without an entry get the fallback ratio of 16.
 func GetAudioPromptRatio(actualModelName string) float64 {
 	const fallbackRatio = 16
 	if ratio, known := AudioRatio[actualModelName]; known {
 		return ratio
 	}
 	return fallbackRatio
 }
 // AudioCompletionRatio holds the completion ratio for each audio model.
 var AudioCompletionRatio = map[string]float64{
 	// whisper
 	"whisper-1": 0,
 	// gpt-4o audio previews
 	"gpt-4o-audio-preview-2024-10-01": 2,
 	"gpt-4o-audio-preview-2024-12-17": 2,
 	"gpt-4o-audio-preview":            2,
 	// gpt-4o-mini audio previews
 	"gpt-4o-mini-audio-preview-2024-12-17": 2,
 	"gpt-4o-mini-audio-preview":            2,
 }
 // GetAudioCompletionRatio looks actualModelName up in AudioCompletionRatio and
 // returns the stored ratio. Models without an entry get the fallback ratio
 // of 2.
 func GetAudioCompletionRatio(actualModelName string) float64 {
 	const fallbackRatio = 2
 	if ratio, known := AudioCompletionRatio[actualModelName]; known {
 		return ratio
 	}
 	return fallbackRatio
 }
 // AudioPromptTokensPerSecond maps a model name to the number of audio prompt
 // tokens counted for each second of audio.
 var AudioPromptTokensPerSecond = map[string]float64{
 	// The Whisper API is priced at $0.0001 per second, while one-api's
 	// historical ratio of 15 corresponds to $0.03 per thousand tokens.
 	// Converting between the two gives 0.0001/0.03*1000 = 3.3333 tokens
 	// per second.
 	"whisper-1": 0.0001 / 0.03 * 1000,
 	// The gpt-4o-audio series processes 10 tokens per second.
 	"gpt-4o-audio-preview-2024-10-01":      10,
 	"gpt-4o-audio-preview-2024-12-17":      10,
 	"gpt-4o-audio-preview":                 10,
 	"gpt-4o-mini-audio-preview-2024-12-17": 10,
 	"gpt-4o-mini-audio-preview":            10,
 }
 // GetAudioPromptTokensPerSecond looks actualModelName up in
 // AudioPromptTokensPerSecond and returns the stored rate. Models without an
 // entry get the fallback rate of 10 tokens per second.
 func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
 	const fallbackRate = 10
 	if rate, known := AudioPromptTokensPerSecond[actualModelName]; known {
 		return rate
 	}
 	return fallbackRate
 }
 var (
@@ -722,23 +828,26 @@ func GetModelRatio(name string, channelType int) float64 {
 	if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
 	model := fmt.Sprintf("%s(%d)", name, channelType)
-	if ratio, ok := ModelRatio[model]; ok {
+
 	for _, targetName := range []string{model, name} {
 		for _, ratioMap := range []map[string]float64{
 			ModelRatio,
 			DefaultModelRatio,
 			AudioRatio,
 		} {
 			if ratio, ok := ratioMap[targetName]; ok {
 				return ratio
 			}
 	if ratio, ok := DefaultModelRatio[model]; ok {
 		return ratio
 		}
 	if ratio, ok := ModelRatio[name]; ok {
 		return ratio
 	}
 	if ratio, ok := DefaultModelRatio[name]; ok {
 		return ratio
 	}
 	logger.SysError("model ratio not found: " + name)
 	return 30
 }
 // CompletionRatio2JSONString returns the CompletionRatio map as a JSON string.
 func CompletionRatio2JSONString() string {
 	jsonBytes, err := json.Marshal(CompletionRatio)
 	if err != nil {
@@ -747,59 +856,79 @@ func CompletionRatio2JSONString() string {
 	return string(jsonBytes)
 }
 // completionRatioLock synchronizes access to the CompletionRatio map.
 // UpdateCompletionRatioByJSONString holds the write lock while it replaces the
 // map, and GetCompletionRatio holds the read lock for the duration of a lookup.
 var completionRatioLock sync.RWMutex
 // UpdateCompletionRatioByJSONString replaces the CompletionRatio map with the
 // mapping decoded from jsonStr, which must be a JSON object of model name to
 // ratio.
 //
 // The payload is decoded into a scratch map and swapped in only when decoding
 // succeeds, so a malformed or partially valid payload leaves the current
 // ratios untouched instead of wiping or half-populating them. It returns the
 // json.Unmarshal error unchanged on failure and nil on success.
 func UpdateCompletionRatioByJSONString(jsonStr string) error {
 	// Decode before taking the lock so readers are not blocked by parsing.
 	parsed := make(map[string]float64)
 	if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
 		return err
 	}
 	completionRatioLock.Lock()
 	defer completionRatioLock.Unlock()
 	CompletionRatio = parsed
 	return nil
 }
 // GetCompletionRatio returns the completion ratio for the given model name and channel type.
 func GetCompletionRatio(name string, channelType int) float64 {
 	completionRatioLock.RLock()
 	defer completionRatioLock.RUnlock()
 	if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
 	model := fmt.Sprintf("%s(%d)", name, channelType)
-	if ratio, ok := CompletionRatio[model]; ok {
+
 	name = strings.TrimPrefix(name, "openai/")
 	for _, targetName := range []string{model, name} {
 		for _, ratioMap := range []map[string]float64{
 			CompletionRatio,
 			DefaultCompletionRatio,
 			AudioCompletionRatio,
 		} {
 			// first try the model name
 			if ratio, ok := ratioMap[targetName]; ok {
 				return ratio
 			}
-	if ratio, ok := DefaultCompletionRatio[model]; ok {
+
 			// then try the model name without some special prefix
 			normalizedTargetName := strings.TrimPrefix(targetName, "openai/")
 			if ratio, ok := ratioMap[normalizedTargetName]; ok {
 				return ratio
 			}
 	if ratio, ok := CompletionRatio[name]; ok {
 		return ratio
 		}
 	if ratio, ok := DefaultCompletionRatio[name]; ok {
 		return ratio
 	}
-	if strings.HasPrefix(name, "gpt-3.5") {
+
-		if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
+	// openai
 	switch {
 	case strings.HasPrefix(name, "gpt-3.5"):
 		switch {
 		case name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125"):
 			// https://openai.com/blog/new-embedding-models-and-api-updates
 			// Updated GPT-3.5 Turbo model and lower pricing
 			return 3
-		}
+		case strings.HasSuffix(name, "1106"):
 		if strings.HasSuffix(name, "1106") {
 			return 2
-		}
+		default:
 			return 4.0 / 3.0
 		}
-	if strings.HasPrefix(name, "gpt-4") {
+	case name == "chatgpt-4o-latest":
-		if strings.HasPrefix(name, "gpt-4o") {
+		return 3
 	case strings.HasPrefix(name, "gpt-4"):
 		switch {
 		case strings.HasPrefix(name, "gpt-4o"):
 			if name == "gpt-4o-2024-05-13" {
 				return 3
 			}
 			return 4
-		}
+		case strings.HasPrefix(name, "gpt-4-"):
 		if strings.HasPrefix(name, "gpt-4-turbo") ||
 			strings.HasSuffix(name, "preview") {
 			return 3
-		}
+		default:
 			return 2
 		}
-	// including o1, o1-preview, o1-mini
+	// including o1/o1-preview/o1-mini
-	if strings.HasPrefix(name, "o1") {
+	case strings.HasPrefix(name, "o1") ||
 		strings.HasPrefix(name, "o3"):
 		return 4
 	}
-	if name == "chatgpt-4o-latest" {
+
 		return 3
 	}
 	if strings.HasPrefix(name, "claude-3") {
 		return 5
 	}
@@ -810,10 +939,7 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		return 3
 	}
 	if strings.HasPrefix(name, "gemini-") {
-		return 3
+		return 4
 	}
 	if strings.HasPrefix(name, "deepseek-") {
 		return 2
 	}
 	switch name {
@@ -861,5 +987,6 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		return 1.000 / 0.300 // ≈3.333333
 	}
 	logger.SysWarn(fmt.Sprintf("completion ratio not found for model: %s (channel type: %d), using default value 1", name, channelType))
 	return 1
 }