mirror of
https://github.com/songquanpeng/one-api.git
synced 2025-09-22 19:36:36 +08:00
fix: refactor pricing models and enhance completion ratio logic
- Update pricing ratios and calculations for AI models in the billing system. - Introduce new constants and enhance error handling for audio token rates. - Comment out outdated pricing entries and include additional models in calculations.
This commit is contained in:
parent
969fdca9ef
commit
adcf4712e6
@ -272,9 +272,9 @@ var ModelRatio = map[string]float64{
|
|||||||
"deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb,
|
"deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb,
|
||||||
"deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb,
|
"deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb,
|
||||||
"deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb,
|
"deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb,
|
||||||
"deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb,
|
// "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb,
|
||||||
"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
|
"deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb,
|
||||||
"deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
|
// "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb,
|
||||||
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens
|
"SparkDesk": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||||
@ -321,22 +321,30 @@ var ModelRatio = map[string]float64{
|
|||||||
"mistral-medium-latest": 2.7 * MilliTokensUsd,
|
"mistral-medium-latest": 2.7 * MilliTokensUsd,
|
||||||
"mistral-large-latest": 8.0 * MilliTokensUsd,
|
"mistral-large-latest": 8.0 * MilliTokensUsd,
|
||||||
"mistral-embed": 0.1 * MilliTokensUsd,
|
"mistral-embed": 0.1 * MilliTokensUsd,
|
||||||
// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
|
// -------------------------------------
|
||||||
"gemma-7b-it": 0.07 / 1000 * MilliTokensUsd,
|
// https://groq.com/pricing/
|
||||||
"gemma2-9b-it": 0.20 / 1000 * MilliTokensUsd,
|
// -------------------------------------
|
||||||
"llama-3.1-70b-versatile": 0.59 / 1000 * MilliTokensUsd,
|
"gemma2-9b-it": 0.20 * MilliTokensUsd,
|
||||||
"llama-3.1-8b-instant": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.1-8b-instant": 0.05 * MilliTokensUsd,
|
||||||
"llama-3.2-11b-text-preview": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.2-11b-text-preview": 0.18 * MilliTokensUsd,
|
||||||
"llama-3.2-11b-vision-preview": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.2-11b-vision-preview": 0.18 * MilliTokensUsd,
|
||||||
"llama-3.2-1b-preview": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.2-1b-preview": 0.04 * MilliTokensUsd,
|
||||||
"llama-3.2-3b-preview": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.2-3b-preview": 0.06 * MilliTokensUsd,
|
||||||
"llama-3.2-90b-text-preview": 0.59 / 1000 * MilliTokensUsd,
|
"llama-3.2-90b-text-preview": 0.90 * MilliTokensUsd,
|
||||||
"llama-guard-3-8b": 0.05 / 1000 * MilliTokensUsd,
|
"llama-3.2-90b-vision-preview": 0.90 * MilliTokensUsd,
|
||||||
"llama3-70b-8192": 0.59 / 1000 * MilliTokensUsd,
|
"llama-3.3-70b-versatile": 0.59 * MilliTokensUsd,
|
||||||
"llama3-8b-8192": 0.05 / 1000 * MilliTokensUsd,
|
"llama-guard-3-8b": 0.20 * MilliTokensUsd,
|
||||||
"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000 * MilliTokensUsd,
|
"llama3-70b-8192": 0.59 * MilliTokensUsd,
|
||||||
"llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000 * MilliTokensUsd,
|
"llama3-8b-8192": 0.05 * MilliTokensUsd,
|
||||||
"mixtral-8x7b-32768": 0.24 / 1000 * MilliTokensUsd,
|
"llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd,
|
||||||
|
"llama3-groq-8b-8192-tool-use-preview": 0.05 * MilliTokensUsd,
|
||||||
|
"mixtral-8x7b-32768": 0.24 * MilliTokensUsd,
|
||||||
|
"whisper-large-v3": 0.111 * MilliTokensUsd,
|
||||||
|
"whisper-large-v3-turbo": 0.04 * MilliTokensUsd,
|
||||||
|
"distil-whisper-large-v3-en": 0.02 * MilliTokensUsd,
|
||||||
|
"deepseek-r1-distill-qwen-32b": 0.69 * MilliTokensUsd,
|
||||||
|
"deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd,
|
||||||
|
"deepseek-r1-distill-llama-70b": 0.75 * MilliTokensUsd,
|
||||||
// https://platform.lingyiwanwu.com/docs#-计费单元
|
// https://platform.lingyiwanwu.com/docs#-计费单元
|
||||||
"yi-34b-chat-0205": 2.5 * MilliRmb,
|
"yi-34b-chat-0205": 2.5 * MilliRmb,
|
||||||
"yi-34b-chat-200k": 12.0 * MilliRmb,
|
"yi-34b-chat-200k": 12.0 * MilliRmb,
|
||||||
@ -651,15 +659,113 @@ var ModelRatio = map[string]float64{
|
|||||||
"xwin-lm/xwin-lm-70b": 1.875,
|
"xwin-lm/xwin-lm-70b": 1.875,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CompletionRatio is the price ratio between completion tokens and prompt tokens
|
||||||
var CompletionRatio = map[string]float64{
|
var CompletionRatio = map[string]float64{
|
||||||
// aws llama3
|
// aws llama3
|
||||||
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
||||||
"llama3-70b-8192(33)": 0.0035 / 0.00265,
|
"llama3-70b-8192(33)": 0.0035 / 0.00265,
|
||||||
// whisper
|
// whisper
|
||||||
"whisper-1": 0, // only count input tokens
|
"whisper-1": 0, // only count input tokens
|
||||||
|
"whisper-large-v3": 0, // only count input tokens
|
||||||
|
"whisper-large-v3-turbo": 0, // only count input tokens
|
||||||
|
"distil-whisper-large-v3-en": 0, // only count input tokens
|
||||||
// deepseek
|
// deepseek
|
||||||
"deepseek-chat": 0.28 / 0.14,
|
"deepseek-chat": 1.1 / 0.27,
|
||||||
"deepseek-reasoner": 2.19 / 0.55,
|
"deepseek-reasoner": 2.19 / 0.55,
|
||||||
|
// openrouter
|
||||||
|
"deepseek/deepseek-chat": 1,
|
||||||
|
"deepseek/deepseek-r1": 1,
|
||||||
|
// -------------------------------------
|
||||||
|
// groq
|
||||||
|
// -------------------------------------
|
||||||
|
"llama-3.3-70b-versatile": 0.79 / 0.59,
|
||||||
|
"llama-3.1-8b-instant": 0.08 / 0.05,
|
||||||
|
"llama3-70b-8192": 0.79 / 0.59,
|
||||||
|
"llama3-8b-8192": 0.08 / 0.05,
|
||||||
|
"gemma2-9b-it": 1.0,
|
||||||
|
"llama-3.2-11b-text-preview": 1.0,
|
||||||
|
"llama-3.2-11b-vision-preview": 1.0,
|
||||||
|
"llama-3.2-1b-preview": 1.0,
|
||||||
|
"llama-3.2-3b-preview": 1.0,
|
||||||
|
"llama-3.2-90b-text-preview": 1.0,
|
||||||
|
"llama-3.2-90b-vision-preview": 1.0,
|
||||||
|
"llama-guard-3-8b": 1.0,
|
||||||
|
"llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59,
|
||||||
|
"llama3-groq-8b-8192-tool-use-preview": 0.08 / 0.05,
|
||||||
|
"mixtral-8x7b-32768": 1.0,
|
||||||
|
"deepseek-r1-distill-qwen-32b": 1,
|
||||||
|
"deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75,
|
||||||
|
"deepseek-r1-distill-llama-70b": 0.99 / 0.75,
|
||||||
|
}
|
||||||
|
|
||||||
|
// AudioRatio represents the price ratio between audio tokens and text tokens
|
||||||
|
var AudioRatio = map[string]float64{
|
||||||
|
"gpt-4o-audio-preview": 16,
|
||||||
|
"gpt-4o-audio-preview-2024-12-17": 16,
|
||||||
|
"gpt-4o-audio-preview-2024-10-01": 40,
|
||||||
|
"gpt-4o-mini-audio-preview": 10 / 0.15,
|
||||||
|
"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAudioPromptRatio returns the audio prompt ratio for the given model.
|
||||||
|
func GetAudioPromptRatio(actualModelName string) float64 {
|
||||||
|
var v float64
|
||||||
|
if ratio, ok := AudioRatio[actualModelName]; ok {
|
||||||
|
v = ratio
|
||||||
|
} else {
|
||||||
|
v = 16
|
||||||
|
}
|
||||||
|
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// AudioCompletionRatio is the completion ratio for audio models.
|
||||||
|
var AudioCompletionRatio = map[string]float64{
|
||||||
|
"whisper-1": 0,
|
||||||
|
"gpt-4o-audio-preview": 2,
|
||||||
|
"gpt-4o-audio-preview-2024-12-17": 2,
|
||||||
|
"gpt-4o-audio-preview-2024-10-01": 2,
|
||||||
|
"gpt-4o-mini-audio-preview": 2,
|
||||||
|
"gpt-4o-mini-audio-preview-2024-12-17": 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAudioCompletionRatio returns the completion ratio for audio models.
|
||||||
|
func GetAudioCompletionRatio(actualModelName string) float64 {
|
||||||
|
var v float64
|
||||||
|
if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
|
||||||
|
v = ratio
|
||||||
|
} else {
|
||||||
|
v = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// AudioPromptTokensPerSecond is the number of audio prompt tokens per second for each model.
|
||||||
|
var AudioPromptTokensPerSecond = map[string]float64{
|
||||||
|
// Whisper API price is $0.0001/sec. One-api's historical ratio is 15,
|
||||||
|
// corresponding to $0.03/kilo_tokens.
|
||||||
|
// After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
|
||||||
|
"whisper-1": 0.0001 / 0.03 * 1000,
|
||||||
|
// gpt-4o-audio series processes 10 tokens per second
|
||||||
|
"gpt-4o-audio-preview": 10,
|
||||||
|
"gpt-4o-audio-preview-2024-12-17": 10,
|
||||||
|
"gpt-4o-audio-preview-2024-10-01": 10,
|
||||||
|
"gpt-4o-mini-audio-preview": 10,
|
||||||
|
"gpt-4o-mini-audio-preview-2024-12-17": 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
|
||||||
|
// for the given model.
|
||||||
|
func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
|
||||||
|
var v float64
|
||||||
|
if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
|
||||||
|
v = tokensPerSecond
|
||||||
|
} else {
|
||||||
|
v = 10
|
||||||
|
}
|
||||||
|
|
||||||
|
return v
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -722,23 +828,26 @@ func GetModelRatio(name string, channelType int) float64 {
|
|||||||
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
|
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
|
||||||
name = strings.TrimSuffix(name, "-internet")
|
name = strings.TrimSuffix(name, "-internet")
|
||||||
}
|
}
|
||||||
|
|
||||||
model := fmt.Sprintf("%s(%d)", name, channelType)
|
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||||
if ratio, ok := ModelRatio[model]; ok {
|
|
||||||
|
for _, targetName := range []string{model, name} {
|
||||||
|
for _, ratioMap := range []map[string]float64{
|
||||||
|
ModelRatio,
|
||||||
|
DefaultModelRatio,
|
||||||
|
AudioRatio,
|
||||||
|
} {
|
||||||
|
if ratio, ok := ratioMap[targetName]; ok {
|
||||||
return ratio
|
return ratio
|
||||||
}
|
}
|
||||||
if ratio, ok := DefaultModelRatio[model]; ok {
|
|
||||||
return ratio
|
|
||||||
}
|
}
|
||||||
if ratio, ok := ModelRatio[name]; ok {
|
|
||||||
return ratio
|
|
||||||
}
|
|
||||||
if ratio, ok := DefaultModelRatio[name]; ok {
|
|
||||||
return ratio
|
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.SysError("model ratio not found: " + name)
|
logger.SysError("model ratio not found: " + name)
|
||||||
return 30
|
return 30
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CompletionRatio2JSONString returns the CompletionRatio map as a JSON string.
|
||||||
func CompletionRatio2JSONString() string {
|
func CompletionRatio2JSONString() string {
|
||||||
jsonBytes, err := json.Marshal(CompletionRatio)
|
jsonBytes, err := json.Marshal(CompletionRatio)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -747,59 +856,79 @@ func CompletionRatio2JSONString() string {
|
|||||||
return string(jsonBytes)
|
return string(jsonBytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// completionRatioLock is a mutex for synchronizing access to the CompletionRatio map.
|
||||||
|
var completionRatioLock sync.RWMutex
|
||||||
|
|
||||||
|
// UpdateCompletionRatioByJSONString updates the CompletionRatio map with the given JSON string.
|
||||||
func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
||||||
|
completionRatioLock.Lock()
|
||||||
|
defer completionRatioLock.Unlock()
|
||||||
CompletionRatio = make(map[string]float64)
|
CompletionRatio = make(map[string]float64)
|
||||||
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetCompletionRatio returns the completion ratio for the given model name and channel type.
|
||||||
func GetCompletionRatio(name string, channelType int) float64 {
|
func GetCompletionRatio(name string, channelType int) float64 {
|
||||||
|
completionRatioLock.RLock()
|
||||||
|
defer completionRatioLock.RUnlock()
|
||||||
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
||||||
name = strings.TrimSuffix(name, "-internet")
|
name = strings.TrimSuffix(name, "-internet")
|
||||||
}
|
}
|
||||||
model := fmt.Sprintf("%s(%d)", name, channelType)
|
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||||
if ratio, ok := CompletionRatio[model]; ok {
|
|
||||||
|
name = strings.TrimPrefix(name, "openai/")
|
||||||
|
for _, targetName := range []string{model, name} {
|
||||||
|
for _, ratioMap := range []map[string]float64{
|
||||||
|
CompletionRatio,
|
||||||
|
DefaultCompletionRatio,
|
||||||
|
AudioCompletionRatio,
|
||||||
|
} {
|
||||||
|
// first try the model name
|
||||||
|
if ratio, ok := ratioMap[targetName]; ok {
|
||||||
return ratio
|
return ratio
|
||||||
}
|
}
|
||||||
if ratio, ok := DefaultCompletionRatio[model]; ok {
|
|
||||||
|
// then try the model name without some special prefix
|
||||||
|
normalizedTargetName := strings.TrimPrefix(targetName, "openai/")
|
||||||
|
if ratio, ok := ratioMap[normalizedTargetName]; ok {
|
||||||
return ratio
|
return ratio
|
||||||
}
|
}
|
||||||
if ratio, ok := CompletionRatio[name]; ok {
|
|
||||||
return ratio
|
|
||||||
}
|
}
|
||||||
if ratio, ok := DefaultCompletionRatio[name]; ok {
|
|
||||||
return ratio
|
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(name, "gpt-3.5") {
|
|
||||||
if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
|
// openai
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(name, "gpt-3.5"):
|
||||||
|
switch {
|
||||||
|
case name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125"):
|
||||||
// https://openai.com/blog/new-embedding-models-and-api-updates
|
// https://openai.com/blog/new-embedding-models-and-api-updates
|
||||||
// Updated GPT-3.5 Turbo model and lower pricing
|
// Updated GPT-3.5 Turbo model and lower pricing
|
||||||
return 3
|
return 3
|
||||||
}
|
case strings.HasSuffix(name, "1106"):
|
||||||
if strings.HasSuffix(name, "1106") {
|
|
||||||
return 2
|
return 2
|
||||||
}
|
default:
|
||||||
return 4.0 / 3.0
|
return 4.0 / 3.0
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(name, "gpt-4") {
|
case name == "chatgpt-4o-latest":
|
||||||
if strings.HasPrefix(name, "gpt-4o") {
|
return 3
|
||||||
|
case strings.HasPrefix(name, "gpt-4"):
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(name, "gpt-4o"):
|
||||||
if name == "gpt-4o-2024-05-13" {
|
if name == "gpt-4o-2024-05-13" {
|
||||||
return 3
|
return 3
|
||||||
}
|
}
|
||||||
return 4
|
return 4
|
||||||
}
|
case strings.HasPrefix(name, "gpt-4-"):
|
||||||
if strings.HasPrefix(name, "gpt-4-turbo") ||
|
|
||||||
strings.HasSuffix(name, "preview") {
|
|
||||||
return 3
|
return 3
|
||||||
}
|
default:
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
// including o1, o1-preview, o1-mini
|
// including o1/o1-preview/o1-mini and the o3 series
|
||||||
if strings.HasPrefix(name, "o1") {
|
case strings.HasPrefix(name, "o1") ||
|
||||||
|
strings.HasPrefix(name, "o3"):
|
||||||
return 4
|
return 4
|
||||||
}
|
}
|
||||||
if name == "chatgpt-4o-latest" {
|
|
||||||
return 3
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(name, "claude-3") {
|
if strings.HasPrefix(name, "claude-3") {
|
||||||
return 5
|
return 5
|
||||||
}
|
}
|
||||||
@ -810,10 +939,7 @@ func GetCompletionRatio(name string, channelType int) float64 {
|
|||||||
return 3
|
return 3
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(name, "gemini-") {
|
if strings.HasPrefix(name, "gemini-") {
|
||||||
return 3
|
return 4
|
||||||
}
|
|
||||||
if strings.HasPrefix(name, "deepseek-") {
|
|
||||||
return 2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch name {
|
switch name {
|
||||||
@ -861,5 +987,6 @@ func GetCompletionRatio(name string, channelType int) float64 {
|
|||||||
return 1.000 / 0.300 // ≈3.333333
|
return 1.000 / 0.300 // ≈3.333333
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.SysWarn(fmt.Sprintf("completion ratio not found for model: %s (channel type: %d), using default value 1", name, channelType))
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user