diff --git a/common/model-ratio.go b/common/model-ratio.go index 7c6a012..63d3247 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -3,7 +3,6 @@ package common import ( "encoding/json" "strings" - "time" ) // modelRatio @@ -27,7 +26,7 @@ var DefaultModelRatio = map[string]float64{ "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens "gpt-4-vision-preview": 5, // $0.01 / 1K tokens "gpt-4-1106-vision-preview": 5, // $0.01 / 1K tokens - "gpt-3.5-turbo": 0.75, // $0.0015 / 1K tokens + "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens "gpt-3.5-turbo-0301": 0.75, "gpt-3.5-turbo-0613": 0.75, "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens @@ -188,22 +187,15 @@ func GetModelRatio(name string) float64 { func GetCompletionRatio(name string) float64 { if strings.HasPrefix(name, "gpt-3.5") { - if strings.HasSuffix(name, "0125") { + if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") { + // https://openai.com/blog/new-embedding-models-and-api-updates + // Updated GPT-3.5 Turbo model and lower pricing return 3 } if strings.HasSuffix(name, "1106") { return 2 } - if name == "gpt-3.5-turbo" || name == "gpt-3.5-turbo-16k" { - // TODO: clear this after 2023-12-11 - now := time.Now() - // https://platform.openai.com/docs/models/continuous-model-upgrades - // if after 2023-12-11, use 2 - if now.After(time.Date(2023, 12, 11, 0, 0, 0, 0, time.UTC)) { - return 2 - } - } - return 1.333333 + return 4.0 / 3.0 } if strings.HasPrefix(name, "gpt-4") { if strings.HasSuffix(name, "preview") { @@ -218,5 +210,15 @@ func GetCompletionRatio(name string) float64 { } else if strings.HasPrefix(name, "claude-3") { return 5 } + if strings.HasPrefix(name, "mistral-") { + return 3 + } + if strings.HasPrefix(name, "gemini-") { + return 3 + } + switch name { + case "llama2-70b-4096": + return 0.8 / 0.7 + } return 1 }