feat: extend support for o3 models and update model ratios

2025-11-11 02:43:44 +08:00 · 2025-02-01 11:44:04 +00:00
parent f47c70aaa2
commit 0e19d60769
4 changed files with 70 additions and 56 deletions
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -86,7 +86,8 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 	}
 	// o1/o1-mini/o1-preview do not support system prompt/max_tokens/temperature
-	if strings.HasPrefix(request.Model, "o1") {
+	if strings.HasPrefix(request.Model, "o1") ||
 		strings.HasPrefix(request.Model, "o3") {
 		temperature := float64(1)
 		request.Temperature = &temperature // Only the default (1) value is supported
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -9,6 +9,7 @@ var ModelList = []string{
 	"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
 	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
 	"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
 	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
 	"gpt-4-vision-preview",
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
@@ -22,4 +23,5 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
 	"o3-mini", "o3-mini-2025-01-31",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -45,49 +45,53 @@ var ModelRatio = map[string]float64{
 	// Audio billing will mix text and audio tokens, the unit price is different.
 	// Here records the cost of text, the cost multiplier of audio
 	// relative to text is in AudioRatio
-	"gpt-4o-audio-preview":            1.25, // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview":                 1.25,             // $0.0025 / 1K tokens
-	"gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-12-17":      1.25,             // $0.0025 / 1K tokens
-	"gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-10-01":      1.25,             // $0.0025 / 1K tokens
-	"gpt-3.5-turbo":                   0.25, // $0.0005 / 1K tokens
+	"gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
-	"gpt-3.5-turbo-0301":              0.75,
+	"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
-	"gpt-3.5-turbo-0613":              0.75,
+	"gpt-3.5-turbo":                        0.25,             // $0.0005 / 1K tokens
-	"gpt-3.5-turbo-16k":               1.5, // $0.003 / 1K tokens
+	"gpt-3.5-turbo-0301":                   0.75,
-	"gpt-3.5-turbo-16k-0613":          1.5,
+	"gpt-3.5-turbo-0613":                   0.75,
-	"gpt-3.5-turbo-instruct":          0.75, // $0.0015 / 1K tokens
+	"gpt-3.5-turbo-16k":                    1.5, // $0.003 / 1K tokens
-	"gpt-3.5-turbo-1106":              0.5,  // $0.001 / 1K tokens
+	"gpt-3.5-turbo-16k-0613":               1.5,
-	"gpt-3.5-turbo-0125":              0.25, // $0.0005 / 1K tokens
+	"gpt-3.5-turbo-instruct":               0.75, // $0.0015 / 1K tokens
-	"o1":                              7.5,  // $15.00 / 1M input tokens
+	"gpt-3.5-turbo-1106":                   0.5,  // $0.001 / 1K tokens
-	"o1-2024-12-17":                   7.5,
+	"gpt-3.5-turbo-0125":                   0.25, // $0.0005 / 1K tokens
-	"o1-preview":                      7.5, // $15.00 / 1M input tokens
+	"o1":                                   7.5,  // $15.00 / 1M input tokens
-	"o1-preview-2024-09-12":           7.5,
+	"o1-2024-12-17":                        7.5,
-	"o1-mini":                         1.5, // $3.00 / 1M input tokens
+	"o1-preview":                           7.5, // $15.00 / 1M input tokens
-	"o1-mini-2024-09-12":              1.5,
+	"o1-preview-2024-09-12":                7.5,
-	"davinci-002":                     1,   // $0.002 / 1K tokens
+	"o1-mini":                              1.5, // $3.00 / 1M input tokens
-	"babbage-002":                     0.2, // $0.0004 / 1K tokens
+	"o1-mini-2024-09-12":                   1.5,
-	"text-ada-001":                    0.2,
+	"o3-mini":                              1.1 * MILLI_USD,
-	"text-babbage-001":                0.25,
+	"o3-mini-2025-01-31":                   1.1 * MILLI_USD,
-	"text-curie-001":                  1,
+	"davinci-002":                          1,   // $0.002 / 1K tokens
-	"text-davinci-002":                10,
+	"babbage-002":                          0.2, // $0.0004 / 1K tokens
-	"text-davinci-003":                10,
+	"text-ada-001":                         0.2,
-	"text-davinci-edit-001":           10,
+	"text-babbage-001":                     0.25,
-	"code-davinci-edit-001":           10,
+	"text-curie-001":                       1,
-	"whisper-1":                       15,
+	"text-davinci-002":                     10,
-	"tts-1":                           7.5, // $0.015 / 1K characters
+	"text-davinci-003":                     10,
-	"tts-1-1106":                      7.5,
+	"text-davinci-edit-001":                10,
-	"tts-1-hd":                        15, // $0.030 / 1K characters
+	"code-davinci-edit-001":                10,
-	"tts-1-hd-1106":                   15,
+	"whisper-1":                            15,
-	"davinci":                         10,
+	"tts-1":                                7.5, // $0.015 / 1K characters
-	"curie":                           10,
+	"tts-1-1106":                           7.5,
-	"babbage":                         10,
+	"tts-1-hd":                             15, // $0.030 / 1K characters
-	"ada":                             10,
+	"tts-1-hd-1106":                        15,
-	"text-embedding-ada-002":          0.05,
+	"davinci":                              10,
-	"text-embedding-3-small":          0.01,
+	"curie":                                10,
-	"text-embedding-3-large":          0.065,
+	"babbage":                              10,
-	"text-search-ada-doc-001":         10,
+	"ada":                                  10,
-	"text-moderation-stable":          0.1,
+	"text-embedding-ada-002":               0.05,
-	"text-moderation-latest":          0.1,
+	"text-embedding-3-small":               0.01,
-	"dall-e-2":                        0.02 * USD, // $0.016 - $0.020 / image
+	"text-embedding-3-large":               0.065,
-	"dall-e-3":                        0.04 * USD, // $0.040 - $0.120 / image
+	"text-search-ada-doc-001":              10,
 	"text-moderation-stable":               0.1,
 	"text-moderation-latest":               0.1,
 	"dall-e-2":                             0.02 * USD, // $0.016 - $0.020 / image
 	"dall-e-3":                             0.04 * USD, // $0.040 - $0.120 / image
 	// https://www.anthropic.com/api#pricing
 	"claude-instant-1.2":         0.8 / 1000 * USD,
 	"claude-2.0":                 8.0 / 1000 * USD,
@@ -345,9 +349,11 @@ var ModelRatio = map[string]float64{
 // AudioRatio represents the price ratio between audio tokens and text tokens
 var AudioRatio = map[string]float64{
-	"gpt-4o-audio-preview":            16,
+	"gpt-4o-audio-preview":                 16,
-	"gpt-4o-audio-preview-2024-12-17": 16,
+	"gpt-4o-audio-preview-2024-12-17":      16,
-	"gpt-4o-audio-preview-2024-10-01": 40,
+	"gpt-4o-audio-preview-2024-10-01":      40,
 	"gpt-4o-mini-audio-preview":            10 / 0.15,
 	"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
 }
 // GetAudioPromptRatio returns the audio prompt ratio for the given model.
@@ -364,10 +370,12 @@ func GetAudioPromptRatio(actualModelName string) float64 {
 // AudioCompletionRatio is the completion ratio for audio models.
 var AudioCompletionRatio = map[string]float64{
-	"whisper-1":                       0,
+	"whisper-1":                            0,
-	"gpt-4o-audio-preview":            2,
+	"gpt-4o-audio-preview":                 2,
-	"gpt-4o-audio-preview-2024-12-17": 2,
+	"gpt-4o-audio-preview-2024-12-17":      2,
-	"gpt-4o-audio-preview-2024-10-01": 2,
+	"gpt-4o-audio-preview-2024-10-01":      2,
 	"gpt-4o-mini-audio-preview":            2,
 	"gpt-4o-mini-audio-preview-2024-12-17": 2,
 }
 // GetAudioCompletionRatio returns the completion ratio for audio models.
@@ -389,9 +397,11 @@ var AudioPromptTokensPerSecond = map[string]float64{
 	// After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
 	"whisper-1": 0.0001 / 0.03 * 1000,
 	// gpt-4o-audio series processes 10 tokens per second
-	"gpt-4o-audio-preview":            10,
+	"gpt-4o-audio-preview":                 10,
-	"gpt-4o-audio-preview-2024-12-17": 10,
+	"gpt-4o-audio-preview-2024-12-17":      10,
-	"gpt-4o-audio-preview-2024-10-01": 10,
+	"gpt-4o-audio-preview-2024-10-01":      10,
 	"gpt-4o-mini-audio-preview":            10,
 	"gpt-4o-mini-audio-preview-2024-12-17": 10,
 }
 // GetAudioPromptTokensPerSecond returns the number of audio tokens per second
@@ -549,7 +559,8 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		return 2
 	}
 	// including o1/o1-preview/o1-mini
-	if strings.HasPrefix(name, "o1") {
+	if strings.HasPrefix(name, "o1") ||
 		strings.HasPrefix(name, "o3") {
 		return 4
 	}
 	if name == "chatgpt-4o-latest" {
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -34,7 +34,7 @@ type GeneralOpenAIRequest struct {
 	MaxTokens           int       `json:"max_tokens,omitempty"`
 	MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
 	N                   int       `json:"n,omitempty"`
-	// ReasoningEffort constrains effort on reasoning for reasoning models, o1 models only.
+	// ReasoningEffort constrains the effort spent on reasoning; it applies to reasoning models only.
 	ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
 	// Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
 	Modalities       []string        `json:"modalities,omitempty"`