diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index e1f80161..fc5cf4c6 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -86,7 +86,8 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     }
     // o1/o1-mini/o1-preview do not support system prompt/max_tokens/temperature
-    if strings.HasPrefix(request.Model, "o1") {
+    if strings.HasPrefix(request.Model, "o1") ||
+        strings.HasPrefix(request.Model, "o3") {
         temperature := float64(1)
         request.Temperature = &temperature // Only the default (1) value is supported
diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go
index 7f116e28..735f3374 100644
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -9,6 +9,7 @@ var ModelList = []string{
     "gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
     "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "chatgpt-4o-latest",
     "gpt-4o-mini", "gpt-4o-mini-2024-07-18",
+    "gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
     "gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
     "gpt-4-vision-preview",
     "text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
@@ -22,4 +23,5 @@ var ModelList = []string{
     "o1", "o1-2024-12-17",
     "o1-preview", "o1-preview-2024-09-12",
     "o1-mini", "o1-mini-2024-09-12",
+    "o3-mini", "o3-mini-2025-01-31",
 }
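Note: the o1/o3 prefix test added above is repeated verbatim in GetCompletionRatio (see the relay/billing/ratio/model.go hunks below). A minimal sketch of a shared predicate both call sites could use — hypothetical, not part of this diff; the helper name and package placement are assumptions, and since the two call sites live in different packages it would need a common home:

package openai // placement illustrative only

import "strings"

// isReasoningModel reports whether the model belongs to one of the reasoning-model
// families (o1/o3) that get the special temperature and completion-ratio handling.
func isReasoningModel(model string) bool {
    return strings.HasPrefix(model, "o1") ||
        strings.HasPrefix(model, "o3")
}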
diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go
index cb72b383..237001ad 100644
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -45,49 +45,53 @@ var ModelRatio = map[string]float64{
     // Audio billing will mix text and audio tokens, the unit price is different.
     // Here records the cost of text, the cost multiplier of audio
     // relative to text is in AudioRatio
-    "gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens
-    "gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens
-    "gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens
-    "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
-    "gpt-3.5-turbo-0301": 0.75,
-    "gpt-3.5-turbo-0613": 0.75,
-    "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
-    "gpt-3.5-turbo-16k-0613": 1.5,
-    "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
-    "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
-    "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
-    "o1": 7.5, // $15.00 / 1M input tokens
-    "o1-2024-12-17": 7.5,
-    "o1-preview": 7.5, // $15.00 / 1M input tokens
-    "o1-preview-2024-09-12": 7.5,
-    "o1-mini": 1.5, // $3.00 / 1M input tokens
-    "o1-mini-2024-09-12": 1.5,
-    "davinci-002": 1, // $0.002 / 1K tokens
-    "babbage-002": 0.2, // $0.0004 / 1K tokens
-    "text-ada-001": 0.2,
-    "text-babbage-001": 0.25,
-    "text-curie-001": 1,
-    "text-davinci-002": 10,
-    "text-davinci-003": 10,
-    "text-davinci-edit-001": 10,
-    "code-davinci-edit-001": 10,
-    "whisper-1": 15,
-    "tts-1": 7.5, // $0.015 / 1K characters
-    "tts-1-1106": 7.5,
-    "tts-1-hd": 15, // $0.030 / 1K characters
-    "tts-1-hd-1106": 15,
-    "davinci": 10,
-    "curie": 10,
-    "babbage": 10,
-    "ada": 10,
-    "text-embedding-ada-002": 0.05,
-    "text-embedding-3-small": 0.01,
-    "text-embedding-3-large": 0.065,
-    "text-search-ada-doc-001": 10,
-    "text-moderation-stable": 0.1,
-    "text-moderation-latest": 0.1,
-    "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
-    "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
+    "gpt-4o-audio-preview":                 1.25, // $0.0025 / 1K tokens
+    "gpt-4o-audio-preview-2024-12-17":      1.25, // $0.0025 / 1K tokens
+    "gpt-4o-audio-preview-2024-10-01":      1.25, // $0.0025 / 1K tokens
+    "gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
+    "gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
+    "gpt-3.5-turbo":                        0.25, // $0.0005 / 1K tokens
+    "gpt-3.5-turbo-0301":                   0.75,
+    "gpt-3.5-turbo-0613":                   0.75,
+    "gpt-3.5-turbo-16k":                    1.5, // $0.003 / 1K tokens
+    "gpt-3.5-turbo-16k-0613":               1.5,
+    "gpt-3.5-turbo-instruct":               0.75, // $0.0015 / 1K tokens
+    "gpt-3.5-turbo-1106":                   0.5, // $0.001 / 1K tokens
+    "gpt-3.5-turbo-0125":                   0.25, // $0.0005 / 1K tokens
+    "o1":                                   7.5, // $15.00 / 1M input tokens
+    "o1-2024-12-17":                        7.5,
+    "o1-preview":                           7.5, // $15.00 / 1M input tokens
+    "o1-preview-2024-09-12":                7.5,
+    "o1-mini":                              1.5, // $3.00 / 1M input tokens
+    "o1-mini-2024-09-12":                   1.5,
+    "o3-mini":                              1.1 * MILLI_USD,
+    "o3-mini-2025-01-31":                   1.1 * MILLI_USD,
+    "davinci-002":                          1, // $0.002 / 1K tokens
+    "babbage-002":                          0.2, // $0.0004 / 1K tokens
+    "text-ada-001":                         0.2,
+    "text-babbage-001":                     0.25,
+    "text-curie-001":                       1,
+    "text-davinci-002":                     10,
+    "text-davinci-003":                     10,
+    "text-davinci-edit-001":                10,
+    "code-davinci-edit-001":                10,
+    "whisper-1":                            15,
+    "tts-1":                                7.5, // $0.015 / 1K characters
+    "tts-1-1106":                           7.5,
+    "tts-1-hd":                             15, // $0.030 / 1K characters
+    "tts-1-hd-1106":                        15,
+    "davinci":                              10,
+    "curie":                                10,
+    "babbage":                              10,
+    "ada":                                  10,
+    "text-embedding-ada-002":               0.05,
+    "text-embedding-3-small":               0.01,
+    "text-embedding-3-large":               0.065,
+    "text-search-ada-doc-001":              10,
+    "text-moderation-stable":               0.1,
+    "text-moderation-latest":               0.1,
+    "dall-e-2":                             0.02 * USD, // $0.016 - $0.020 / image
+    "dall-e-3":                             0.04 * USD, // $0.040 - $0.120 / image
     // https://www.anthropic.com/api#pricing
     "claude-instant-1.2": 0.8 / 1000 * USD,
     "claude-2.0": 8.0 / 1000 * USD,
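Note on the new ratios: in this file a ratio of 1 corresponds to $0.002 per 1K tokens (see the davinci-002 and o1 comments above), so 1.1 * MILLI_USD works out to 0.55, i.e. $1.10 per 1M input tokens, and 0.15 * MILLI_USD to 0.075, i.e. $0.15 per 1M. A runnable sketch of that arithmetic, assuming USD = 500 and MILLI_USD = USD / 1000 as those comments imply (the constant values are inferred, not quoted from this file):

package main

import "fmt"

const (
    USD       = 500              // ratio 1 <=> $0.002 per 1K tokens (inferred from the comments above)
    MILLI_USD = 1.0 / 1000 * USD // = 0.5
)

// pricePer1K converts a billing ratio back into USD per 1K text tokens.
func pricePer1K(ratio float64) float64 { return ratio * 0.002 }

func main() {
    o3 := 1.1 * MILLI_USD    // 0.55
    mini := 0.15 * MILLI_USD // 0.075
    fmt.Printf("o3-mini: ratio %.2f -> $%.4f/1K = $%.2f/1M\n", o3, pricePer1K(o3), 1000*pricePer1K(o3))
    fmt.Printf("gpt-4o-mini-audio-preview: ratio %.3f -> $%.5f/1K = $%.2f/1M\n", mini, pricePer1K(mini), 1000*pricePer1K(mini))
    // Output:
    // o3-mini: ratio 0.55 -> $0.0011/1K = $1.10/1M
    // gpt-4o-mini-audio-preview: ratio 0.075 -> $0.00015/1K = $0.15/1M
}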
@@ -345,9 +349,11 @@ var ModelRatio = map[string]float64{
 // AudioRatio represents the price ratio between audio tokens and text tokens
 var AudioRatio = map[string]float64{
-    "gpt-4o-audio-preview": 16,
-    "gpt-4o-audio-preview-2024-12-17": 16,
-    "gpt-4o-audio-preview-2024-10-01": 40,
+    "gpt-4o-audio-preview":                 16,
+    "gpt-4o-audio-preview-2024-12-17":      16,
+    "gpt-4o-audio-preview-2024-10-01":      40,
+    "gpt-4o-mini-audio-preview":            10 / 0.15,
+    "gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
 }

 // GetAudioPromptRatio returns the audio prompt ratio for the given model.
@@ -364,10 +370,12 @@ func GetAudioPromptRatio(actualModelName string) float64 {
 // AudioCompletionRatio is the completion ratio for audio models.
 var AudioCompletionRatio = map[string]float64{
-    "whisper-1": 0,
-    "gpt-4o-audio-preview": 2,
-    "gpt-4o-audio-preview-2024-12-17": 2,
-    "gpt-4o-audio-preview-2024-10-01": 2,
+    "whisper-1":                            0,
+    "gpt-4o-audio-preview":                 2,
+    "gpt-4o-audio-preview-2024-12-17":      2,
+    "gpt-4o-audio-preview-2024-10-01":      2,
+    "gpt-4o-mini-audio-preview":            2,
+    "gpt-4o-mini-audio-preview-2024-12-17": 2,
 }

 // GetAudioCompletionRatio returns the completion ratio for audio models.
@@ -389,9 +397,11 @@ var AudioPromptTokensPerSecond = map[string]float64{
     // After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
     "whisper-1": 0.0001 / 0.03 * 1000,
     // gpt-4o-audio series processes 10 tokens per second
-    "gpt-4o-audio-preview": 10,
-    "gpt-4o-audio-preview-2024-12-17": 10,
-    "gpt-4o-audio-preview-2024-10-01": 10,
+    "gpt-4o-audio-preview":                 10,
+    "gpt-4o-audio-preview-2024-12-17":      10,
+    "gpt-4o-audio-preview-2024-10-01":      10,
+    "gpt-4o-mini-audio-preview":            10,
+    "gpt-4o-mini-audio-preview-2024-12-17": 10,
 }

 // GetAudioPromptTokensPerSecond returns the number of audio tokens per second
@@ -549,7 +559,8 @@ func GetCompletionRatio(name string, channelType int) float64 {
         return 2
     }
     // including o1/o1-preview/o1-mini
-    if strings.HasPrefix(name, "o1") {
+    if strings.HasPrefix(name, "o1") ||
+        strings.HasPrefix(name, "o3") {
         return 4
     }
     if name == "chatgpt-4o-latest" {
diff --git a/relay/model/general.go b/relay/model/general.go
index 57a5b0b6..3d940dd3 100644
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -34,7 +34,7 @@ type GeneralOpenAIRequest struct {
     MaxTokens           int  `json:"max_tokens,omitempty"`
     MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
     N                   int  `json:"n,omitempty"`
-    // ReasoningEffort constrains effort on reasoning for reasoning models, o1 models only.
+    // ReasoningEffort constrains effort on reasoning; supported by reasoning models (o1/o3 series) only.
     ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
     // Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
     Modalities []string `json:"modalities,omitempty"`
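Note on the AudioRatio and GetCompletionRatio changes above: 10 / 0.15 is the audio-to-text input price multiplier for gpt-4o-mini-audio-preview (audio input at an assumed $10 per 1M tokens versus the $0.15 per 1M text price recorded in ModelRatio); because 0.15 is a floating-point constant, the Go constant expression evaluates to roughly 66.67 rather than truncating to an integer. The completion ratio of 4 likewise matches an output price about 4x the input price (e.g. $4.40 out vs $1.10 in for o3-mini — an assumption based on published pricing, not a value taken from this diff). A small sketch of both checks:

package main

import "fmt"

func main() {
    // AudioRatio for gpt-4o-mini-audio-preview: audio input price over text input price.
    const audioUSDPerM = 10.0 // assumed $ per 1M audio input tokens
    const textUSDPerM = 0.15  // $ per 1M text input tokens (per the ModelRatio comment)
    fmt.Printf("audio/text prompt ratio: %.2f\n", audioUSDPerM/textUSDPerM) // 66.67

    // GetCompletionRatio returning 4 for o1/o3 means output tokens are billed at 4x input.
    const inUSDPerM, outUSDPerM = 1.10, 4.40 // assumed o3-mini list prices
    fmt.Printf("o3-mini completion ratio: %.1f\n", outUSDPerM/inUSDPerM) // 4.0
}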