diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go index 6946e402..55eb5a4b 100644 --- a/relay/adaptor/openai/adaptor.go +++ b/relay/adaptor/openai/adaptor.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/common/config" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/doubao" "github.com/songquanpeng/one-api/relay/adaptor/minimax" @@ -82,6 +83,33 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G } request.StreamOptions.IncludeUsage = true } + + // o1 and o3 series models do not support system prompts, max_tokens, or a non-default temperature + if strings.HasPrefix(request.Model, "o1") || + strings.HasPrefix(request.Model, "o3") { + temperature := float64(1) + request.Temperature = &temperature // Only the default (1) value is supported + + request.MaxTokens = 0 + request.Messages = func(raw []model.Message) (filtered []model.Message) { + for i := range raw { + if raw[i].Role != "system" { + filtered = append(filtered, raw[i]) + } + } + + return + }(request.Messages) + } + + if request.Stream && !config.EnforceIncludeUsage && + (strings.HasPrefix(request.Model, "gpt-4o-audio") || + strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) { + // TODO: Since it is not clear how to implement billing in stream mode, + // it is temporarily not supported + return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio") + } + return request, nil } diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go index 8a643bc6..f59e075c 100644 --- a/relay/adaptor/openai/constants.go +++ b/relay/adaptor/openai/constants.go @@ -12,6 +12,8 @@ var ModelList = []string{ "gpt-4o-2024-11-20", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", + "gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", 
"gpt-4o-audio-preview-2024-10-01", "gpt-4-vision-preview", "text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large", "text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003", @@ -24,4 +26,5 @@ var ModelList = []string{ "o1", "o1-2024-12-17", "o1-preview", "o1-preview-2024-09-12", "o1-mini", "o1-mini-2024-09-12", + "o3-mini", "o3-mini-2025-01-31", } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index 7fe08506..e631e64d 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -23,65 +23,75 @@ const ( // 1 === ¥0.014 / 1k tokens var ModelRatio = map[string]float64{ // https://openai.com/pricing - "gpt-4": 15, - "gpt-4-0314": 15, - "gpt-4-0613": 15, - "gpt-4-32k": 30, - "gpt-4-32k-0314": 30, - "gpt-4-32k-0613": 30, - "gpt-4-1106-preview": 5, // $0.01 / 1K tokens - "gpt-4-0125-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo": 5, // $0.01 / 1K tokens - "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens - "gpt-4o": 2.5, // $0.005 / 1K tokens - "chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens - "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens - "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens - "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens - "gpt-4-vision-preview": 5, // $0.01 / 1K tokens - "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens - "gpt-3.5-turbo-0301": 0.75, - "gpt-3.5-turbo-0613": 0.75, - "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens - "gpt-3.5-turbo-16k-0613": 1.5, - "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens - "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens - "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens - "o1": 7.5, // $15.00 / 1M input tokens - "o1-2024-12-17": 7.5, - "o1-preview": 7.5, // $15.00 / 1M input tokens - "o1-preview-2024-09-12": 7.5, - "o1-mini": 1.5, // $3.00 / 1M 
input tokens - "o1-mini-2024-09-12": 1.5, - "davinci-002": 1, // $0.002 / 1K tokens - "babbage-002": 0.2, // $0.0004 / 1K tokens - "text-ada-001": 0.2, - "text-babbage-001": 0.25, - "text-curie-001": 1, - "text-davinci-002": 10, - "text-davinci-003": 10, - "text-davinci-edit-001": 10, - "code-davinci-edit-001": 10, - "whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens - "tts-1": 7.5, // $0.015 / 1K characters - "tts-1-1106": 7.5, - "tts-1-hd": 15, // $0.030 / 1K characters - "tts-1-hd-1106": 15, - "davinci": 10, - "curie": 10, - "babbage": 10, - "ada": 10, - "text-embedding-ada-002": 0.05, - "text-embedding-3-small": 0.01, - "text-embedding-3-large": 0.065, - "text-search-ada-doc-001": 10, - "text-moderation-stable": 0.1, - "text-moderation-latest": 0.1, - "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image - "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image + "gpt-4": 15, + "gpt-4-0314": 15, + "gpt-4-0613": 15, + "gpt-4-32k": 30, + "gpt-4-32k-0314": 30, + "gpt-4-32k-0613": 30, + "gpt-4-1106-preview": 5, // $0.01 / 1K tokens + "gpt-4-0125-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo": 5, // $0.01 / 1K tokens + "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens + "gpt-4o": 2.5, // $0.005 / 1K tokens + "chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens + "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens + "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens + "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens + "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens + "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens + "gpt-4-vision-preview": 5, // $0.01 / 1K tokens + // Audio billing will mix text and audio tokens, the unit price is different. 
+ // The entries here record the cost of text; the cost multiplier of audio + // relative to text is in AudioRatio + "gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens + "gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens + "gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens + "gpt-4o-mini-audio-preview": 0.15 * MILLI_USD, // $0.15/1M tokens + "gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens + "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens + "gpt-3.5-turbo-0301": 0.75, + "gpt-3.5-turbo-0613": 0.75, + "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens + "gpt-3.5-turbo-16k-0613": 1.5, + "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens + "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens + "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens + "o1": 7.5, // $15.00 / 1M input tokens + "o1-2024-12-17": 7.5, + "o1-preview": 7.5, // $15.00 / 1M input tokens + "o1-preview-2024-09-12": 7.5, + "o1-mini": 1.5, // $3.00 / 1M input tokens + "o1-mini-2024-09-12": 1.5, + "o3-mini": 1.1 * MILLI_USD, + "o3-mini-2025-01-31": 1.1 * MILLI_USD, + "davinci-002": 1, // $0.002 / 1K tokens + "babbage-002": 0.2, // $0.0004 / 1K tokens + "text-ada-001": 0.2, + "text-babbage-001": 0.25, + "text-curie-001": 1, + "text-davinci-002": 10, + "text-davinci-003": 10, + "text-davinci-edit-001": 10, + "code-davinci-edit-001": 10, + "whisper-1": 15, + "tts-1": 7.5, // $0.015 / 1K characters + "tts-1-1106": 7.5, + "tts-1-hd": 15, // $0.030 / 1K characters + "tts-1-hd-1106": 15, + "davinci": 10, + "curie": 10, + "babbage": 10, + "ada": 10, + "text-embedding-ada-002": 0.05, + "text-embedding-3-small": 0.01, + "text-embedding-3-large": 0.065, + "text-search-ada-doc-001": 10, + "text-moderation-stable": 0.1, + "text-moderation-latest": 0.1, + "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image + "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image // https://www.anthropic.com/api#pricing "claude-instant-1.2": 0.8 / 1000 * USD, "claude-2.0": 8.0 / 1000 * USD, @@ -335,6 
+345,76 @@ var ModelRatio = map[string]float64{ "mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD, } +// AudioRatio maps a model name to the price ratio between its audio tokens and its text tokens. +var AudioRatio = map[string]float64{ + "gpt-4o-audio-preview": 16, + "gpt-4o-audio-preview-2024-12-17": 16, + "gpt-4o-audio-preview-2024-10-01": 40, + "gpt-4o-mini-audio-preview": 10 / 0.15, + "gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15, +} + +// GetAudioPromptRatio returns the AudioRatio entry for actualModelName, or 16 when the model is not listed. +func GetAudioPromptRatio(actualModelName string) float64 { + var v float64 + if ratio, ok := AudioRatio[actualModelName]; ok { + v = ratio + } else { + v = 16 + } + + return v +} + +// AudioCompletionRatio maps a model name to the completion ratio used for audio. +var AudioCompletionRatio = map[string]float64{ + "whisper-1": 0, + "gpt-4o-audio-preview": 2, + "gpt-4o-audio-preview-2024-12-17": 2, + "gpt-4o-audio-preview-2024-10-01": 2, + "gpt-4o-mini-audio-preview": 2, + "gpt-4o-mini-audio-preview-2024-12-17": 2, +} + +// GetAudioCompletionRatio returns the AudioCompletionRatio entry for actualModelName, or 2 when the model is not listed. +func GetAudioCompletionRatio(actualModelName string) float64 { + var v float64 + if ratio, ok := AudioCompletionRatio[actualModelName]; ok { + v = ratio + } else { + v = 2 + } + + return v +} + +// AudioPromptTokensPerSecond is the number of audio tokens per second for each model. +var AudioPromptTokensPerSecond = map[string]float64{ + // Whisper API price is $0.0001/sec. One-api's historical ratio is 15, + // corresponding to $0.03/kilo_tokens. + // After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333. 
+ "whisper-1": 0.0001 / 0.03 * 1000, + // gpt-4o-audio series processes 10 tokens per second + "gpt-4o-audio-preview": 10, + "gpt-4o-audio-preview-2024-12-17": 10, + "gpt-4o-audio-preview-2024-10-01": 10, + "gpt-4o-mini-audio-preview": 10, + "gpt-4o-mini-audio-preview-2024-12-17": 10, +} + +// GetAudioPromptTokensPerSecond returns the number of audio tokens per second +// for the given model, or 10 when the model is not listed. +func GetAudioPromptTokensPerSecond(actualModelName string) float64 { + var v float64 + if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok { + v = tokensPerSecond + } else { + v = 10 + } + + return v +} + var CompletionRatio = map[string]float64{ // aws llama3 "llama3-8b-8192(33)": 0.0006 / 0.0003, @@ -473,8 +553,9 @@ func GetCompletionRatio(name string, channelType int) float64 { } return 2 } - // including o1, o1-preview, o1-mini - if strings.HasPrefix(name, "o1") { + // including o1/o1-preview/o1-mini and the o3 series + if strings.HasPrefix(name, "o1") || + strings.HasPrefix(name, "o3") { return 4 } if name == "chatgpt-4o-latest" { diff --git a/relay/model/general.go b/relay/model/general.go index 288c07ff..3d940dd3 100644 --- a/relay/model/general.go +++ b/relay/model/general.go @@ -23,36 +23,39 @@ type StreamOptions struct { type GeneralOpenAIRequest struct { // https://platform.openai.com/docs/api-reference/chat/create - Messages []Message `json:"messages,omitempty"` - Model string `json:"model,omitempty"` - Store *bool `json:"store,omitempty"` - Metadata any `json:"metadata,omitempty"` - FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` - LogitBias any `json:"logit_bias,omitempty"` - Logprobs *bool `json:"logprobs,omitempty"` - TopLogprobs *int `json:"top_logprobs,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` - MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` - N int `json:"n,omitempty"` - Modalities []string `json:"modalities,omitempty"` - Prediction any `json:"prediction,omitempty"` - Audio *Audio 
`json:"audio,omitempty"` - PresencePenalty *float64 `json:"presence_penalty,omitempty"` - ResponseFormat *ResponseFormat `json:"response_format,omitempty"` - Seed float64 `json:"seed,omitempty"` - ServiceTier *string `json:"service_tier,omitempty"` - Stop any `json:"stop,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopK int `json:"top_k,omitempty"` - Tools []Tool `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` - User string `json:"user,omitempty"` - FunctionCall any `json:"function_call,omitempty"` - Functions any `json:"functions,omitempty"` + Messages []Message `json:"messages,omitempty"` + Model string `json:"model,omitempty"` + Store *bool `json:"store,omitempty"` + Metadata any `json:"metadata,omitempty"` + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` + LogitBias any `json:"logit_bias,omitempty"` + Logprobs *bool `json:"logprobs,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` + N int `json:"n,omitempty"` + // ReasoningEffort constrains the effort spent on reasoning; it applies to reasoning models only. 
+ ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"` + // Modalities: currently the model only programmatically allows modalities = ["text", "audio"] + Modalities []string `json:"modalities,omitempty"` + Prediction any `json:"prediction,omitempty"` + Audio *Audio `json:"audio,omitempty"` + PresencePenalty *float64 `json:"presence_penalty,omitempty"` + ResponseFormat *ResponseFormat `json:"response_format,omitempty"` + Seed float64 `json:"seed,omitempty"` + ServiceTier *string `json:"service_tier,omitempty"` + Stop any `json:"stop,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` + User string `json:"user,omitempty"` + FunctionCall any `json:"function_call,omitempty"` + Functions any `json:"functions,omitempty"` // https://platform.openai.com/docs/api-reference/embeddings/create Input any `json:"input,omitempty"` EncodingFormat string `json:"encoding_format,omitempty"`