Merge 790e1e9f5b into 6ded638f70

2025-11-11 19:03:43 +08:00 · 2025-02-02 22:23:05 +08:00
parent 6ded638f70 790e1e9f5b
commit 022988c007
4 changed files with 206 additions and 91 deletions
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -8,6 +8,7 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
+	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/adaptor/doubao"
 	"github.com/songquanpeng/one-api/relay/adaptor/minimax"
@@ -82,6 +83,33 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 		}
 		request.StreamOptions.IncludeUsage = true
 	}
+
+	// o1 and o3 series models (o1, o1-mini, o1-preview, o3-mini, ...) do not support system prompt/max_tokens/temperature
+	if strings.HasPrefix(request.Model, "o1") ||
+		strings.HasPrefix(request.Model, "o3") {
+		temperature := float64(1)
+		request.Temperature = &temperature // Only the default (1) value is supported
+
+		request.MaxTokens = 0
+		request.Messages = func(raw []model.Message) (filtered []model.Message) {
+			for i := range raw {
+				if raw[i].Role != "system" {
+					filtered = append(filtered, raw[i])
+				}
+			}
+
+			return
+		}(request.Messages)
+	}
+
+	if request.Stream && !config.EnforceIncludeUsage &&
+		(strings.HasPrefix(request.Model, "gpt-4o-audio") ||
+			strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
+		// TODO: Since it is not clear how to implement billing in stream mode,
+		// it is temporarily not supported
+		return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
+	}
+
 	return request, nil
 }

--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -12,6 +12,8 @@ var ModelList = []string{
 	"gpt-4o-2024-11-20",
 	"chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
+	"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
+	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
 	"gpt-4-vision-preview",
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
 	"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
@@ -24,4 +26,5 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
+	"o3-mini", "o3-mini-2025-01-31",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -42,6 +42,14 @@ var ModelRatio = map[string]float64{
 	"gpt-4o-mini":            0.075, // $0.00015 / 1K tokens
 	"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
 	"gpt-4-vision-preview":   5,     // $0.01 / 1K tokens
+	// Audio models bill text and audio tokens at different unit prices.
+	// The ratios here record the cost of text tokens; the cost multiplier
+	// of audio relative to text is in AudioRatio.
+	"gpt-4o-audio-preview":                 1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-12-17":      1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-10-01":      1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
+	"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
 	"gpt-3.5-turbo":                        0.25,             // $0.0005 / 1K tokens
 	"gpt-3.5-turbo-0301":                   0.75,
 	"gpt-3.5-turbo-0613":                   0.75,
@@ -56,6 +64,8 @@ var ModelRatio = map[string]float64{
 	"o1-preview-2024-09-12":                7.5,
 	"o1-mini":                              1.5, // $3.00 / 1M input tokens
 	"o1-mini-2024-09-12":                   1.5,
+	"o3-mini":                              1.1 * MILLI_USD,
+	"o3-mini-2025-01-31":                   1.1 * MILLI_USD,
 	"davinci-002":                          1,   // $0.002 / 1K tokens
 	"babbage-002":                          0.2, // $0.0004 / 1K tokens
 	"text-ada-001":                         0.2,
@@ -65,7 +75,7 @@ var ModelRatio = map[string]float64{
 	"text-davinci-003":                     10,
 	"text-davinci-edit-001":                10,
 	"code-davinci-edit-001":                10,
-	"whisper-1":               15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+	"whisper-1":                            15,
 	"tts-1":                                7.5, // $0.015 / 1K characters
 	"tts-1-1106":                           7.5,
 	"tts-1-hd":                             15, // $0.030 / 1K characters
@@ -359,6 +369,76 @@ var ModelRatio = map[string]float64{
 	"mistralai/mixtral-8x7b-instruct-v0.1":      0.300 * USD,
 }

+// AudioRatio represents the price ratio between audio tokens and text tokens
+var AudioRatio = map[string]float64{
+	"gpt-4o-audio-preview":                 16,
+	"gpt-4o-audio-preview-2024-12-17":      16,
+	"gpt-4o-audio-preview-2024-10-01":      40,
+	"gpt-4o-mini-audio-preview":            10 / 0.15,
+	"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
+}
+
+// GetAudioPromptRatio returns the audio prompt ratio for the given model.
+func GetAudioPromptRatio(actualModelName string) float64 {
+	var v float64
+	if ratio, ok := AudioRatio[actualModelName]; ok {
+		v = ratio
+	} else {
+		v = 16
+	}
+
+	return v
+}
+
+// AudioCompletionRatio is the completion ratio for audio models.
+var AudioCompletionRatio = map[string]float64{
+	"whisper-1":                            0,
+	"gpt-4o-audio-preview":                 2,
+	"gpt-4o-audio-preview-2024-12-17":      2,
+	"gpt-4o-audio-preview-2024-10-01":      2,
+	"gpt-4o-mini-audio-preview":            2,
+	"gpt-4o-mini-audio-preview-2024-12-17": 2,
+}
+
+// GetAudioCompletionRatio returns the completion ratio for audio models.
+func GetAudioCompletionRatio(actualModelName string) float64 {
+	var v float64
+	if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
+		v = ratio
+	} else {
+		v = 2
+	}
+
+	return v
+}
+
+// AudioPromptTokensPerSecond is the number of audio prompt tokens per second for each model.
+var AudioPromptTokensPerSecond = map[string]float64{
+	// Whisper API price is $0.0001/sec. One-api's historical ratio is 15,
+	// corresponding to $0.03/kilo_tokens.
+	// After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
+	"whisper-1": 0.0001 / 0.03 * 1000,
+	// gpt-4o-audio series processes 10 tokens per second
+	"gpt-4o-audio-preview":                 10,
+	"gpt-4o-audio-preview-2024-12-17":      10,
+	"gpt-4o-audio-preview-2024-10-01":      10,
+	"gpt-4o-mini-audio-preview":            10,
+	"gpt-4o-mini-audio-preview-2024-12-17": 10,
+}
+
+// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
+// for the given model.
+func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
+	var v float64
+	if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
+		v = tokensPerSecond
+	} else {
+		v = 10
+	}
+
+	return v
+}
+
 var CompletionRatio = map[string]float64{
 	// aws llama3
 	"llama3-8b-8192(33)":  0.0006 / 0.0003,
@@ -497,8 +577,9 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		}
 		return 2
 	}
-	// including o1, o1-preview, o1-mini
-	if strings.HasPrefix(name, "o1") {
+	// including o1/o1-preview/o1-mini and the o3 series (e.g. o3-mini)
+	if strings.HasPrefix(name, "o1") ||
+		strings.HasPrefix(name, "o3") {
 		return 4
 	}
 	if name == "chatgpt-4o-latest" {
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -34,6 +34,9 @@ type GeneralOpenAIRequest struct {
 	MaxTokens           int       `json:"max_tokens,omitempty"`
 	MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
 	N                   int       `json:"n,omitempty"`
+	// ReasoningEffort constrains the effort spent on reasoning (low, medium or high); it applies to reasoning models only.
+	ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
+	// Modalities: the model currently only programmatically allows modalities = ["text", "audio"].
 	Modalities       []string        `json:"modalities,omitempty"`
 	Prediction       any             `json:"prediction,omitempty"`
 	Audio            *Audio          `json:"audio,omitempty"`