Merge 790e1e9f5b into 6ded638f70

2025-11-11 10:53:42 +08:00 · 2025-02-02 22:23:05 +08:00
parent 6ded638f70 790e1e9f5b
commit 022988c007
4 changed files with 206 additions and 91 deletions
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/adaptor/doubao"
 	"github.com/songquanpeng/one-api/relay/adaptor/minimax"
@@ -82,6 +83,33 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 		}
 		request.StreamOptions.IncludeUsage = true
 	}
 	// o1 and o3 series models (matched by prefix below) do not support system prompt/max_tokens/temperature
 	if strings.HasPrefix(request.Model, "o1") ||
 		strings.HasPrefix(request.Model, "o3") {
 		temperature := float64(1)
 		request.Temperature = &temperature // Only the default (1) value is supported
 		request.MaxTokens = 0
 		request.Messages = func(raw []model.Message) (filtered []model.Message) {
 			for i := range raw {
 				if raw[i].Role != "system" {
 					filtered = append(filtered, raw[i])
 				}
 			}
 			return
 		}(request.Messages)
 	}
 	if request.Stream && !config.EnforceIncludeUsage &&
 		(strings.HasPrefix(request.Model, "gpt-4o-audio") ||
 			strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
 		// TODO: Since it is not clear how to implement billing in stream mode,
 		// it is temporarily not supported
 		return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
 	}
 	return request, nil
 }
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -12,6 +12,8 @@ var ModelList = []string{
 	"gpt-4o-2024-11-20",
 	"chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
 	"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
 	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
 	"gpt-4-vision-preview",
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
 	"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
@@ -24,4 +26,5 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
 	"o3-mini", "o3-mini-2025-01-31",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -23,65 +23,75 @@ const (
 // 1 === ￥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
 	// https://openai.com/pricing
-	"gpt-4":                   15,
+	"gpt-4":                  15,
-	"gpt-4-0314":              15,
+	"gpt-4-0314":             15,
-	"gpt-4-0613":              15,
+	"gpt-4-0613":             15,
-	"gpt-4-32k":               30,
+	"gpt-4-32k":              30,
-	"gpt-4-32k-0314":          30,
+	"gpt-4-32k-0314":         30,
-	"gpt-4-32k-0613":          30,
+	"gpt-4-32k-0613":         30,
-	"gpt-4-1106-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4-1106-preview":     5,     // $0.01 / 1K tokens
-	"gpt-4-0125-preview":      5,     // $0.01 / 1K tokens
+	"gpt-4-0125-preview":     5,     // $0.01 / 1K tokens
-	"gpt-4-turbo-preview":     5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-preview":    5,     // $0.01 / 1K tokens
-	"gpt-4-turbo":             5,     // $0.01 / 1K tokens
+	"gpt-4-turbo":            5,     // $0.01 / 1K tokens
-	"gpt-4-turbo-2024-04-09":  5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-2024-04-09": 5,     // $0.01 / 1K tokens
-	"gpt-4o":                  2.5,   // $0.005 / 1K tokens
+	"gpt-4o":                 2.5,   // $0.005 / 1K tokens
-	"chatgpt-4o-latest":       2.5,   // $0.005 / 1K tokens
+	"chatgpt-4o-latest":      2.5,   // $0.005 / 1K tokens
-	"gpt-4o-2024-05-13":       2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-05-13":      2.5,   // $0.005 / 1K tokens
-	"gpt-4o-2024-08-06":       1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-2024-08-06":      1.25,  // $0.0025 / 1K tokens
-	"gpt-4o-2024-11-20":       1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-2024-11-20":      1.25,  // $0.0025 / 1K tokens
-	"gpt-4o-mini":             0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini":            0.075, // $0.00015 / 1K tokens
-	"gpt-4o-mini-2024-07-18":  0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
-	"gpt-4-vision-preview":    5,     // $0.01 / 1K tokens
+	"gpt-4-vision-preview":   5,     // $0.01 / 1K tokens
-	"gpt-3.5-turbo":           0.25,  // $0.0005 / 1K tokens
+	// Audio billing will mix text and audio tokens, the unit price is different.
-	"gpt-3.5-turbo-0301":      0.75,
+	// Here records the cost of text, the cost multiplier of audio
-	"gpt-3.5-turbo-0613":      0.75,
+	// relative to text is in AudioRatio
-	"gpt-3.5-turbo-16k":       1.5, // $0.003 / 1K tokens
+	"gpt-4o-audio-preview":                 1.25,             // $0.0025 / 1K tokens
-	"gpt-3.5-turbo-16k-0613":  1.5,
+	"gpt-4o-audio-preview-2024-12-17":      1.25,             // $0.0025 / 1K tokens
-	"gpt-3.5-turbo-instruct":  0.75, // $0.0015 / 1K tokens
+	"gpt-4o-audio-preview-2024-10-01":      1.25,             // $0.0025 / 1K tokens
-	"gpt-3.5-turbo-1106":      0.5,  // $0.001 / 1K tokens
+	"gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
-	"gpt-3.5-turbo-0125":      0.25, // $0.0005 / 1K tokens
+	"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
-	"o1":                      7.5,  // $15.00 / 1M input tokens
+	"gpt-3.5-turbo":                        0.25,             // $0.0005 / 1K tokens
-	"o1-2024-12-17":           7.5,
+	"gpt-3.5-turbo-0301":                   0.75,
-	"o1-preview":              7.5, // $15.00 / 1M input tokens
+	"gpt-3.5-turbo-0613":                   0.75,
-	"o1-preview-2024-09-12":   7.5,
+	"gpt-3.5-turbo-16k":                    1.5, // $0.003 / 1K tokens
-	"o1-mini":                 1.5, // $3.00 / 1M input tokens
+	"gpt-3.5-turbo-16k-0613":               1.5,
-	"o1-mini-2024-09-12":      1.5,
+	"gpt-3.5-turbo-instruct":               0.75, // $0.0015 / 1K tokens
-	"davinci-002":             1,   // $0.002 / 1K tokens
+	"gpt-3.5-turbo-1106":                   0.5,  // $0.001 / 1K tokens
-	"babbage-002":             0.2, // $0.0004 / 1K tokens
+	"gpt-3.5-turbo-0125":                   0.25, // $0.0005 / 1K tokens
-	"text-ada-001":            0.2,
+	"o1":                                   7.5,  // $15.00 / 1M input tokens
-	"text-babbage-001":        0.25,
+	"o1-2024-12-17":                        7.5,
-	"text-curie-001":          1,
+	"o1-preview":                           7.5, // $15.00 / 1M input tokens
-	"text-davinci-002":        10,
+	"o1-preview-2024-09-12":                7.5,
-	"text-davinci-003":        10,
+	"o1-mini":                              1.5, // $3.00 / 1M input tokens
-	"text-davinci-edit-001":   10,
+	"o1-mini-2024-09-12":                   1.5,
-	"code-davinci-edit-001":   10,
+	"o3-mini":                              1.1 * MILLI_USD,
-	"whisper-1":               15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+	"o3-mini-2025-01-31":                   1.1 * MILLI_USD,
-	"tts-1":                   7.5, // $0.015 / 1K characters
+	"davinci-002":                          1,   // $0.002 / 1K tokens
-	"tts-1-1106":              7.5,
+	"babbage-002":                          0.2, // $0.0004 / 1K tokens
-	"tts-1-hd":                15, // $0.030 / 1K characters
+	"text-ada-001":                         0.2,
-	"tts-1-hd-1106":           15,
+	"text-babbage-001":                     0.25,
-	"davinci":                 10,
+	"text-curie-001":                       1,
-	"curie":                   10,
+	"text-davinci-002":                     10,
-	"babbage":                 10,
+	"text-davinci-003":                     10,
-	"ada":                     10,
+	"text-davinci-edit-001":                10,
-	"text-embedding-ada-002":  0.05,
+	"code-davinci-edit-001":                10,
-	"text-embedding-3-small":  0.01,
+	"whisper-1":                            15,
-	"text-embedding-3-large":  0.065,
+	"tts-1":                                7.5, // $0.015 / 1K characters
-	"text-search-ada-doc-001": 10,
+	"tts-1-1106":                           7.5,
-	"text-moderation-stable":  0.1,
+	"tts-1-hd":                             15, // $0.030 / 1K characters
-	"text-moderation-latest":  0.1,
+	"tts-1-hd-1106":                        15,
-	"dall-e-2":                0.02 * USD, // $0.016 - $0.020 / image
+	"davinci":                              10,
-	"dall-e-3":                0.04 * USD, // $0.040 - $0.120 / image
+	"curie":                                10,
 	"babbage":                              10,
 	"ada":                                  10,
 	"text-embedding-ada-002":               0.05,
 	"text-embedding-3-small":               0.01,
 	"text-embedding-3-large":               0.065,
 	"text-search-ada-doc-001":              10,
 	"text-moderation-stable":               0.1,
 	"text-moderation-latest":               0.1,
 	"dall-e-2":                             0.02 * USD, // $0.016 - $0.020 / image
 	"dall-e-3":                             0.04 * USD, // $0.040 - $0.120 / image
 	// https://www.anthropic.com/api#pricing
 	"claude-instant-1.2":         0.8 / 1000 * USD,
 	"claude-2.0":                 8.0 / 1000 * USD,
@@ -359,6 +369,76 @@ var ModelRatio = map[string]float64{
 	"mistralai/mixtral-8x7b-instruct-v0.1":      0.300 * USD,
 }
 // AudioRatio maps a model name to the price multiplier applied to its audio
 // tokens relative to its text tokens.
 var AudioRatio = map[string]float64{
 	"gpt-4o-audio-preview":                 16,
 	"gpt-4o-audio-preview-2024-12-17":      16,
 	"gpt-4o-audio-preview-2024-10-01":      40,
 	"gpt-4o-mini-audio-preview":            10 / 0.15,
 	"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
 }
 // GetAudioPromptRatio looks up the audio-to-text price multiplier for
 // actualModelName in AudioRatio. Models without an entry fall back to 16.
 func GetAudioPromptRatio(actualModelName string) float64 {
 	multiplier, listed := AudioRatio[actualModelName]
 	if !listed {
 		return 16
 	}
 	return multiplier
 }
 // AudioCompletionRatio maps an audio-capable model name to its completion
 // ratio.
 var AudioCompletionRatio = map[string]float64{
 	"whisper-1":                            0,
 	"gpt-4o-audio-preview":                 2,
 	"gpt-4o-audio-preview-2024-12-17":      2,
 	"gpt-4o-audio-preview-2024-10-01":      2,
 	"gpt-4o-mini-audio-preview":            2,
 	"gpt-4o-mini-audio-preview-2024-12-17": 2,
 }
 // GetAudioCompletionRatio looks up the completion ratio for actualModelName
 // in AudioCompletionRatio. Models without an entry fall back to 2; an
 // explicit entry of 0 (as for whisper-1) is returned as-is.
 func GetAudioCompletionRatio(actualModelName string) float64 {
 	completion, listed := AudioCompletionRatio[actualModelName]
 	if !listed {
 		return 2
 	}
 	return completion
 }
 // AudioPromptTokensPerSecond maps a model name to the number of audio prompt
 // tokens counted per second of audio.
 var AudioPromptTokensPerSecond = map[string]float64{
 	// The Whisper API is priced at $0.0001 per second, while one-api has
 	// historically used a ratio of 15 for it, i.e. $0.03 per 1k tokens.
 	// Converting between the two gives 0.0001/0.03*1000 = 3.3333 tokens
 	// per second.
 	"whisper-1": 0.0001 / 0.03 * 1000,
 	// The gpt-4o-audio series processes 10 tokens per second.
 	"gpt-4o-audio-preview":                 10,
 	"gpt-4o-audio-preview-2024-12-17":      10,
 	"gpt-4o-audio-preview-2024-10-01":      10,
 	"gpt-4o-mini-audio-preview":            10,
 	"gpt-4o-mini-audio-preview-2024-12-17": 10,
 }
 // GetAudioPromptTokensPerSecond looks up the audio tokens-per-second rate for
 // actualModelName in AudioPromptTokensPerSecond. Models without an entry fall
 // back to 10.
 func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
 	rate, listed := AudioPromptTokensPerSecond[actualModelName]
 	if !listed {
 		return 10
 	}
 	return rate
 }
 var CompletionRatio = map[string]float64{
 	// aws llama3
 	"llama3-8b-8192(33)":  0.0006 / 0.0003,
@@ -497,8 +577,9 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		}
 		return 2
 	}
-	// including o1, o1-preview, o1-mini
+	// including o1/o1-preview/o1-mini and the o3 series (e.g. o3-mini)
-	if strings.HasPrefix(name, "o1") {
+	if strings.HasPrefix(name, "o1") ||
 		strings.HasPrefix(name, "o3") {
 		return 4
 	}
 	if name == "chatgpt-4o-latest" {
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -23,36 +23,39 @@ type StreamOptions struct {
 type GeneralOpenAIRequest struct {
 	// https://platform.openai.com/docs/api-reference/chat/create
-	Messages            []Message       `json:"messages,omitempty"`
+	Messages            []Message `json:"messages,omitempty"`
-	Model               string          `json:"model,omitempty"`
+	Model               string    `json:"model,omitempty"`
-	Store               *bool           `json:"store,omitempty"`
+	Store               *bool     `json:"store,omitempty"`
-	Metadata            any             `json:"metadata,omitempty"`
+	Metadata            any       `json:"metadata,omitempty"`
-	FrequencyPenalty    *float64        `json:"frequency_penalty,omitempty"`
+	FrequencyPenalty    *float64  `json:"frequency_penalty,omitempty"`
-	LogitBias           any             `json:"logit_bias,omitempty"`
+	LogitBias           any       `json:"logit_bias,omitempty"`
-	Logprobs            *bool           `json:"logprobs,omitempty"`
+	Logprobs            *bool     `json:"logprobs,omitempty"`
-	TopLogprobs         *int            `json:"top_logprobs,omitempty"`
+	TopLogprobs         *int      `json:"top_logprobs,omitempty"`
-	MaxTokens           int             `json:"max_tokens,omitempty"`
+	MaxTokens           int       `json:"max_tokens,omitempty"`
-	MaxCompletionTokens *int            `json:"max_completion_tokens,omitempty"`
+	MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
-	N                   int             `json:"n,omitempty"`
+	N                   int       `json:"n,omitempty"`
-	Modalities          []string        `json:"modalities,omitempty"`
+	// ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
-	Prediction          any             `json:"prediction,omitempty"`
+	ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
-	Audio               *Audio          `json:"audio,omitempty"`
+	// Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
-	PresencePenalty     *float64        `json:"presence_penalty,omitempty"`
+	Modalities       []string        `json:"modalities,omitempty"`
-	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
+	Prediction       any             `json:"prediction,omitempty"`
-	Seed                float64         `json:"seed,omitempty"`
+	Audio            *Audio          `json:"audio,omitempty"`
-	ServiceTier         *string         `json:"service_tier,omitempty"`
+	PresencePenalty  *float64        `json:"presence_penalty,omitempty"`
-	Stop                any             `json:"stop,omitempty"`
+	ResponseFormat   *ResponseFormat `json:"response_format,omitempty"`
-	Stream              bool            `json:"stream,omitempty"`
+	Seed             float64         `json:"seed,omitempty"`
-	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
+	ServiceTier      *string         `json:"service_tier,omitempty"`
-	Temperature         *float64        `json:"temperature,omitempty"`
+	Stop             any             `json:"stop,omitempty"`
-	TopP                *float64        `json:"top_p,omitempty"`
+	Stream           bool            `json:"stream,omitempty"`
-	TopK                int             `json:"top_k,omitempty"`
+	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
-	Tools               []Tool          `json:"tools,omitempty"`
+	Temperature      *float64        `json:"temperature,omitempty"`
-	ToolChoice          any             `json:"tool_choice,omitempty"`
+	TopP             *float64        `json:"top_p,omitempty"`
-	ParallelTooCalls    *bool           `json:"parallel_tool_calls,omitempty"`
+	TopK             int             `json:"top_k,omitempty"`
-	User                string          `json:"user,omitempty"`
+	Tools            []Tool          `json:"tools,omitempty"`
-	FunctionCall        any             `json:"function_call,omitempty"`
+	ToolChoice       any             `json:"tool_choice,omitempty"`
-	Functions           any             `json:"functions,omitempty"`
+	ParallelTooCalls *bool           `json:"parallel_tool_calls,omitempty"`
 	User             string          `json:"user,omitempty"`
 	FunctionCall     any             `json:"function_call,omitempty"`
 	Functions        any             `json:"functions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/embeddings/create
 	Input          any    `json:"input,omitempty"`
 	EncodingFormat string `json:"encoding_format,omitempty"`