Laisky.Cai 2025-02-02 22:23:05 +08:00 committed by GitHub
commit 022988c007
4 changed files with 206 additions and 91 deletions

View File

@@ -8,6 +8,7 @@ import (
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/doubao"
"github.com/songquanpeng/one-api/relay/adaptor/minimax"
@@ -82,6 +83,33 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
}
request.StreamOptions.IncludeUsage = true
}
// o1/o1-mini/o1-preview and o3-mini do not support system prompt/max_tokens/temperature
if strings.HasPrefix(request.Model, "o1") ||
strings.HasPrefix(request.Model, "o3") {
temperature := float64(1)
request.Temperature = &temperature // Only the default (1) value is supported
request.MaxTokens = 0
request.Messages = func(raw []model.Message) (filtered []model.Message) {
for i := range raw {
if raw[i].Role != "system" {
filtered = append(filtered, raw[i])
}
}
return
}(request.Messages)
}
if request.Stream && !config.EnforceIncludeUsage &&
(strings.HasPrefix(request.Model, "gpt-4o-audio") ||
strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
// TODO: it is not yet clear how to bill these models in stream mode,
// so streaming is temporarily rejected unless usage reporting is enforced
return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
}
return request, nil
}
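As a reference for how the o1/o3 branch above reshapes a request, here is a minimal, self-contained sketch. The types are simplified stand-ins for the real relay/model structs and the helper name is invented for illustration; only the field overrides and the system-message filtering mirror the diff.

package main

import (
	"fmt"
	"strings"
)

// Simplified stand-ins for the relay/model types; illustration only.
type Message struct {
	Role    string
	Content string
}

type Request struct {
	Model       string
	Temperature *float64
	MaxTokens   int
	Messages    []Message
}

// normalizeReasoningRequest (hypothetical helper) mirrors the o1/o3 branch:
// force temperature to the only supported value (1), clear max_tokens,
// and drop system messages.
func normalizeReasoningRequest(req *Request) {
	if !strings.HasPrefix(req.Model, "o1") && !strings.HasPrefix(req.Model, "o3") {
		return
	}
	temperature := float64(1)
	req.Temperature = &temperature
	req.MaxTokens = 0
	var filtered []Message
	for _, m := range req.Messages {
		if m.Role != "system" {
			filtered = append(filtered, m)
		}
	}
	req.Messages = filtered
}

func main() {
	req := &Request{
		Model:     "o3-mini",
		MaxTokens: 1024,
		Messages: []Message{
			{Role: "system", Content: "You are a helpful assistant."},
			{Role: "user", Content: "Hello"},
		},
	}
	normalizeReasoningRequest(req)
	fmt.Printf("messages=%d maxTokens=%d temperature=%v\n",
		len(req.Messages), req.MaxTokens, *req.Temperature)
	// Output: messages=1 maxTokens=0 temperature=1
}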

View File

@@ -12,6 +12,8 @@ var ModelList = []string{
"gpt-4o-2024-11-20",
"chatgpt-4o-latest",
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
"gpt-4-vision-preview",
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
@@ -24,4 +26,5 @@ var ModelList = []string{
"o1", "o1-2024-12-17",
"o1-preview", "o1-preview-2024-09-12",
"o1-mini", "o1-mini-2024-09-12",
"o3-mini", "o3-mini-2025-01-31",
}

View File

@@ -42,6 +42,14 @@ var ModelRatio = map[string]float64{
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
// Audio billing mixes text and audio tokens, which have different unit prices.
// The values here are the text-token cost; the audio-to-text cost multiplier
// is in AudioRatio.
"gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens
"gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens
"gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens
"gpt-4o-mini-audio-preview": 0.15 * MILLI_USD, // $0.15/1M tokens
"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
"gpt-3.5-turbo-0301": 0.75,
"gpt-3.5-turbo-0613": 0.75,
@@ -56,6 +64,8 @@ var ModelRatio = map[string]float64{
"o1-preview-2024-09-12": 7.5,
"o1-mini": 1.5, // $3.00 / 1M input tokens
"o1-mini-2024-09-12": 1.5,
"o3-mini": 1.1 * MILLI_USD,
"o3-mini-2025-01-31": 1.1 * MILLI_USD,
"davinci-002": 1, // $0.002 / 1K tokens
"babbage-002": 0.2, // $0.0004 / 1K tokens
"text-ada-001": 0.2,
@@ -65,7 +75,7 @@ var ModelRatio = map[string]float64{
"text-davinci-003": 10,
"text-davinci-edit-001": 10,
"code-davinci-edit-001": 10,
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
"whisper-1": 15,
"tts-1": 7.5, // $0.015 / 1K characters
"tts-1-1106": 7.5,
"tts-1-hd": 15, // $0.030 / 1K characters
@@ -359,6 +369,76 @@ var ModelRatio = map[string]float64{
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
}
// AudioRatio represents the price ratio between audio tokens and text tokens
var AudioRatio = map[string]float64{
"gpt-4o-audio-preview": 16,
"gpt-4o-audio-preview-2024-12-17": 16,
"gpt-4o-audio-preview-2024-10-01": 40,
"gpt-4o-mini-audio-preview": 10 / 0.15,
"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
}
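// Editor's note (illustrative sketch, not part of this commit): how the two
// tables above combine, assuming one-api's usual convention that a ratio of 1
// corresponds to $0.002 per 1K tokens. audioPromptUSDPer1K is a hypothetical
// helper named here only to make the arithmetic concrete.
func audioPromptUSDPer1K(modelName string) float64 {
	const usdPer1KAtRatio1 = 0.002                            // assumed convention: ratio 1 == $0.002 / 1K tokens
	textUSDPer1K := ModelRatio[modelName] * usdPer1KAtRatio1  // gpt-4o-audio-preview: 1.25 * 0.002 = $0.0025 / 1K ($2.50 / 1M) text tokens
	return textUSDPer1K * GetAudioPromptRatio(modelName)      // times AudioRatio 16 => $0.04 / 1K ($40 / 1M) audio tokens
}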
// GetAudioPromptRatio returns the audio prompt ratio for the given model.
func GetAudioPromptRatio(actualModelName string) float64 {
var v float64
if ratio, ok := AudioRatio[actualModelName]; ok {
v = ratio
} else {
v = 16
}
return v
}
// AudioCompletionRatio is the completion ratio for audio models.
var AudioCompletionRatio = map[string]float64{
"whisper-1": 0,
"gpt-4o-audio-preview": 2,
"gpt-4o-audio-preview-2024-12-17": 2,
"gpt-4o-audio-preview-2024-10-01": 2,
"gpt-4o-mini-audio-preview": 2,
"gpt-4o-mini-audio-preview-2024-12-17": 2,
}
// GetAudioCompletionRatio returns the completion ratio for audio models.
func GetAudioCompletionRatio(actualModelName string) float64 {
var v float64
if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
v = ratio
} else {
v = 2
}
return v
}
// AudioPromptTokensPerSecond is the number of audio prompt tokens per second for each model.
var AudioPromptTokensPerSecond = map[string]float64{
// The Whisper API costs $0.0001 per second. One-api's historical ratio for
// whisper-1 is 15, i.e. $0.03 per 1K tokens, so the equivalent rate is
// 0.0001 / 0.03 * 1000 ≈ 3.33 tokens per second.
"whisper-1": 0.0001 / 0.03 * 1000,
// The gpt-4o audio series is metered at 10 audio tokens per second
"gpt-4o-audio-preview": 10,
"gpt-4o-audio-preview-2024-12-17": 10,
"gpt-4o-audio-preview-2024-10-01": 10,
"gpt-4o-mini-audio-preview": 10,
"gpt-4o-mini-audio-preview-2024-12-17": 10,
}
// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
// for the given model.
func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
var v float64
if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
v = tokensPerSecond
} else {
v = 10
}
return v
}
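// Editor's note (hypothetical sketch, not part of this commit): the table and
// helper above support duration-based accounting, e.g. turning the length of an
// audio input into a token count. One plausible use looks like the function
// below; the real call sites are not shown in this diff, so the wiring is an
// assumption.
func estimateAudioPromptTokens(modelName string, durationSeconds float64) int {
	perSecond := GetAudioPromptTokensPerSecond(modelName) // ~3.33 tokens/s for whisper-1, 10 tokens/s for the gpt-4o audio series
	return int(durationSeconds*perSecond + 0.5)           // round to the nearest whole token
}
// For whisper-1, 60 seconds => 60 * 0.0001/0.03*1000 = 200 tokens, which at a
// ratio of 15 ($0.03 / 1K tokens) reproduces the historical $0.006 / minute price.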
var CompletionRatio = map[string]float64{
// aws llama3
"llama3-8b-8192(33)": 0.0006 / 0.0003,
@@ -497,8 +577,9 @@ func GetCompletionRatio(name string, channelType int) float64 {
}
return 2
}
- // including o1, o1-preview, o1-mini
- if strings.HasPrefix(name, "o1") {
+ // including o1/o1-preview/o1-mini and o3/o3-mini
+ if strings.HasPrefix(name, "o1") ||
+ strings.HasPrefix(name, "o3") {
return 4
}
if name == "chatgpt-4o-latest" {

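To sanity-check the new branch, a small stand-alone sketch of what a completion ratio of 4 means for the o3-mini pricing added above. The dollar figure is what the 1.1 * MILLI_USD ratio implies under the assumed one-api convention that a ratio of 1 equals $0.002 per 1K tokens.

package main

import "fmt"

func main() {
	// o3-mini prompt ratio is 1.1 * MILLI_USD, i.e. $1.10 per 1M input tokens.
	inputUSDPerMillion := 1.10
	completionRatio := 4.0 // GetCompletionRatio returns 4 for o1/o3 models
	fmt.Printf("o3-mini output: $%.2f / 1M tokens\n", inputUSDPerMillion*completionRatio) // $4.40 / 1M
}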
View File

@@ -34,6 +34,9 @@ type GeneralOpenAIRequest struct {
MaxTokens int `json:"max_tokens,omitempty"`
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
N int `json:"n,omitempty"`
// ReasoningEffort constrains effort on reasoning; reasoning models only. Supported values: low, medium, high.
ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
// Modalities: currently the model only allows modalities = ["text", "audio"]
Modalities []string `json:"modalities,omitempty"`
Prediction any `json:"prediction,omitempty"`
Audio *Audio `json:"audio,omitempty"`
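For context, a small sketch of request bodies that exercise the new fields. The model names and values are just examples; the field names follow the struct tags above, and the audio object's voice/format sub-fields follow the upstream OpenAI audio parameter, which this hunk does not show.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// A reasoning-model request: reasoning_effort only applies to reasoning models.
	reasoningReq := map[string]any{
		"model":            "o3-mini",
		"reasoning_effort": "high", // one of: low, medium, high
		"messages": []map[string]string{
			{"role": "user", "content": "Summarize the billing change in one sentence."},
		},
	}

	// An audio-capable request: ask for both text and audio output.
	audioReq := map[string]any{
		"model":      "gpt-4o-audio-preview",
		"modalities": []string{"text", "audio"},
		"audio":      map[string]string{"voice": "alloy", "format": "wav"},
		"messages": []map[string]string{
			{"role": "user", "content": "Say hello."},
		},
	}

	for _, req := range []map[string]any{reasoningReq, audioReq} {
		b, _ := json.MarshalIndent(req, "", "  ")
		fmt.Println(string(b))
	}
}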