mirror of
https://github.com/songquanpeng/one-api.git
synced 2025-09-17 17:16:38 +08:00
Merge 790e1e9f5b
into 6ded638f70
This commit is contained in:
commit
022988c007
@ -8,6 +8,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/songquanpeng/one-api/common/config"
|
||||||
"github.com/songquanpeng/one-api/relay/adaptor"
|
"github.com/songquanpeng/one-api/relay/adaptor"
|
||||||
"github.com/songquanpeng/one-api/relay/adaptor/doubao"
|
"github.com/songquanpeng/one-api/relay/adaptor/doubao"
|
||||||
"github.com/songquanpeng/one-api/relay/adaptor/minimax"
|
"github.com/songquanpeng/one-api/relay/adaptor/minimax"
|
||||||
@ -82,6 +83,33 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
|
|||||||
}
|
}
|
||||||
request.StreamOptions.IncludeUsage = true
|
request.StreamOptions.IncludeUsage = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// o1/o1-mini/o1-preview do not support system prompt/max_tokens/temperature
|
||||||
|
if strings.HasPrefix(request.Model, "o1") ||
|
||||||
|
strings.HasPrefix(request.Model, "o3") {
|
||||||
|
temperature := float64(1)
|
||||||
|
request.Temperature = &temperature // Only the default (1) value is supported
|
||||||
|
|
||||||
|
request.MaxTokens = 0
|
||||||
|
request.Messages = func(raw []model.Message) (filtered []model.Message) {
|
||||||
|
for i := range raw {
|
||||||
|
if raw[i].Role != "system" {
|
||||||
|
filtered = append(filtered, raw[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}(request.Messages)
|
||||||
|
}
|
||||||
|
|
||||||
|
if request.Stream && !config.EnforceIncludeUsage &&
|
||||||
|
(strings.HasPrefix(request.Model, "gpt-4o-audio") ||
|
||||||
|
strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
|
||||||
|
// TODO: Since it is not clear how to implement billing in stream mode,
|
||||||
|
// it is temporarily not supported
|
||||||
|
return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
|
||||||
|
}
|
||||||
|
|
||||||
return request, nil
|
return request, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,6 +12,8 @@ var ModelList = []string{
|
|||||||
"gpt-4o-2024-11-20",
|
"gpt-4o-2024-11-20",
|
||||||
"chatgpt-4o-latest",
|
"chatgpt-4o-latest",
|
||||||
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
|
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
|
||||||
|
"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
|
||||||
|
"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
|
||||||
"gpt-4-vision-preview",
|
"gpt-4-vision-preview",
|
||||||
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
|
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
|
||||||
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
|
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
|
||||||
@ -24,4 +26,5 @@ var ModelList = []string{
|
|||||||
"o1", "o1-2024-12-17",
|
"o1", "o1-2024-12-17",
|
||||||
"o1-preview", "o1-preview-2024-09-12",
|
"o1-preview", "o1-preview-2024-09-12",
|
||||||
"o1-mini", "o1-mini-2024-09-12",
|
"o1-mini", "o1-mini-2024-09-12",
|
||||||
|
"o3-mini", "o3-mini-2025-01-31",
|
||||||
}
|
}
|
||||||
|
@ -23,65 +23,75 @@ const (
|
|||||||
// 1 === ¥0.014 / 1k tokens
|
// 1 === ¥0.014 / 1k tokens
|
||||||
var ModelRatio = map[string]float64{
|
var ModelRatio = map[string]float64{
|
||||||
// https://openai.com/pricing
|
// https://openai.com/pricing
|
||||||
"gpt-4": 15,
|
"gpt-4": 15,
|
||||||
"gpt-4-0314": 15,
|
"gpt-4-0314": 15,
|
||||||
"gpt-4-0613": 15,
|
"gpt-4-0613": 15,
|
||||||
"gpt-4-32k": 30,
|
"gpt-4-32k": 30,
|
||||||
"gpt-4-32k-0314": 30,
|
"gpt-4-32k-0314": 30,
|
||||||
"gpt-4-32k-0613": 30,
|
"gpt-4-32k-0613": 30,
|
||||||
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
|
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
|
||||||
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
|
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
|
||||||
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
|
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
|
||||||
"gpt-4-turbo": 5, // $0.01 / 1K tokens
|
"gpt-4-turbo": 5, // $0.01 / 1K tokens
|
||||||
"gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens
|
"gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens
|
||||||
"gpt-4o": 2.5, // $0.005 / 1K tokens
|
"gpt-4o": 2.5, // $0.005 / 1K tokens
|
||||||
"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
|
"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
|
||||||
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
|
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
|
||||||
"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
|
"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
|
||||||
"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
|
"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
|
||||||
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
|
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
|
||||||
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
|
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
|
||||||
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
|
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
|
||||||
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
|
// Audio billing will mix text and audio tokens, the unit price is different.
|
||||||
"gpt-3.5-turbo-0301": 0.75,
|
// Here records the cost of text, the cost multiplier of audio
|
||||||
"gpt-3.5-turbo-0613": 0.75,
|
// relative to text is in AudioRatio
|
||||||
"gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
|
"gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens
|
||||||
"gpt-3.5-turbo-16k-0613": 1.5,
|
"gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens
|
||||||
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
|
"gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens
|
||||||
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
|
"gpt-4o-mini-audio-preview": 0.15 * MILLI_USD, // $0.15/1M tokens
|
||||||
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
|
"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
|
||||||
"o1": 7.5, // $15.00 / 1M input tokens
|
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
|
||||||
"o1-2024-12-17": 7.5,
|
"gpt-3.5-turbo-0301": 0.75,
|
||||||
"o1-preview": 7.5, // $15.00 / 1M input tokens
|
"gpt-3.5-turbo-0613": 0.75,
|
||||||
"o1-preview-2024-09-12": 7.5,
|
"gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
|
||||||
"o1-mini": 1.5, // $3.00 / 1M input tokens
|
"gpt-3.5-turbo-16k-0613": 1.5,
|
||||||
"o1-mini-2024-09-12": 1.5,
|
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
|
||||||
"davinci-002": 1, // $0.002 / 1K tokens
|
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
|
||||||
"babbage-002": 0.2, // $0.0004 / 1K tokens
|
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
|
||||||
"text-ada-001": 0.2,
|
"o1": 7.5, // $15.00 / 1M input tokens
|
||||||
"text-babbage-001": 0.25,
|
"o1-2024-12-17": 7.5,
|
||||||
"text-curie-001": 1,
|
"o1-preview": 7.5, // $15.00 / 1M input tokens
|
||||||
"text-davinci-002": 10,
|
"o1-preview-2024-09-12": 7.5,
|
||||||
"text-davinci-003": 10,
|
"o1-mini": 1.5, // $3.00 / 1M input tokens
|
||||||
"text-davinci-edit-001": 10,
|
"o1-mini-2024-09-12": 1.5,
|
||||||
"code-davinci-edit-001": 10,
|
"o3-mini": 1.1 * MILLI_USD,
|
||||||
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
"o3-mini-2025-01-31": 1.1 * MILLI_USD,
|
||||||
"tts-1": 7.5, // $0.015 / 1K characters
|
"davinci-002": 1, // $0.002 / 1K tokens
|
||||||
"tts-1-1106": 7.5,
|
"babbage-002": 0.2, // $0.0004 / 1K tokens
|
||||||
"tts-1-hd": 15, // $0.030 / 1K characters
|
"text-ada-001": 0.2,
|
||||||
"tts-1-hd-1106": 15,
|
"text-babbage-001": 0.25,
|
||||||
"davinci": 10,
|
"text-curie-001": 1,
|
||||||
"curie": 10,
|
"text-davinci-002": 10,
|
||||||
"babbage": 10,
|
"text-davinci-003": 10,
|
||||||
"ada": 10,
|
"text-davinci-edit-001": 10,
|
||||||
"text-embedding-ada-002": 0.05,
|
"code-davinci-edit-001": 10,
|
||||||
"text-embedding-3-small": 0.01,
|
"whisper-1": 15,
|
||||||
"text-embedding-3-large": 0.065,
|
"tts-1": 7.5, // $0.015 / 1K characters
|
||||||
"text-search-ada-doc-001": 10,
|
"tts-1-1106": 7.5,
|
||||||
"text-moderation-stable": 0.1,
|
"tts-1-hd": 15, // $0.030 / 1K characters
|
||||||
"text-moderation-latest": 0.1,
|
"tts-1-hd-1106": 15,
|
||||||
"dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
|
"davinci": 10,
|
||||||
"dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
|
"curie": 10,
|
||||||
|
"babbage": 10,
|
||||||
|
"ada": 10,
|
||||||
|
"text-embedding-ada-002": 0.05,
|
||||||
|
"text-embedding-3-small": 0.01,
|
||||||
|
"text-embedding-3-large": 0.065,
|
||||||
|
"text-search-ada-doc-001": 10,
|
||||||
|
"text-moderation-stable": 0.1,
|
||||||
|
"text-moderation-latest": 0.1,
|
||||||
|
"dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
|
||||||
|
"dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
|
||||||
// https://www.anthropic.com/api#pricing
|
// https://www.anthropic.com/api#pricing
|
||||||
"claude-instant-1.2": 0.8 / 1000 * USD,
|
"claude-instant-1.2": 0.8 / 1000 * USD,
|
||||||
"claude-2.0": 8.0 / 1000 * USD,
|
"claude-2.0": 8.0 / 1000 * USD,
|
||||||
@ -359,6 +369,76 @@ var ModelRatio = map[string]float64{
|
|||||||
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
|
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AudioRatio maps a model name to the price ratio between audio tokens and
// text tokens for that model.
var AudioRatio = map[string]float64{
	"gpt-4o-audio-preview":                 16,
	"gpt-4o-audio-preview-2024-12-17":      16,
	"gpt-4o-audio-preview-2024-10-01":      40,
	"gpt-4o-mini-audio-preview":            10 / 0.15,
	"gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15,
}

// GetAudioPromptRatio returns the audio prompt ratio for the given model.
//
// actualModelName is looked up verbatim in AudioRatio. A model without an
// entry falls back to a ratio of 16.
func GetAudioPromptRatio(actualModelName string) float64 {
	if ratio, ok := AudioRatio[actualModelName]; ok {
		return ratio
	}

	return 16
}
|
||||||
|
|
||||||
|
// AudioCompletionRatio maps a model name to the completion ratio used for
// audio models.
var AudioCompletionRatio = map[string]float64{
	"whisper-1":                            0,
	"gpt-4o-audio-preview":                 2,
	"gpt-4o-audio-preview-2024-12-17":      2,
	"gpt-4o-audio-preview-2024-10-01":      2,
	"gpt-4o-mini-audio-preview":            2,
	"gpt-4o-mini-audio-preview-2024-12-17": 2,
}

// GetAudioCompletionRatio returns the completion ratio for audio models.
//
// actualModelName is looked up verbatim in AudioCompletionRatio. A model
// without an entry falls back to a ratio of 2. An explicit entry of 0 (as
// for "whisper-1") is returned as-is and does not trigger the fallback.
func GetAudioCompletionRatio(actualModelName string) float64 {
	if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
		return ratio
	}

	return 2
}
|
||||||
|
|
||||||
|
// AudioPromptTokensPerSecond maps a model name to the number of audio prompt
// tokens counted per second of audio for that model.
var AudioPromptTokensPerSecond = map[string]float64{
	// Whisper API price is $0.0001/sec. One-api's historical ratio is 15,
	// corresponding to $0.03/kilo_tokens.
	// After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
	"whisper-1": 0.0001 / 0.03 * 1000,
	// gpt-4o-audio series processes 10 tokens per second
	"gpt-4o-audio-preview":                 10,
	"gpt-4o-audio-preview-2024-12-17":      10,
	"gpt-4o-audio-preview-2024-10-01":      10,
	"gpt-4o-mini-audio-preview":            10,
	"gpt-4o-mini-audio-preview-2024-12-17": 10,
}

// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
// for the given model.
//
// actualModelName is looked up verbatim in AudioPromptTokensPerSecond. A
// model without an entry falls back to 10 tokens per second.
func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
	if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
		return tokensPerSecond
	}

	return 10
}
|
||||||
|
|
||||||
var CompletionRatio = map[string]float64{
|
var CompletionRatio = map[string]float64{
|
||||||
// aws llama3
|
// aws llama3
|
||||||
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
||||||
@ -497,8 +577,9 @@ func GetCompletionRatio(name string, channelType int) float64 {
|
|||||||
}
|
}
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
// including o1, o1-preview, o1-mini
|
// including o1/o1-preview/o1-mini
|
||||||
if strings.HasPrefix(name, "o1") {
|
if strings.HasPrefix(name, "o1") ||
|
||||||
|
strings.HasPrefix(name, "o3") {
|
||||||
return 4
|
return 4
|
||||||
}
|
}
|
||||||
if name == "chatgpt-4o-latest" {
|
if name == "chatgpt-4o-latest" {
|
||||||
|
@ -23,36 +23,39 @@ type StreamOptions struct {
|
|||||||
|
|
||||||
type GeneralOpenAIRequest struct {
|
type GeneralOpenAIRequest struct {
|
||||||
// https://platform.openai.com/docs/api-reference/chat/create
|
// https://platform.openai.com/docs/api-reference/chat/create
|
||||||
Messages []Message `json:"messages,omitempty"`
|
Messages []Message `json:"messages,omitempty"`
|
||||||
Model string `json:"model,omitempty"`
|
Model string `json:"model,omitempty"`
|
||||||
Store *bool `json:"store,omitempty"`
|
Store *bool `json:"store,omitempty"`
|
||||||
Metadata any `json:"metadata,omitempty"`
|
Metadata any `json:"metadata,omitempty"`
|
||||||
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
|
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
|
||||||
LogitBias any `json:"logit_bias,omitempty"`
|
LogitBias any `json:"logit_bias,omitempty"`
|
||||||
Logprobs *bool `json:"logprobs,omitempty"`
|
Logprobs *bool `json:"logprobs,omitempty"`
|
||||||
TopLogprobs *int `json:"top_logprobs,omitempty"`
|
TopLogprobs *int `json:"top_logprobs,omitempty"`
|
||||||
MaxTokens int `json:"max_tokens,omitempty"`
|
MaxTokens int `json:"max_tokens,omitempty"`
|
||||||
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
|
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
|
||||||
N int `json:"n,omitempty"`
|
N int `json:"n,omitempty"`
|
||||||
Modalities []string `json:"modalities,omitempty"`
|
// ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
|
||||||
Prediction any `json:"prediction,omitempty"`
|
ReasoningEffort string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
|
||||||
Audio *Audio `json:"audio,omitempty"`
|
// Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
|
||||||
PresencePenalty *float64 `json:"presence_penalty,omitempty"`
|
Modalities []string `json:"modalities,omitempty"`
|
||||||
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
|
Prediction any `json:"prediction,omitempty"`
|
||||||
Seed float64 `json:"seed,omitempty"`
|
Audio *Audio `json:"audio,omitempty"`
|
||||||
ServiceTier *string `json:"service_tier,omitempty"`
|
PresencePenalty *float64 `json:"presence_penalty,omitempty"`
|
||||||
Stop any `json:"stop,omitempty"`
|
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
|
||||||
Stream bool `json:"stream,omitempty"`
|
Seed float64 `json:"seed,omitempty"`
|
||||||
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
|
ServiceTier *string `json:"service_tier,omitempty"`
|
||||||
Temperature *float64 `json:"temperature,omitempty"`
|
Stop any `json:"stop,omitempty"`
|
||||||
TopP *float64 `json:"top_p,omitempty"`
|
Stream bool `json:"stream,omitempty"`
|
||||||
TopK int `json:"top_k,omitempty"`
|
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
|
||||||
Tools []Tool `json:"tools,omitempty"`
|
Temperature *float64 `json:"temperature,omitempty"`
|
||||||
ToolChoice any `json:"tool_choice,omitempty"`
|
TopP *float64 `json:"top_p,omitempty"`
|
||||||
ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
|
TopK int `json:"top_k,omitempty"`
|
||||||
User string `json:"user,omitempty"`
|
Tools []Tool `json:"tools,omitempty"`
|
||||||
FunctionCall any `json:"function_call,omitempty"`
|
ToolChoice any `json:"tool_choice,omitempty"`
|
||||||
Functions any `json:"functions,omitempty"`
|
ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
|
||||||
|
User string `json:"user,omitempty"`
|
||||||
|
FunctionCall any `json:"function_call,omitempty"`
|
||||||
|
Functions any `json:"functions,omitempty"`
|
||||||
// https://platform.openai.com/docs/api-reference/embeddings/create
|
// https://platform.openai.com/docs/api-reference/embeddings/create
|
||||||
Input any `json:"input,omitempty"`
|
Input any `json:"input,omitempty"`
|
||||||
EncodingFormat string `json:"encoding_format,omitempty"`
|
EncodingFormat string `json:"encoding_format,omitempty"`
|
||||||
|
Loading…
Reference in New Issue
Block a user