From 413fcde3825e1dd001120a60a2cb1238fbbc9695 Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Thu, 13 Mar 2025 03:37:38 +0000 Subject: [PATCH] feat: support openai websearch models --- controller/channel-test.go | 2 + relay/adaptor/cloudflare/main.go | 3 +- relay/adaptor/openai/adaptor.go | 61 +++++++++++++- relay/adaptor/openai/constants.go | 4 + relay/billing/ratio/model.go | 136 ++++++++++++++++-------------- relay/controller/helper.go | 16 ++-- relay/controller/text.go | 3 + relay/model/general.go | 102 ++++++++++++++-------- relay/model/message.go | 119 ++++++++++++++++++++++++-- relay/model/misc.go | 15 +++- 10 files changed, 341 insertions(+), 120 deletions(-) diff --git a/controller/channel-test.go b/controller/channel-test.go index 3894691c..849aaef5 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -106,6 +106,8 @@ func testChannel(ctx context.Context, channel *model.Channel, request *relaymode if err != nil { return "", err, nil } + c.Set(ctxkey.ConvertedRequest, convertedRequest) + jsonData, err := json.Marshal(convertedRequest) if err != nil { return "", err, nil diff --git a/relay/adaptor/cloudflare/main.go b/relay/adaptor/cloudflare/main.go index 980a2891..e164d473 100644 --- a/relay/adaptor/cloudflare/main.go +++ b/relay/adaptor/cloudflare/main.go @@ -19,9 +19,8 @@ import ( ) func ConvertCompletionsRequest(textRequest model.GeneralOpenAIRequest) *Request { - p, _ := textRequest.Prompt.(string) return &Request{ - Prompt: p, + Prompt: textRequest.Prompt, MaxTokens: textRequest.MaxTokens, Stream: textRequest.Stream, Temperature: textRequest.Temperature, diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go index 4e44e21b..f6fdfe7d 100644 --- a/relay/adaptor/openai/adaptor.go +++ b/relay/adaptor/openai/adaptor.go @@ -1,15 +1,17 @@ package openai import ( - "errors" "fmt" "io" + "math" "net/http" "strings" "github.com/gin-gonic/gin" + "github.com/pkg/errors" 
"github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/alibailian" @@ -19,6 +21,7 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor/minimax" "github.com/songquanpeng/one-api/relay/adaptor/novita" "github.com/songquanpeng/one-api/relay/adaptor/openrouter" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" @@ -127,11 +130,16 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) return request, nil } -func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) { +func (a *Adaptor) DoRequest(c *gin.Context, + meta *meta.Meta, + requestBody io.Reader) (*http.Response, error) { return adaptor.DoRequestHelper(a, c, meta, requestBody) } -func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { +func (a *Adaptor) DoResponse(c *gin.Context, + resp *http.Response, + meta *meta.Meta) (usage *model.Usage, + err *model.ErrorWithStatusCode) { if meta.IsStream { var responseText string err, responseText, usage = StreamHandler(c, resp, meta.Mode) @@ -150,6 +158,53 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName) } } + + // ------------------------------------- + // calculate web-search tool cost + // ------------------------------------- + searchContextSize := "medium" + var req *model.GeneralOpenAIRequest + if vi, ok := c.Get(ctxkey.ConvertedRequest); ok { + if req, ok = vi.(*model.GeneralOpenAIRequest); ok { + if req != nil && + req.WebSearchOptions != nil && + 
req.WebSearchOptions.SearchContextSize != nil { + searchContextSize = *req.WebSearchOptions.SearchContextSize + } + + switch { + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD)) + case "medium": + usage.ToolsCost += int64(math.Ceil(35.0 / 1000 * ratio.USD)) + case "high": + usage.ToolsCost += int64(math.Ceil(40.0 / 1000 * ratio.USD)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(25.0 / 1000 * ratio.USD)) + case "medium": + usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.USD)) + case "high": + usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } + } + } + } + return } diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go index 8a643bc6..41661adb 100644 --- a/relay/adaptor/openai/constants.go +++ b/relay/adaptor/openai/constants.go @@ -24,4 +24,8 @@ var ModelList = []string{ "o1", "o1-2024-12-17", "o1-preview", "o1-preview-2024-09-12", "o1-mini", "o1-mini-2024-09-12", + "o3-mini", "o3-mini-2025-01-31", + "gpt-4.5-preview", "gpt-4.5-preview-2025-02-27", + // https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat + "gpt-4o-search-preview", "gpt-4o-mini-search-preview", } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index 6ef2a457..ebbab966 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -26,68 +26,80 @@ var modelRatioLock sync.RWMutex // 1 === ¥0.014 / 1k tokens var
ModelRatio = map[string]float64{ // https://openai.com/pricing - "gpt-4": 15, - "gpt-4-0314": 15, - "gpt-4-0613": 15, - "gpt-4-32k": 30, - "gpt-4-32k-0314": 30, - "gpt-4-32k-0613": 30, - "gpt-4-1106-preview": 5, // $0.01 / 1K tokens - "gpt-4-0125-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo": 5, // $0.01 / 1K tokens - "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens - "gpt-4o": 2.5, // $0.005 / 1K tokens - "chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens - "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens - "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens - "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens - "gpt-4-vision-preview": 5, // $0.01 / 1K tokens - "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens - "gpt-3.5-turbo-0301": 0.75, - "gpt-3.5-turbo-0613": 0.75, - "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens - "gpt-3.5-turbo-16k-0613": 1.5, - "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens - "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens - "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens - "o1": 7.5, // $15.00 / 1M input tokens - "o1-2024-12-17": 7.5, - "o1-preview": 7.5, // $15.00 / 1M input tokens - "o1-preview-2024-09-12": 7.5, - "o1-mini": 1.5, // $3.00 / 1M input tokens - "o1-mini-2024-09-12": 1.5, - "o3-mini": 1.5, // $3.00 / 1M input tokens - "o3-mini-2025-01-31": 1.5, - "davinci-002": 1, // $0.002 / 1K tokens - "babbage-002": 0.2, // $0.0004 / 1K tokens - "text-ada-001": 0.2, - "text-babbage-001": 0.25, - "text-curie-001": 1, - "text-davinci-002": 10, - "text-davinci-003": 10, - "text-davinci-edit-001": 10, - "code-davinci-edit-001": 10, - "whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens - "tts-1": 7.5, // $0.015 / 1K characters - "tts-1-1106": 7.5, - "tts-1-hd": 15, // $0.030 / 1K characters - "tts-1-hd-1106": 15, - "davinci": 10, - 
"curie": 10, - "babbage": 10, - "ada": 10, - "text-embedding-ada-002": 0.05, - "text-embedding-3-small": 0.01, - "text-embedding-3-large": 0.065, - "text-search-ada-doc-001": 10, - "text-moderation-stable": 0.1, - "text-moderation-latest": 0.1, - "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image - "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image - // https://docs.anthropic.com/en/docs/about-claude/models + "gpt-4.5-preview": 75 * MILLI_USD, + "gpt-4.5-preview-2025-02-27": 75 * MILLI_USD, + "gpt-4": 15, + "gpt-4-0314": 15, + "gpt-4-0613": 15, + "gpt-4-32k": 30, + "gpt-4-32k-0314": 30, + "gpt-4-32k-0613": 30, + "gpt-4-1106-preview": 5, // $0.01 / 1K tokens + "gpt-4-0125-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo": 5, // $0.01 / 1K tokens + "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens + "gpt-4o": 2.5, // $0.005 / 1K tokens + "chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens + "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens + "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens + "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens + "gpt-4o-search-preview": 2.5, // $0.005 / 1K tokens + "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens + "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens + "gpt-4o-mini-search-preview": 0.075, // $0.00015 / 1K tokens + "gpt-4-vision-preview": 5, // $0.01 / 1K tokens + // Audio billing will mix text and audio tokens, the unit price is different. 
+ // Here records the cost of text, the cost multiplier of audio + // relative to text is in AudioRatio + "gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens + "gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens + "gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens + "gpt-4o-mini-audio-preview": 0.15 * MILLI_USD, // $0.15/1M tokens + "gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens + "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens + "gpt-3.5-turbo-0301": 0.75, + "gpt-3.5-turbo-0613": 0.75, + "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens + "gpt-3.5-turbo-16k-0613": 1.5, + "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens + "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens + "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens + "o1": 15 * MILLI_USD, // $15.00 / 1M input tokens + "o1-2024-12-17": 15 * MILLI_USD, + "o1-preview": 15 * MILLI_USD, // $15.00 / 1M input tokens + "o1-preview-2024-09-12": 15 * MILLI_USD, + "o1-mini": 1.1 * MILLI_USD, // $1.10 / 1M input tokens + "o1-mini-2024-09-12": 1.1 * MILLI_USD, + "o3-mini": 1.1 * MILLI_USD, + "o3-mini-2025-01-31": 1.1 * MILLI_USD, + "davinci-002": 1, // $0.002 / 1K tokens + "babbage-002": 0.2, // $0.0004 / 1K tokens + "text-ada-001": 0.2, + "text-babbage-001": 0.25, + "text-curie-001": 1, + "text-davinci-002": 10, + "text-davinci-003": 10, + "text-davinci-edit-001": 10, + "code-davinci-edit-001": 10, + "whisper-1": 15, + "tts-1": 7.5, // $0.015 / 1K characters + "tts-1-1106": 7.5, + "tts-1-hd": 15, // $0.030 / 1K characters + "tts-1-hd-1106": 15, + "davinci": 10, + "curie": 10, + "babbage": 10, + "ada": 10, + "text-embedding-ada-002": 0.05, + "text-embedding-3-small": 0.01, + "text-embedding-3-large": 0.065, + "text-search-ada-doc-001": 10, + "text-moderation-stable": 0.1, + "text-moderation-latest": 0.1, + "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image + "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image + // https://www.anthropic.com/api#pricing
"claude-instant-1.2": 0.8 / 1000 * USD, "claude-2.0": 8.0 / 1000 * USD, "claude-2.1": 8.0 / 1000 * USD, diff --git a/relay/controller/helper.go b/relay/controller/helper.go index 3cbd90c4..f623193f 100644 --- a/relay/controller/helper.go +++ b/relay/controller/helper.go @@ -8,18 +8,16 @@ import ( "net/http" "strings" - "github.com/songquanpeng/one-api/common/helper" - "github.com/songquanpeng/one-api/relay/constant/role" - "github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/helper" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/model" "github.com/songquanpeng/one-api/relay/adaptor/openai" billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" + "github.com/songquanpeng/one-api/relay/constant/role" "github.com/songquanpeng/one-api/relay/controller/validator" "github.com/songquanpeng/one-api/relay/meta" relaymodel "github.com/songquanpeng/one-api/relay/model" @@ -116,7 +114,7 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M // we cannot just return, because we may have to return the pre-consumed quota quota = 0 } - quotaDelta := quota - preConsumedQuota + quotaDelta := quota - preConsumedQuota + usage.ToolsCost err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta) if err != nil { logger.Error(ctx, "error consuming token remain quota: "+err.Error()) @@ -125,7 +123,13 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M if err != nil { logger.Error(ctx, "error update user quota cache: "+err.Error()) } - logContent := fmt.Sprintf("倍率:%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio) + + var logContent string + if usage.ToolsCost == 0 { + logContent = fmt.Sprintf("倍率:%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio) + } else { + logContent = fmt.Sprintf("倍率:%.2f × %.2f × 
%.2f, tools cost %d", modelRatio, groupRatio, completionRatio, usage.ToolsCost) + } model.RecordConsumeLog(ctx, &model.Log{ UserId: meta.UserId, ChannelId: meta.ChannelId, diff --git a/relay/controller/text.go b/relay/controller/text.go index f912498a..f70fab6b 100644 --- a/relay/controller/text.go +++ b/relay/controller/text.go @@ -10,6 +10,7 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay" "github.com/songquanpeng/one-api/relay/adaptor" @@ -104,6 +105,8 @@ func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralO logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error()) return nil, err } + c.Set(ctxkey.ConvertedRequest, convertedRequest) + jsonData, err := json.Marshal(convertedRequest) if err != nil { logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error()) diff --git a/relay/model/general.go b/relay/model/general.go index a87928bd..1b09012f 100644 --- a/relay/model/general.go +++ b/relay/model/general.go @@ -25,46 +25,50 @@ type StreamOptions struct { type GeneralOpenAIRequest struct { // https://platform.openai.com/docs/api-reference/chat/create - Messages []Message `json:"messages,omitempty"` - Model string `json:"model,omitempty"` - Store *bool `json:"store,omitempty"` - ReasoningEffort *string `json:"reasoning_effort,omitempty"` - Metadata any `json:"metadata,omitempty"` - FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` - LogitBias any `json:"logit_bias,omitempty"` - Logprobs *bool `json:"logprobs,omitempty"` - TopLogprobs *int `json:"top_logprobs,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` - MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` - N int `json:"n,omitempty"` - Modalities []string `json:"modalities,omitempty"` - Prediction any 
`json:"prediction,omitempty"` - Audio *Audio `json:"audio,omitempty"` - PresencePenalty *float64 `json:"presence_penalty,omitempty"` - ResponseFormat *ResponseFormat `json:"response_format,omitempty"` - Seed float64 `json:"seed,omitempty"` - ServiceTier *string `json:"service_tier,omitempty"` - Stop any `json:"stop,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopK int `json:"top_k,omitempty"` - Tools []Tool `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` - User string `json:"user,omitempty"` - FunctionCall any `json:"function_call,omitempty"` - Functions any `json:"functions,omitempty"` + Messages []Message `json:"messages,omitempty"` + Model string `json:"model,omitempty"` + Store *bool `json:"store,omitempty"` + Metadata any `json:"metadata,omitempty"` + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` + LogitBias any `json:"logit_bias,omitempty"` + Logprobs *bool `json:"logprobs,omitempty"` + TopLogprobs *int `json:"top_logprobs,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` + N int `json:"n,omitempty"` + // ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only. 
+ ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"` + // Modalities currently the model only programmatically allows modalities = [“text”, “audio”] + Modalities []string `json:"modalities,omitempty"` + Prediction any `json:"prediction,omitempty"` + Audio *Audio `json:"audio,omitempty"` + PresencePenalty *float64 `json:"presence_penalty,omitempty"` + ResponseFormat *ResponseFormat `json:"response_format,omitempty"` + Seed float64 `json:"seed,omitempty"` + ServiceTier *string `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"` + Stop any `json:"stop,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` + User string `json:"user,omitempty"` + FunctionCall any `json:"function_call,omitempty"` + Functions any `json:"functions,omitempty"` // https://platform.openai.com/docs/api-reference/embeddings/create Input any `json:"input,omitempty"` EncodingFormat string `json:"encoding_format,omitempty"` Dimensions int `json:"dimensions,omitempty"` // https://platform.openai.com/docs/api-reference/images/create - Prompt any `json:"prompt,omitempty"` - Quality *string `json:"quality,omitempty"` - Size string `json:"size,omitempty"` - Style *string `json:"style,omitempty"` + Prompt string `json:"prompt,omitempty"` + Quality *string `json:"quality,omitempty"` + Size string `json:"size,omitempty"` + Style *string `json:"style,omitempty"` + WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` + // Others Instruction string `json:"instruction,omitempty"` NumCtx int `json:"num_ctx,omitempty"` @@ -79,6 +83,34 @@ type GeneralOpenAIRequest struct { Thinking 
*Thinking `json:"thinking,omitempty"` } +// WebSearchOptions is the tool searches the web for relevant results to use in a response. +type WebSearchOptions struct { + // SearchContextSize is the high level guidance for the amount of context window space to use for the search, + // default is "medium". + SearchContextSize *string `json:"search_context_size,omitempty" binding:"omitempty,oneof=low medium high"` + UserLocation *UserLocation `json:"user_location,omitempty"` +} + +// UserLocation is a struct that contains the location of the user. +type UserLocation struct { + // Approximate is the approximate location parameters for the search. + Approximate UserLocationApproximate `json:"approximate" binding:"required"` + // Type is the type of location approximation. + Type string `json:"type" binding:"required,oneof=approximate"` +} + +// UserLocationApproximate is a struct that contains the approximate location of the user. +type UserLocationApproximate struct { + // City is the city of the user, e.g. San Francisco. + City *string `json:"city,omitempty"` + // Country is the country of the user, e.g. US. + Country *string `json:"country,omitempty"` + // Region is the region of the user, e.g. California. + Region *string `json:"region,omitempty"` + // Timezone is the IANA timezone of the user, e.g. America/Los_Angeles. + Timezone *string `json:"timezone,omitempty"` +} + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking type Thinking struct { Type string `json:"type"` diff --git a/relay/model/message.go b/relay/model/message.go index 8ab54732..a1ffa615 100644 --- a/relay/model/message.go +++ b/relay/model/message.go @@ -1,13 +1,43 @@ package model +import ( + "context" + "strings" + + "github.com/songquanpeng/one-api/common/logger" +) + +// ReasoningFormat is the format of reasoning content, +// can be set by the reasoning_format parameter in the request url. 
+type ReasoningFormat string + +const ( + ReasoningFormatUnspecified ReasoningFormat = "" + // ReasoningFormatReasoningContent is the reasoning format used by deepseek official API + ReasoningFormatReasoningContent ReasoningFormat = "reasoning_content" + // ReasoningFormatReasoning is the reasoning format used by openrouter + ReasoningFormatReasoning ReasoningFormat = "reasoning" + + // ReasoningFormatThinkTag is the reasoning format used by 3rd party deepseek-r1 providers. + // + // Deprecated: I believe is a very poor format, especially in stream mode, it is difficult to extract and convert. + // Considering that only a few deepseek-r1 third-party providers use this format, it has been decided to no longer support it. + // ReasoningFormatThinkTag ReasoningFormat = "think-tag" + + // ReasoningFormatThinking is the reasoning format used by anthropic + ReasoningFormatThinking ReasoningFormat = "thinking" +) + type Message struct { Role string `json:"role,omitempty"` // Content is a string or a list of objects - Content any `json:"content,omitempty"` - Name *string `json:"name,omitempty"` - ToolCalls []Tool `json:"tool_calls,omitempty"` - ToolCallId string `json:"tool_call_id,omitempty"` - Audio *messageAudio `json:"audio,omitempty"` + Content any `json:"content,omitempty"` + Name *string `json:"name,omitempty"` + ToolCalls []Tool `json:"tool_calls,omitempty"` + ToolCallId string `json:"tool_call_id,omitempty"` + Audio *messageAudio `json:"audio,omitempty"` + Annotation []AnnotationItem `json:"annotation,omitempty"` + // ------------------------------------- // Deepseek 专有的一些字段 // https://api-docs.deepseek.com/api/create-chat-completion @@ -18,11 +48,52 @@ type Message struct { // Prefix Completion feature as the input for the CoT in the last assistant message. // When using this feature, the prefix parameter must be set to true. 
ReasoningContent *string `json:"reasoning_content,omitempty"` + // ------------------------------------- // Openrouter // ------------------------------------- Reasoning *string `json:"reasoning,omitempty"` Refusal *bool `json:"refusal,omitempty"` + + // ------------------------------------- + // Anthropic + // ------------------------------------- + Thinking *string `json:"thinking,omitempty"` + Signature *string `json:"signature,omitempty"` +} + +type AnnotationItem struct { + Type string `json:"type" binding:"oneof=url_citation"` + UrlCitation UrlCitation `json:"url_citation"` +} + +// UrlCitation is a URL citation when using web search. +type UrlCitation struct { + // EndIndex is the index of the last character of the URL citation in the message. + EndIndex int `json:"end_index"` + // StartIndex is the index of the first character of the URL citation in the message. + StartIndex int `json:"start_index"` + // Title is the title of the web resource. + Title string `json:"title"` + // Url is the URL of the web resource.
+ Url string `json:"url"` +} + +// SetReasoningContent sets the reasoning content based on the format +func (m *Message) SetReasoningContent(format string, reasoningContent string) { + switch ReasoningFormat(strings.ToLower(strings.TrimSpace(format))) { + case ReasoningFormatReasoningContent: + m.ReasoningContent = &reasoningContent + // case ReasoningFormatThinkTag: + // m.Content = fmt.Sprintf("%s%s", reasoningContent, m.Content) + case ReasoningFormatThinking: + m.Thinking = &reasoningContent + case ReasoningFormatReasoning, + ReasoningFormatUnspecified: + m.Reasoning = &reasoningContent + default: + logger.Warnf(context.TODO(), "unknown reasoning format: %q", format) + } } type messageAudio struct { @@ -50,6 +121,7 @@ func (m Message) StringContent() string { if !ok { continue } + if contentMap["type"] == ContentTypeText { if subStr, ok := contentMap["text"].(string); ok { contentStr += subStr @@ -58,6 +130,7 @@ func (m Message) StringContent() string { } return contentStr } + return "" } @@ -71,6 +144,7 @@ func (m Message) ParseContent() []MessageContent { }) return contentList } + anyList, ok := m.Content.([]any) if ok { for _, contentItem := range anyList { @@ -95,8 +169,21 @@ func (m Message) ParseContent() []MessageContent { }, }) } + case ContentTypeInputAudio: + if subObj, ok := contentMap["input_audio"].(map[string]any); ok { + contentList = append(contentList, MessageContent{ + Type: ContentTypeInputAudio, + InputAudio: &InputAudio{ + Data: subObj["data"].(string), + Format: subObj["format"].(string), + }, + }) + } + default: + logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"]) } } + return contentList } return nil @@ -108,7 +195,23 @@ type ImageURL struct { } type MessageContent struct { - Type string `json:"type,omitempty"` - Text string `json:"text"` - ImageURL *ImageURL `json:"image_url,omitempty"` + // Type should be one of the following: text/input_audio + Type string `json:"type,omitempty"` + Text string `json:"text"` + 
ImageURL *ImageURL `json:"image_url,omitempty"` + InputAudio *InputAudio `json:"input_audio,omitempty"` + // ------------------------------------- + // Anthropic + // ------------------------------------- + Thinking *string `json:"thinking,omitempty"` + Signature *string `json:"signature,omitempty"` +} + +type InputAudio struct { + // Data is the base64 encoded audio data + Data string `json:"data" binding:"required"` + // Format is the audio format, should be one of the + // following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16. + // When stream=true, format should be pcm16 + Format string `json:"format"` } diff --git a/relay/model/misc.go b/relay/model/misc.go index 9d1f7e4f..0f58bdd3 100644 --- a/relay/model/misc.go +++ b/relay/model/misc.go @@ -1,15 +1,22 @@ package model +// Usage is the token usage information returned by OpenAI API. type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` // PromptTokensDetails may be empty for some models - PromptTokensDetails *usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"` + PromptTokensDetails *usagePromptTokensDetails `json:"prompt_tokens_details,omitempty"` // CompletionTokensDetails may be empty for some models - CompletionTokensDetails *usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"` - ServiceTier string `gorm:"-" json:"service_tier,omitempty"` - SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"` + CompletionTokensDetails *usageCompletionTokensDetails `json:"completion_tokens_details,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SystemFingerprint string `json:"system_fingerprint,omitempty"` + + // ------------------------------------- + // Custom fields + // ------------------------------------- + // ToolsCost is the cost of using tools, in quota. + ToolsCost int64 `json:"tools_cost,omitempty"` } type Error struct {