feat: support openai websearch models

2026-02-12 00:54:24 +08:00 · 2025-03-13 03:37:38 +00:00
parent 6e634b85cf
commit 413fcde382
10 changed files with 341 additions and 120 deletions
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -106,6 +106,8 @@ func testChannel(ctx context.Context, channel *model.Channel, request *relaymode
 	if err != nil {
 		return "", err, nil
 	}
+	c.Set(ctxkey.ConvertedRequest, convertedRequest)
+
 	jsonData, err := json.Marshal(convertedRequest)
 	if err != nil {
 		return "", err, nil
--- a/relay/adaptor/cloudflare/main.go
+++ b/relay/adaptor/cloudflare/main.go
@@ -19,9 +19,8 @@ import (
 )

 func ConvertCompletionsRequest(textRequest model.GeneralOpenAIRequest) *Request {
-	p, _ := textRequest.Prompt.(string)
 	return &Request{
-		Prompt:      p,
+		Prompt:      textRequest.Prompt,
 		MaxTokens:   textRequest.MaxTokens,
 		Stream:      textRequest.Stream,
 		Temperature: textRequest.Temperature,
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -1,15 +1,17 @@
 package openai

 import (
-	"errors"
 	"fmt"
 	"io"
+	"math"
 	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
+	"github.com/pkg/errors"

 	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/ctxkey"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/adaptor/alibailian"
@@ -19,6 +21,7 @@ import (
 	"github.com/songquanpeng/one-api/relay/adaptor/minimax"
 	"github.com/songquanpeng/one-api/relay/adaptor/novita"
 	"github.com/songquanpeng/one-api/relay/adaptor/openrouter"
+	"github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/channeltype"
 	"github.com/songquanpeng/one-api/relay/meta"
 	"github.com/songquanpeng/one-api/relay/model"
@@ -127,11 +130,16 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
 	return request, nil
 }

-func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
+func (a *Adaptor) DoRequest(c *gin.Context,
+	meta *meta.Meta,
+	requestBody io.Reader) (*http.Response, error) {
 	return adaptor.DoRequestHelper(a, c, meta, requestBody)
 }

-func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+func (a *Adaptor) DoResponse(c *gin.Context,
+	resp *http.Response,
+	meta *meta.Meta) (usage *model.Usage,
+	err *model.ErrorWithStatusCode) {
 	if meta.IsStream {
 		var responseText string
 		err, responseText, usage = StreamHandler(c, resp, meta.Mode)
@@ -150,6 +158,61 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
 			err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
 		}
 	}
+
+	// -------------------------------------
+	// calculate web-search tool cost
+	// -------------------------------------
+	// If the handlers above reported an error or produced no usage there is
+	// nothing to bill; bail out before usage is dereferenced below.
+	if err != nil || usage == nil {
+		return
+	}
+
+	searchContextSize := "medium"
+	var req *model.GeneralOpenAIRequest
+	if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+		if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+			if req != nil &&
+				req.WebSearchOptions != nil &&
+				req.WebSearchOptions.SearchContextSize != nil {
+				searchContextSize = *req.WebSearchOptions.SearchContextSize
+			}
+
+			// The numerators must be float literals: `30 / 1000` is an integer
+			// constant division that evaluates to 0 and would bill nothing.
+			switch {
+			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
+				switch searchContextSize {
+				case "low":
+					usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD))
+				case "medium":
+					usage.ToolsCost += int64(math.Ceil(35.0 / 1000 * ratio.USD))
+				case "high":
+					usage.ToolsCost += int64(math.Ceil(40.0 / 1000 * ratio.USD))
+				default:
+					return nil, ErrorWrapper(
+						errors.Errorf("invalid search context size %q", searchContextSize),
+						"invalid search context size: "+searchContextSize,
+						http.StatusBadRequest)
+				}
+			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+				switch searchContextSize {
+				case "low":
+					usage.ToolsCost += int64(math.Ceil(25.0 / 1000 * ratio.USD))
+				case "medium":
+					usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.USD))
+				case "high":
+					usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD))
+				default:
+					return nil, ErrorWrapper(
+						errors.Errorf("invalid search context size %q", searchContextSize),
+						"invalid search context size: "+searchContextSize,
+						http.StatusBadRequest)
+				}
+			}
+		}
+	}
+
 	return
 }

--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -24,4 +24,8 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
+	"o3-mini", "o3-mini-2025-01-31",
+	"gpt-4.5-preview", "gpt-4.5-preview-2025-02-27",
+	// https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat
+	"gpt-4o-search-preview", "gpt-4o-mini-search-preview",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -26,68 +26,80 @@ var modelRatioLock sync.RWMutex
 // 1 === ￥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
 	// https://openai.com/pricing
-	"gpt-4":                   15,
-	"gpt-4-0314":              15,
-	"gpt-4-0613":              15,
-	"gpt-4-32k":               30,
-	"gpt-4-32k-0314":          30,
-	"gpt-4-32k-0613":          30,
-	"gpt-4-1106-preview":      5,     // $0.01 / 1K tokens
-	"gpt-4-0125-preview":      5,     // $0.01 / 1K tokens
-	"gpt-4-turbo-preview":     5,     // $0.01 / 1K tokens
-	"gpt-4-turbo":             5,     // $0.01 / 1K tokens
-	"gpt-4-turbo-2024-04-09":  5,     // $0.01 / 1K tokens
-	"gpt-4o":                  2.5,   // $0.005 / 1K tokens
-	"chatgpt-4o-latest":       2.5,   // $0.005 / 1K tokens
-	"gpt-4o-2024-05-13":       2.5,   // $0.005 / 1K tokens
-	"gpt-4o-2024-08-06":       1.25,  // $0.0025 / 1K tokens
-	"gpt-4o-2024-11-20":       1.25,  // $0.0025 / 1K tokens
-	"gpt-4o-mini":             0.075, // $0.00015 / 1K tokens
-	"gpt-4o-mini-2024-07-18":  0.075, // $0.00015 / 1K tokens
-	"gpt-4-vision-preview":    5,     // $0.01 / 1K tokens
-	"gpt-3.5-turbo":           0.25,  // $0.0005 / 1K tokens
-	"gpt-3.5-turbo-0301":      0.75,
-	"gpt-3.5-turbo-0613":      0.75,
-	"gpt-3.5-turbo-16k":       1.5, // $0.003 / 1K tokens
-	"gpt-3.5-turbo-16k-0613":  1.5,
-	"gpt-3.5-turbo-instruct":  0.75, // $0.0015 / 1K tokens
-	"gpt-3.5-turbo-1106":      0.5,  // $0.001 / 1K tokens
-	"gpt-3.5-turbo-0125":      0.25, // $0.0005 / 1K tokens
-	"o1":                      7.5,  // $15.00 / 1M input tokens
-	"o1-2024-12-17":           7.5,
-	"o1-preview":              7.5, // $15.00 / 1M input tokens
-	"o1-preview-2024-09-12":   7.5,
-	"o1-mini":                 1.5, // $3.00 / 1M input tokens
-	"o1-mini-2024-09-12":      1.5,
-	"o3-mini":                 1.5, // $3.00 / 1M input tokens
-	"o3-mini-2025-01-31":      1.5,
-	"davinci-002":             1,   // $0.002 / 1K tokens
-	"babbage-002":             0.2, // $0.0004 / 1K tokens
-	"text-ada-001":            0.2,
-	"text-babbage-001":        0.25,
-	"text-curie-001":          1,
-	"text-davinci-002":        10,
-	"text-davinci-003":        10,
-	"text-davinci-edit-001":   10,
-	"code-davinci-edit-001":   10,
-	"whisper-1":               15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
-	"tts-1":                   7.5, // $0.015 / 1K characters
-	"tts-1-1106":              7.5,
-	"tts-1-hd":                15, // $0.030 / 1K characters
-	"tts-1-hd-1106":           15,
-	"davinci":                 10,
-	"curie":                   10,
-	"babbage":                 10,
-	"ada":                     10,
-	"text-embedding-ada-002":  0.05,
-	"text-embedding-3-small":  0.01,
-	"text-embedding-3-large":  0.065,
-	"text-search-ada-doc-001": 10,
-	"text-moderation-stable":  0.1,
-	"text-moderation-latest":  0.1,
-	"dall-e-2":                0.02 * USD, // $0.016 - $0.020 / image
-	"dall-e-3":                0.04 * USD, // $0.040 - $0.120 / image
-	// https://docs.anthropic.com/en/docs/about-claude/models
+	"gpt-4.5-preview":            75 * MILLI_USD,
+	"gpt-4.5-preview-2025-02-27": 75 * MILLI_USD,
+	"gpt-4":                      15,
+	"gpt-4-0314":                 15,
+	"gpt-4-0613":                 15,
+	"gpt-4-32k":                  30,
+	"gpt-4-32k-0314":             30,
+	"gpt-4-32k-0613":             30,
+	"gpt-4-1106-preview":         5,     // $0.01 / 1K tokens
+	"gpt-4-0125-preview":         5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-preview":        5,     // $0.01 / 1K tokens
+	"gpt-4-turbo":                5,     // $0.01 / 1K tokens
+	"gpt-4-turbo-2024-04-09":     5,     // $0.01 / 1K tokens
+	"gpt-4o":                     2.5,   // $0.005 / 1K tokens
+	"chatgpt-4o-latest":          2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-05-13":          2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-08-06":          1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-2024-11-20":          1.25,  // $0.0025 / 1K tokens
+	"gpt-4o-search-preview":      2.5,   // $0.005 / 1K tokens
+	"gpt-4o-mini":                0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini-2024-07-18":     0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini-search-preview": 0.075, // $0.00015 / 1K tokens
+	"gpt-4-vision-preview":       5,     // $0.01 / 1K tokens
+	// Audio billing will mix text and audio tokens, the unit price is different.
+	// Here records the cost of text, the cost multiplier of audio
+	// relative to text is in AudioRatio
+	"gpt-4o-audio-preview":                 1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-12-17":      1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-audio-preview-2024-10-01":      1.25,             // $0.0025 / 1K tokens
+	"gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
+	"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
+	"gpt-3.5-turbo":                        0.25,             // $0.0005 / 1K tokens
+	"gpt-3.5-turbo-0301":                   0.75,
+	"gpt-3.5-turbo-0613":                   0.75,
+	"gpt-3.5-turbo-16k":                    1.5, // $0.003 / 1K tokens
+	"gpt-3.5-turbo-16k-0613":               1.5,
+	"gpt-3.5-turbo-instruct":               0.75,           // $0.0015 / 1K tokens
+	"gpt-3.5-turbo-1106":                   0.5,            // $0.001 / 1K tokens
+	"gpt-3.5-turbo-0125":                   0.25,           // $0.0005 / 1K tokens
+	"o1":                                   15 * MILLI_USD, // $15.00 / 1M input tokens
+	"o1-2024-12-17":                        15 * MILLI_USD,
+	"o1-preview":                           15 * MILLI_USD, // $15.00 / 1M input tokens
+	"o1-preview-2024-09-12":                15 * MILLI_USD,
+	"o1-mini":                              1.1 * MILLI_USD, // $1.10 / 1M input tokens
+	"o1-mini-2024-09-12":                   1.1 * MILLI_USD,
+	"o3-mini":                              1.1 * MILLI_USD,
+	"o3-mini-2025-01-31":                   1.1 * MILLI_USD,
+	"davinci-002":                          1,   // $0.002 / 1K tokens
+	"babbage-002":                          0.2, // $0.0004 / 1K tokens
+	"text-ada-001":                         0.2,
+	"text-babbage-001":                     0.25,
+	"text-curie-001":                       1,
+	"text-davinci-002":                     10,
+	"text-davinci-003":                     10,
+	"text-davinci-edit-001":                10,
+	"code-davinci-edit-001":                10,
+	"whisper-1":                            15,
+	"tts-1":                                7.5, // $0.015 / 1K characters
+	"tts-1-1106":                           7.5,
+	"tts-1-hd":                             15, // $0.030 / 1K characters
+	"tts-1-hd-1106":                        15,
+	"davinci":                              10,
+	"curie":                                10,
+	"babbage":                              10,
+	"ada":                                  10,
+	"text-embedding-ada-002":               0.05,
+	"text-embedding-3-small":               0.01,
+	"text-embedding-3-large":               0.065,
+	"text-search-ada-doc-001":              10,
+	"text-moderation-stable":               0.1,
+	"text-moderation-latest":               0.1,
+	"dall-e-2":                             0.02 * USD, // $0.016 - $0.020 / image
+	"dall-e-3":                             0.04 * USD, // $0.040 - $0.120 / image
+	// https://www.anthropic.com/api#pricing
 	"claude-instant-1.2":         0.8 / 1000 * USD,
 	"claude-2.0":                 8.0 / 1000 * USD,
 	"claude-2.1":                 8.0 / 1000 * USD,
--- a/relay/controller/helper.go
+++ b/relay/controller/helper.go
@@ -8,18 +8,16 @@ import (
 	"net/http"
 	"strings"

-	"github.com/songquanpeng/one-api/common/helper"
-	"github.com/songquanpeng/one-api/relay/constant/role"
-
 	"github.com/gin-gonic/gin"
-
 	"github.com/songquanpeng/one-api/common"
 	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/model"
 	"github.com/songquanpeng/one-api/relay/adaptor/openai"
 	billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/channeltype"
+	"github.com/songquanpeng/one-api/relay/constant/role"
 	"github.com/songquanpeng/one-api/relay/controller/validator"
 	"github.com/songquanpeng/one-api/relay/meta"
 	relaymodel "github.com/songquanpeng/one-api/relay/model"
@@ -116,7 +114,7 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
 	}
-	quotaDelta := quota - preConsumedQuota
+	quotaDelta := quota - preConsumedQuota + usage.ToolsCost
 	err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
 	if err != nil {
 		logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -125,7 +123,13 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 	if err != nil {
 		logger.Error(ctx, "error update user quota cache: "+err.Error())
 	}
-	logContent := fmt.Sprintf("倍率：%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
+
+	var logContent string
+	if usage.ToolsCost == 0 {
+		logContent = fmt.Sprintf("倍率：%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
+	} else {
+		logContent = fmt.Sprintf("倍率：%.2f × %.2f × %.2f, tools cost %d", modelRatio, groupRatio, completionRatio, usage.ToolsCost)
+	}
 	model.RecordConsumeLog(ctx, &model.Log{
 		UserId:            meta.UserId,
 		ChannelId:         meta.ChannelId,
--- a/relay/controller/text.go
+++ b/relay/controller/text.go
@@ -10,6 +10,7 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/ctxkey"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay"
 	"github.com/songquanpeng/one-api/relay/adaptor"
@@ -104,6 +105,8 @@ func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralO
 		logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error())
 		return nil, err
 	}
+	c.Set(ctxkey.ConvertedRequest, convertedRequest)
+
 	jsonData, err := json.Marshal(convertedRequest)
 	if err != nil {
 		logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error())
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -25,46 +25,50 @@ type StreamOptions struct {

 type GeneralOpenAIRequest struct {
 	// https://platform.openai.com/docs/api-reference/chat/create
-	Messages            []Message       `json:"messages,omitempty"`
-	Model               string          `json:"model,omitempty"`
-	Store               *bool           `json:"store,omitempty"`
-	ReasoningEffort     *string         `json:"reasoning_effort,omitempty"`
-	Metadata            any             `json:"metadata,omitempty"`
-	FrequencyPenalty    *float64        `json:"frequency_penalty,omitempty"`
-	LogitBias           any             `json:"logit_bias,omitempty"`
-	Logprobs            *bool           `json:"logprobs,omitempty"`
-	TopLogprobs         *int            `json:"top_logprobs,omitempty"`
-	MaxTokens           int             `json:"max_tokens,omitempty"`
-	MaxCompletionTokens *int            `json:"max_completion_tokens,omitempty"`
-	N                   int             `json:"n,omitempty"`
-	Modalities          []string        `json:"modalities,omitempty"`
-	Prediction          any             `json:"prediction,omitempty"`
-	Audio               *Audio          `json:"audio,omitempty"`
-	PresencePenalty     *float64        `json:"presence_penalty,omitempty"`
-	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
-	Seed                float64         `json:"seed,omitempty"`
-	ServiceTier         *string         `json:"service_tier,omitempty"`
-	Stop                any             `json:"stop,omitempty"`
-	Stream              bool            `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
-	Temperature         *float64        `json:"temperature,omitempty"`
-	TopP                *float64        `json:"top_p,omitempty"`
-	TopK                int             `json:"top_k,omitempty"`
-	Tools               []Tool          `json:"tools,omitempty"`
-	ToolChoice          any             `json:"tool_choice,omitempty"`
-	ParallelTooCalls    *bool           `json:"parallel_tool_calls,omitempty"`
-	User                string          `json:"user,omitempty"`
-	FunctionCall        any             `json:"function_call,omitempty"`
-	Functions           any             `json:"functions,omitempty"`
+	Messages            []Message `json:"messages,omitempty"`
+	Model               string    `json:"model,omitempty"`
+	Store               *bool     `json:"store,omitempty"`
+	Metadata            any       `json:"metadata,omitempty"`
+	FrequencyPenalty    *float64  `json:"frequency_penalty,omitempty"`
+	LogitBias           any       `json:"logit_bias,omitempty"`
+	Logprobs            *bool     `json:"logprobs,omitempty"`
+	TopLogprobs         *int      `json:"top_logprobs,omitempty"`
+	MaxTokens           int       `json:"max_tokens,omitempty"`
+	MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
+	N                   int       `json:"n,omitempty"`
+	// ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
+	ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
+	// Modalities are the output types to generate; currently the model only accepts modalities = ["text", "audio"].
+	Modalities       []string        `json:"modalities,omitempty"`
+	Prediction       any             `json:"prediction,omitempty"`
+	Audio            *Audio          `json:"audio,omitempty"`
+	PresencePenalty  *float64        `json:"presence_penalty,omitempty"`
+	ResponseFormat   *ResponseFormat `json:"response_format,omitempty"`
+	Seed             float64         `json:"seed,omitempty"`
+	ServiceTier      *string         `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"`
+	Stop             any             `json:"stop,omitempty"`
+	Stream           bool            `json:"stream,omitempty"`
+	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
+	Temperature      *float64        `json:"temperature,omitempty"`
+	TopP             *float64        `json:"top_p,omitempty"`
+	TopK             int             `json:"top_k,omitempty"`
+	Tools            []Tool          `json:"tools,omitempty"`
+	ToolChoice       any             `json:"tool_choice,omitempty"`
+	ParallelTooCalls *bool           `json:"parallel_tool_calls,omitempty"`
+	User             string          `json:"user,omitempty"`
+	FunctionCall     any             `json:"function_call,omitempty"`
+	Functions        any             `json:"functions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/embeddings/create
 	Input          any    `json:"input,omitempty"`
 	EncodingFormat string `json:"encoding_format,omitempty"`
 	Dimensions     int    `json:"dimensions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/images/create
-	Prompt  any     `json:"prompt,omitempty"`
-	Quality *string `json:"quality,omitempty"`
-	Size    string  `json:"size,omitempty"`
-	Style   *string `json:"style,omitempty"`
+	Prompt           string            `json:"prompt,omitempty"`
+	Quality          *string           `json:"quality,omitempty"`
+	Size             string            `json:"size,omitempty"`
+	Style            *string           `json:"style,omitempty"`
+	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
+
 	// Others
 	Instruction string `json:"instruction,omitempty"`
 	NumCtx      int    `json:"num_ctx,omitempty"`
@@ -79,6 +83,34 @@ type GeneralOpenAIRequest struct {
 	Thinking *Thinking `json:"thinking,omitempty"`
 }

+// WebSearchOptions configures the web search tool, which searches the web for relevant results to use in a response.
+type WebSearchOptions struct {
+	// SearchContextSize is high-level guidance for the amount of context window space to use for the search:
+	// one of low, medium or high. The default is "medium".
+	SearchContextSize *string       `json:"search_context_size,omitempty" binding:"omitempty,oneof=low medium high"`
+	UserLocation      *UserLocation `json:"user_location,omitempty"`
+}
+
+// UserLocation carries the user's location for a web search request.
+type UserLocation struct {
+	// Approximate holds the approximate location parameters for the search.
+	Approximate UserLocationApproximate `json:"approximate" binding:"required"`
+	// Type is the kind of location approximation; validation only accepts "approximate".
+	Type string `json:"type" binding:"required,oneof=approximate"`
+}
+
+// UserLocationApproximate holds the approximate location of the user; every field is optional.
+type UserLocationApproximate struct {
+	// City is the user's city, e.g. San Francisco.
+	City *string `json:"city,omitempty"`
+	// Country is the user's country, e.g. US.
+	Country *string `json:"country,omitempty"`
+	// Region is the user's region, e.g. California.
+	Region *string `json:"region,omitempty"`
+	// Timezone is the user's IANA timezone, e.g. America/Los_Angeles.
+	Timezone *string `json:"timezone,omitempty"`
+}
+
 // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking
 type Thinking struct {
 	Type         string `json:"type"`
--- a/relay/model/message.go
+++ b/relay/model/message.go
@@ -1,13 +1,43 @@
 package model

+import (
+	"context"
+	"strings"
+
+	"github.com/songquanpeng/one-api/common/logger"
+)
+
+// ReasoningFormat is the format of reasoning content,
+// can be set by the reasoning_format parameter in the request url.
+type ReasoningFormat string
+
+const (
+	ReasoningFormatUnspecified ReasoningFormat = ""
+	// ReasoningFormatReasoningContent is the reasoning format used by deepseek official API
+	ReasoningFormatReasoningContent ReasoningFormat = "reasoning_content"
+	// ReasoningFormatReasoning is the reasoning format used by openrouter
+	ReasoningFormatReasoning ReasoningFormat = "reasoning"
+
+	// ReasoningFormatThinkTag is the reasoning format used by 3rd party deepseek-r1 providers.
+	//
+	// Deprecated: I believe <think> is a very poor format, especially in stream mode, it is difficult to extract and convert.
+	// Considering that only a few deepseek-r1 third-party providers use this format, it has been decided to no longer support it.
+	// ReasoningFormatThinkTag ReasoningFormat = "think-tag"
+
+	// ReasoningFormatThinking is the reasoning format used by anthropic
+	ReasoningFormatThinking ReasoningFormat = "thinking"
+)
+
 type Message struct {
 	Role string `json:"role,omitempty"`
 	// Content is a string or a list of objects
-	Content    any           `json:"content,omitempty"`
-	Name       *string       `json:"name,omitempty"`
-	ToolCalls  []Tool        `json:"tool_calls,omitempty"`
-	ToolCallId string        `json:"tool_call_id,omitempty"`
-	Audio      *messageAudio `json:"audio,omitempty"`
+	Content    any              `json:"content,omitempty"`
+	Name       *string          `json:"name,omitempty"`
+	ToolCalls  []Tool           `json:"tool_calls,omitempty"`
+	ToolCallId string           `json:"tool_call_id,omitempty"`
+	Audio      *messageAudio    `json:"audio,omitempty"`
+	Annotation []AnnotationItem `json:"annotations,omitempty"` // the OpenAI wire name is plural
+
 	// -------------------------------------
 	// Deepseek 专有的一些字段
 	// https://api-docs.deepseek.com/api/create-chat-completion
@@ -18,11 +48,52 @@ type Message struct {
 	// Prefix Completion feature as the input for the CoT in the last assistant message.
 	// When using this feature, the prefix parameter must be set to true.
 	ReasoningContent *string `json:"reasoning_content,omitempty"`
+
 	// -------------------------------------
 	// Openrouter
 	// -------------------------------------
 	Reasoning *string `json:"reasoning,omitempty"`
 	Refusal   *bool   `json:"refusal,omitempty"`
+
+	// -------------------------------------
+	// Anthropic
+	// -------------------------------------
+	Thinking  *string `json:"thinking,omitempty"`
+	Signature *string `json:"signature,omitempty"`
+}
+
+type AnnotationItem struct {
+	Type        string      `json:"type" binding:"oneof=url_citation"`
+	UrlCitation UrlCitation `json:"url_citation"`
+}
+
+// UrlCitation is a URL citation when using web search.
+type UrlCitation struct {
+	// EndIndex is the index of the last character of the URL citation in the message.
+	EndIndex int `json:"end_index"`
+	// StartIndex is the index of the first character of the URL citation in the message.
+	StartIndex int `json:"start_index"`
+	// Title is the title of the web resource.
+	Title string `json:"title"`
+	// Url is the URL of the web resource.
+	Url string `json:"url"`
+}
+
+// SetReasoningContent stores reasoningContent in the message field that
+// matches format; an empty format is treated like the "reasoning" format.
+func (m *Message) SetReasoningContent(format string, reasoningContent string) {
+	normalized := ReasoningFormat(strings.ToLower(strings.TrimSpace(format)))
+	switch normalized {
+	case ReasoningFormatUnspecified, ReasoningFormatReasoning:
+		m.Reasoning = &reasoningContent
+	case ReasoningFormatReasoningContent:
+		m.ReasoningContent = &reasoningContent
+	case ReasoningFormatThinking:
+		m.Thinking = &reasoningContent
+	default:
+		// The retired think-tag format is no longer handled and ends up here.
+		logger.Warnf(context.TODO(), "unknown reasoning format: %q", format)
+	}
+}

 type messageAudio struct {
@@ -50,6 +121,7 @@ func (m Message) StringContent() string {
 			if !ok {
 				continue
 			}
+
 			if contentMap["type"] == ContentTypeText {
 				if subStr, ok := contentMap["text"].(string); ok {
 					contentStr += subStr
@@ -58,6 +130,7 @@ func (m Message) StringContent() string {
 		}
 		return contentStr
 	}
+
 	return ""
 }

@@ -71,6 +144,7 @@ func (m Message) ParseContent() []MessageContent {
 		})
 		return contentList
 	}
+
 	anyList, ok := m.Content.([]any)
 	if ok {
 		for _, contentItem := range anyList {
@@ -95,8 +169,21 @@ func (m Message) ParseContent() []MessageContent {
 						},
 					})
 				}
+			case ContentTypeInputAudio:
+				// Comma-ok assertions only: malformed client input must not panic here.
+				subObj, _ := contentMap["input_audio"].(map[string]any)
+				format, _ := subObj["format"].(string)
+				if data, ok := subObj["data"].(string); ok {
+					contentList = append(contentList, MessageContent{
+						Type:       ContentTypeInputAudio,
+						InputAudio: &InputAudio{Data: data, Format: format},
+					})
+				}
+			default:
+				logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"])
 			}
 		}
+
 		return contentList
 	}
 	return nil
@@ -108,7 +195,23 @@ type ImageURL struct {
 }

 type MessageContent struct {
-	Type     string    `json:"type,omitempty"`
-	Text     string    `json:"text"`
-	ImageURL *ImageURL `json:"image_url,omitempty"`
+	// Type identifies which payload this part carries: text, image_url or input_audio.
+	Type       string      `json:"type,omitempty"`
+	Text       string      `json:"text"`
+	ImageURL   *ImageURL   `json:"image_url,omitempty"`
+	InputAudio *InputAudio `json:"input_audio,omitempty"`
+	// -------------------------------------
+	// Anthropic
+	// -------------------------------------
+	Thinking  *string `json:"thinking,omitempty"`
+	Signature *string `json:"signature,omitempty"`
+}
+
+type InputAudio struct {
+	// Data is the base64 encoded audio data
+	Data string `json:"data" binding:"required"`
+	// Format is the audio format, should be one of the
+	// following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16.
+	// When stream=true, format should be pcm16
+	Format string `json:"format"`
 }
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,15 +1,22 @@
 package model

+// Usage is the token usage information returned by OpenAI API.
 type Usage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
 	// PromptTokensDetails may be empty for some models
-	PromptTokensDetails *usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	PromptTokensDetails *usagePromptTokensDetails `json:"prompt_tokens_details,omitempty"`
 	// CompletionTokensDetails may be empty for some models
-	CompletionTokensDetails *usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
-	ServiceTier             string                        `gorm:"-" json:"service_tier,omitempty"`
-	SystemFingerprint       string                        `gorm:"-" json:"system_fingerprint,omitempty"`
+	CompletionTokensDetails *usageCompletionTokensDetails `json:"completion_tokens_details,omitempty"`
+	ServiceTier             string                        `json:"service_tier,omitempty"`
+	SystemFingerprint       string                        `json:"system_fingerprint,omitempty"`
+
+	// -------------------------------------
+	// Custom fields
+	// -------------------------------------
+	// ToolsCost is the cost of using tools, in quota.
+	ToolsCost int64 `json:"tools_cost,omitempty"`
 }

 type Error struct {