diff --git a/controller/channel-test.go b/controller/channel-test.go index f4d72cfd..b5a41462 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -105,6 +105,8 @@ func testChannel(ctx context.Context, channel *model.Channel, request *relaymode if err != nil { return "", err, nil } + c.Set(ctxkey.ConvertedRequest, convertedRequest) + jsonData, err := json.Marshal(convertedRequest) if err != nil { return "", err, nil diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go index 13b3eec8..9b30e325 100644 --- a/relay/adaptor/openai/adaptor.go +++ b/relay/adaptor/openai/adaptor.go @@ -3,12 +3,14 @@ package openai import ( "fmt" "io" + "math" "net/http" "strings" "github.com/gin-gonic/gin" "github.com/pkg/errors" "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/alibailian" @@ -18,6 +20,7 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor/minimax" "github.com/songquanpeng/one-api/relay/adaptor/novita" "github.com/songquanpeng/one-api/relay/adaptor/openrouter" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" @@ -161,11 +164,16 @@ func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageReques return request, nil } -func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) { +func (a *Adaptor) DoRequest(c *gin.Context, + meta *meta.Meta, + requestBody io.Reader) (*http.Response, error) { return adaptor.DoRequestHelper(a, c, meta, requestBody) } -func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { +func (a *Adaptor) DoResponse(c 
*gin.Context, + resp *http.Response, + meta *meta.Meta) (usage *model.Usage, + err *model.ErrorWithStatusCode) { if meta.IsStream { var responseText string err, responseText, usage = StreamHandler(c, resp, meta.Mode) @@ -187,6 +195,52 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met } } + // ------------------------------------- + // calculate web-search tool cost; prices are written as USD per 1000 calls and must use float literals, because 30 / 1000 is an untyped integer constant division that evaluates to 0 + // ------------------------------------- + searchContextSize := "medium" + var req *model.GeneralOpenAIRequest + if vi, ok := c.Get(ctxkey.ConvertedRequest); ok && usage != nil { // usage is a named pointer result and may still be nil here; skip billing rather than dereference it + if req, ok = vi.(*model.GeneralOpenAIRequest); ok { + if req != nil && + req.WebSearchOptions != nil && + req.WebSearchOptions.SearchContextSize != nil { + searchContextSize = *req.WebSearchOptions.SearchContextSize + } + + switch { + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD)) + case "medium": + usage.ToolsCost += int64(math.Ceil(35.0 / 1000 * ratio.USD)) + case "high": + usage.ToolsCost += int64(math.Ceil(40.0 / 1000 * ratio.USD)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(25.0 / 1000 * ratio.USD)) + case "medium": + usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.USD)) + case "high": + usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.USD)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } + } + } + } + return } diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go index 96ca10a8..f7518894 100644 --- 
a/relay/adaptor/openai/constants.go +++ b/relay/adaptor/openai/constants.go @@ -25,4 +25,6 @@ var ModelList = []string{ "o1-mini", "o1-mini-2024-09-12", "o3-mini", "o3-mini-2025-01-31", "gpt-4.5-preview", "gpt-4.5-preview-2025-02-27", + // https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat + "gpt-4o-search-preview", "gpt-4o-mini-search-preview", } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index 3cca5199..d2b22a9c 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -44,8 +44,10 @@ var ModelRatio = map[string]float64{ "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens + "gpt-4o-search-preview": 2.5, // $0.005 / 1K tokens "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens + "gpt-4o-mini-search-preview": 0.075, // $0.00015 / 1K tokens "gpt-4-vision-preview": 5, // $0.01 / 1K tokens // Audio billing will mix text and audio tokens, the unit price is different. 
// Here records the cost of text, the cost multiplier of audio diff --git a/relay/controller/helper.go b/relay/controller/helper.go index 021f703c..1b368998 100644 --- a/relay/controller/helper.go +++ b/relay/controller/helper.go @@ -118,7 +118,7 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M // we cannot just return, because we may have to return the pre-consumed quota quota = 0 } - quotaDelta := quota - preConsumedQuota + quotaDelta := quota - preConsumedQuota + usage.ToolsCost err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta) if err != nil { logger.Error(ctx, "error consuming token remain quota: "+err.Error()) @@ -127,7 +127,13 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M if err != nil { logger.Error(ctx, "error update user quota cache: "+err.Error()) } - logContent := fmt.Sprintf("model rate %.2f, group rate %.2f, completion rate %.2f", modelRatio, groupRatio, completionRatio) + + var logContent string + if usage.ToolsCost == 0 { + logContent = fmt.Sprintf("model rate %.2f, group rate %.2f, completion rate %.2f", modelRatio, groupRatio, completionRatio) + } else { + logContent = fmt.Sprintf("model rate %.2f, group rate %.2f, completion rate %.2f, tools cost %d", modelRatio, groupRatio, completionRatio, usage.ToolsCost) + } model.RecordConsumeLog(ctx, &model.Log{ UserId: meta.UserId, ChannelId: meta.ChannelId, diff --git a/relay/controller/text.go b/relay/controller/text.go index c0d94c61..b93094ed 100644 --- a/relay/controller/text.go +++ b/relay/controller/text.go @@ -138,6 +138,8 @@ func getRequestBody(c *gin.Context, meta *metalib.Meta, textRequest *relaymodel. 
logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error()) return nil, err } + c.Set(ctxkey.ConvertedRequest, convertedRequest) + jsonData, err := json.Marshal(convertedRequest) if err != nil { logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error()) diff --git a/relay/model/general.go b/relay/model/general.go index f4f4b6d0..1b09012f 100644 --- a/relay/model/general.go +++ b/relay/model/general.go @@ -45,7 +45,7 @@ type GeneralOpenAIRequest struct { PresencePenalty *float64 `json:"presence_penalty,omitempty"` ResponseFormat *ResponseFormat `json:"response_format,omitempty"` Seed float64 `json:"seed,omitempty"` - ServiceTier *string `json:"service_tier,omitempty"` + ServiceTier *string `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"` Stop any `json:"stop,omitempty"` Stream bool `json:"stream,omitempty"` StreamOptions *StreamOptions `json:"stream_options,omitempty"` @@ -63,10 +63,12 @@ type GeneralOpenAIRequest struct { EncodingFormat string `json:"encoding_format,omitempty"` Dimensions int `json:"dimensions,omitempty"` // https://platform.openai.com/docs/api-reference/images/create - Prompt string `json:"prompt,omitempty"` - Quality *string `json:"quality,omitempty"` - Size string `json:"size,omitempty"` - Style *string `json:"style,omitempty"` + Prompt string `json:"prompt,omitempty"` + Quality *string `json:"quality,omitempty"` + Size string `json:"size,omitempty"` + Style *string `json:"style,omitempty"` + WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` + // Others Instruction string `json:"instruction,omitempty"` NumCtx int `json:"num_ctx,omitempty"` @@ -81,6 +83,34 @@ type GeneralOpenAIRequest struct { Thinking *Thinking `json:"thinking,omitempty"` } +// WebSearchOptions holds the options for the web search tool, which searches the web for relevant results to use in a response. 
+type WebSearchOptions struct { + // SearchContextSize is the high level guidance for the amount of context window space to use for the search, + // default is "medium". + SearchContextSize *string `json:"search_context_size,omitempty" binding:"omitempty,oneof=low medium high"` + UserLocation *UserLocation `json:"user_location,omitempty"` +} + +// UserLocation is a struct that contains the location of the user. +type UserLocation struct { + // Approximate is the approximate location parameters for the search. + Approximate UserLocationApproximate `json:"approximate" binding:"required"` + // Type is the type of location approximation. + Type string `json:"type" binding:"required,oneof=approximate"` +} + +// UserLocationApproximate is a struct that contains the approximate location of the user. +type UserLocationApproximate struct { + // City is the city of the user, e.g. San Francisco. + City *string `json:"city,omitempty"` + // Country is the country of the user, e.g. US. + Country *string `json:"country,omitempty"` + // Region is the region of the user, e.g. California. + Region *string `json:"region,omitempty"` + // Timezone is the IANA timezone of the user, e.g. America/Los_Angeles. 
+ Timezone *string `json:"timezone,omitempty"` +} + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking type Thinking struct { Type string `json:"type"` diff --git a/relay/model/message.go b/relay/model/message.go index d5c56ad9..a1ffa615 100644 --- a/relay/model/message.go +++ b/relay/model/message.go @@ -31,11 +31,13 @@ const ( type Message struct { Role string `json:"role,omitempty"` // Content is a string or a list of objects - Content any `json:"content,omitempty"` - Name *string `json:"name,omitempty"` - ToolCalls []Tool `json:"tool_calls,omitempty"` - ToolCallId string `json:"tool_call_id,omitempty"` - Audio *messageAudio `json:"audio,omitempty"` + Content any `json:"content,omitempty"` + Name *string `json:"name,omitempty"` + ToolCalls []Tool `json:"tool_calls,omitempty"` + ToolCallId string `json:"tool_call_id,omitempty"` + Audio *messageAudio `json:"audio,omitempty"` + Annotation []AnnotationItem `json:"annotations,omitempty"` // web-search citations; the OpenAI message object names this array "annotations" + // ------------------------------------- // Deepseek 专有的一些字段 // https://api-docs.deepseek.com/api/create-chat-completion @@ -46,11 +48,13 @@ type Message struct { // Prefix Completion feature as the input for the CoT in the last assistant message. // When using this feature, the prefix parameter must be set to true. ReasoningContent *string `json:"reasoning_content,omitempty"` + // ------------------------------------- // Openrouter // ------------------------------------- Reasoning *string `json:"reasoning,omitempty"` Refusal *bool `json:"refusal,omitempty"` + // ------------------------------------- // Anthropic // ------------------------------------- @@ -58,6 +62,23 @@ type Message struct { Signature *string `json:"signature,omitempty"` } +type AnnotationItem struct { + Type string `json:"type" binding:"oneof=url_citation"` + UrlCitation UrlCitation `json:"url_citation"` +} + +// UrlCitation is a URL citation when using web search. 
+type UrlCitation struct { + // EndIndex is the index of the last character of the URL citation in the message. + EndIndex int `json:"end_index"` + // StartIndex is the index of the first character of the URL citation in the message. + StartIndex int `json:"start_index"` + // Title is the title of the web resource. + Title string `json:"title"` + // Url is the URL of the web resource. + Url string `json:"url"` +} + // SetReasoningContent sets the reasoning content based on the format func (m *Message) SetReasoningContent(format string, reasoningContent string) { switch ReasoningFormat(strings.ToLower(strings.TrimSpace(format))) { diff --git a/relay/model/misc.go b/relay/model/misc.go index 9d1f7e4f..0f58bdd3 100644 --- a/relay/model/misc.go +++ b/relay/model/misc.go @@ -1,15 +1,22 @@ package model +// Usage is the token usage information returned by the OpenAI API. type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` // PromptTokensDetails may be empty for some models - PromptTokensDetails *usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"` + PromptTokensDetails *usagePromptTokensDetails `json:"prompt_tokens_details,omitempty"` // CompletionTokensDetails may be empty for some models - CompletionTokensDetails *usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"` - ServiceTier string `gorm:"-" json:"service_tier,omitempty"` - SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"` + CompletionTokensDetails *usageCompletionTokensDetails `json:"completion_tokens_details,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` + SystemFingerprint string `json:"system_fingerprint,omitempty"` + + // ------------------------------------- + // Custom fields + // ------------------------------------- + // ToolsCost is the cost of using tools, in quota. 
+ ToolsCost int64 `json:"tools_cost,omitempty"` } type Error struct {