feat: support openai websearch models

2025-12-27 18:25:57 +08:00 · 2025-03-13 03:37:38 +00:00
parent 6e634b85cf
commit 413fcde382
10 changed files with 341 additions and 120 deletions
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -106,6 +106,8 @@ func testChannel(ctx context.Context, channel *model.Channel, request *relaymode
 	if err != nil {
 		return "", err, nil
 	}
 	c.Set(ctxkey.ConvertedRequest, convertedRequest)
 	jsonData, err := json.Marshal(convertedRequest)
 	if err != nil {
 		return "", err, nil
--- a/relay/adaptor/cloudflare/main.go
+++ b/relay/adaptor/cloudflare/main.go
@@ -19,9 +19,8 @@ import (
 )
 func ConvertCompletionsRequest(textRequest model.GeneralOpenAIRequest) *Request {
 	p, _ := textRequest.Prompt.(string)
 	return &Request{
-		Prompt:      p,
+		Prompt:      textRequest.Prompt,
 		MaxTokens:   textRequest.MaxTokens,
 		Stream:      textRequest.Stream,
 		Temperature: textRequest.Temperature,
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -1,15 +1,17 @@
 package openai
 import (
 	"errors"
 	"fmt"
 	"io"
 	"math"
 	"net/http"
 	"strings"
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
 	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/common/ctxkey"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/adaptor/alibailian"
@@ -19,6 +21,7 @@ import (
 	"github.com/songquanpeng/one-api/relay/adaptor/minimax"
 	"github.com/songquanpeng/one-api/relay/adaptor/novita"
 	"github.com/songquanpeng/one-api/relay/adaptor/openrouter"
 	"github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/channeltype"
 	"github.com/songquanpeng/one-api/relay/meta"
 	"github.com/songquanpeng/one-api/relay/model"
@@ -127,11 +130,16 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
 	return request, nil
 }
-func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
+func (a *Adaptor) DoRequest(c *gin.Context,
 	meta *meta.Meta,
 	requestBody io.Reader) (*http.Response, error) {
 	// Hand the request to the shared helper, passing this adaptor together
 	// with the gin context, the relay meta and the request body.
 	return adaptor.DoRequestHelper(a, c, meta, requestBody)
 }
-func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+func (a *Adaptor) DoResponse(c *gin.Context,
 	resp *http.Response,
 	meta *meta.Meta) (usage *model.Usage,
 	err *model.ErrorWithStatusCode) {
 	if meta.IsStream {
 		var responseText string
 		err, responseText, usage = StreamHandler(c, resp, meta.Mode)
@@ -150,6 +158,53 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
 			err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
 		}
 	}
 	// -------------------------------------
 	// calculate web-search tool cost
 	// -------------------------------------
 	searchContextSize := "medium"
 	var req *model.GeneralOpenAIRequest
 	if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
 		if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
 			if req != nil &&
 				req.WebSearchOptions != nil &&
 				req.WebSearchOptions.SearchContextSize != nil {
 				searchContextSize = *req.WebSearchOptions.SearchContextSize
 			}
 			switch {
 			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
 				switch searchContextSize {
 				case "low":
 					usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.USD))
 				case "medium":
 					usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.USD))
 				case "high":
 					usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.USD))
 				default:
 					return nil, ErrorWrapper(
 						errors.Errorf("invalid search context size %q", searchContextSize),
 						"invalid search context size: "+searchContextSize,
 						http.StatusBadRequest)
 				}
 			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
 				switch searchContextSize {
 				case "low":
 					usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.USD))
 				case "medium":
 					usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.USD))
 				case "high":
 					usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.USD))
 				default:
 					return nil, ErrorWrapper(
 						errors.Errorf("invalid search context size %q", searchContextSize),
 						"invalid search context size: "+searchContextSize,
 						http.StatusBadRequest)
 				}
 			}
 		}
 	}
 	return
 }
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -24,4 +24,8 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
 	"o3-mini", "o3-mini-2025-01-31",
 	"gpt-4.5-preview", "gpt-4.5-preview-2025-02-27",
 	// https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat
 	"gpt-4o-search-preview", "gpt-4o-mini-search-preview",
 }
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -26,6 +26,8 @@ var modelRatioLock sync.RWMutex
 // 1 === ￥0.014 / 1k tokens
 var ModelRatio = map[string]float64{
 	// https://openai.com/pricing
 	"gpt-4.5-preview":            75 * MILLI_USD,
 	"gpt-4.5-preview-2025-02-27": 75 * MILLI_USD,
 	"gpt-4":                      15,
 	"gpt-4-0314":                 15,
 	"gpt-4-0613":                 15,
@@ -42,9 +44,19 @@ var ModelRatio = map[string]float64{
 	"gpt-4o-2024-05-13":          2.5,   // $0.005 / 1K tokens
 	"gpt-4o-2024-08-06":          1.25,  // $0.0025 / 1K tokens
 	"gpt-4o-2024-11-20":          1.25,  // $0.0025 / 1K tokens
 	"gpt-4o-search-preview":      2.5,   // $0.005 / 1K tokens
 	"gpt-4o-mini":                0.075, // $0.00015 / 1K tokens
 	"gpt-4o-mini-2024-07-18":     0.075, // $0.00015 / 1K tokens
 	"gpt-4o-mini-search-preview": 0.075, // $0.00015 / 1K tokens
 	"gpt-4-vision-preview":       5,     // $0.01 / 1K tokens
 	// Audio billing will mix text and audio tokens, the unit price is different.
 	// Here records the cost of text, the cost multiplier of audio
 	// relative to text is in AudioRatio
 	"gpt-4o-audio-preview":                 1.25,             // $0.0025 / 1K tokens
 	"gpt-4o-audio-preview-2024-12-17":      1.25,             // $0.0025 / 1K tokens
 	"gpt-4o-audio-preview-2024-10-01":      1.25,             // $0.0025 / 1K tokens
 	"gpt-4o-mini-audio-preview":            0.15 * MILLI_USD, // $0.15/1M tokens
 	"gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MILLI_USD, // $0.15/1M tokens
 	"gpt-3.5-turbo":                        0.25,             // $0.0005 / 1K tokens
 	"gpt-3.5-turbo-0301":                   0.75,
 	"gpt-3.5-turbo-0613":                   0.75,
@@ -53,14 +65,14 @@ var ModelRatio = map[string]float64{
 	"gpt-3.5-turbo-instruct":               0.75,           // $0.0015 / 1K tokens
 	"gpt-3.5-turbo-1106":                   0.5,            // $0.001 / 1K tokens
 	"gpt-3.5-turbo-0125":                   0.25,           // $0.0005 / 1K tokens
-	"o1":                      7.5,  // $15.00 / 1M input tokens
+	"o1":                                   15 * MILLI_USD, // $15.00 / 1M input tokens
-	"o1-2024-12-17":           7.5,
+	"o1-2024-12-17":                        15 * MILLI_USD,
-	"o1-preview":              7.5, // $15.00 / 1M input tokens
+	"o1-preview":                           15 * MILLI_USD, // $15.00 / 1M input tokens
-	"o1-preview-2024-09-12":   7.5,
+	"o1-preview-2024-09-12":                15 * MILLI_USD,
-	"o1-mini":                 1.5, // $3.00 / 1M input tokens
+	"o1-mini":                              1.1 * MILLI_USD, // $3.00 / 1M input tokens
-	"o1-mini-2024-09-12":      1.5,
+	"o1-mini-2024-09-12":                   1.1 * MILLI_USD,
-	"o3-mini":                 1.5, // $3.00 / 1M input tokens
+	"o3-mini":                              1.1 * MILLI_USD,
-	"o3-mini-2025-01-31":      1.5,
+	"o3-mini-2025-01-31":                   1.1 * MILLI_USD,
 	"davinci-002":                          1,   // $0.002 / 1K tokens
 	"babbage-002":                          0.2, // $0.0004 / 1K tokens
 	"text-ada-001":                         0.2,
@@ -70,7 +82,7 @@ var ModelRatio = map[string]float64{
 	"text-davinci-003":                     10,
 	"text-davinci-edit-001":                10,
 	"code-davinci-edit-001":                10,
-	"whisper-1":               15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+	"whisper-1":                            15,
 	"tts-1":                                7.5, // $0.015 / 1K characters
 	"tts-1-1106":                           7.5,
 	"tts-1-hd":                             15, // $0.030 / 1K characters
@@ -87,7 +99,7 @@ var ModelRatio = map[string]float64{
 	"text-moderation-latest":               0.1,
 	"dall-e-2":                             0.02 * USD, // $0.016 - $0.020 / image
 	"dall-e-3":                             0.04 * USD, // $0.040 - $0.120 / image
-	// https://docs.anthropic.com/en/docs/about-claude/models
+	// https://www.anthropic.com/api#pricing
 	"claude-instant-1.2":         0.8 / 1000 * USD,
 	"claude-2.0":                 8.0 / 1000 * USD,
 	"claude-2.1":                 8.0 / 1000 * USD,
--- a/relay/controller/helper.go
+++ b/relay/controller/helper.go
@@ -8,18 +8,16 @@ import (
 	"net/http"
 	"strings"
 	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/relay/constant/role"
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/common"
 	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/model"
 	"github.com/songquanpeng/one-api/relay/adaptor/openai"
 	billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/channeltype"
 	"github.com/songquanpeng/one-api/relay/constant/role"
 	"github.com/songquanpeng/one-api/relay/controller/validator"
 	"github.com/songquanpeng/one-api/relay/meta"
 	relaymodel "github.com/songquanpeng/one-api/relay/model"
@@ -116,7 +114,7 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
 	}
-	quotaDelta := quota - preConsumedQuota
+	quotaDelta := quota - preConsumedQuota + usage.ToolsCost
 	err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
 	if err != nil {
 		logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -125,7 +123,13 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 	if err != nil {
 		logger.Error(ctx, "error update user quota cache: "+err.Error())
 	}
-	logContent := fmt.Sprintf("倍率：%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
+
 	var logContent string
 	if usage.ToolsCost == 0 {
 		logContent = fmt.Sprintf("倍率：%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
 	} else {
 		logContent = fmt.Sprintf("倍率：%.2f × %.2f × %.2f, tools cost %d", modelRatio, groupRatio, completionRatio, usage.ToolsCost)
 	}
 	model.RecordConsumeLog(ctx, &model.Log{
 		UserId:            meta.UserId,
 		ChannelId:         meta.ChannelId,
--- a/relay/controller/text.go
+++ b/relay/controller/text.go
@@ -10,6 +10,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/common/ctxkey"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay"
 	"github.com/songquanpeng/one-api/relay/adaptor"
@@ -104,6 +105,8 @@ func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralO
 		logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error())
 		return nil, err
 	}
 	c.Set(ctxkey.ConvertedRequest, convertedRequest)
 	jsonData, err := json.Marshal(convertedRequest)
 	if err != nil {
 		logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error())
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -28,7 +28,6 @@ type GeneralOpenAIRequest struct {
 	Messages            []Message `json:"messages,omitempty"`
 	Model               string    `json:"model,omitempty"`
 	Store               *bool     `json:"store,omitempty"`
 	ReasoningEffort     *string         `json:"reasoning_effort,omitempty"`
 	Metadata            any       `json:"metadata,omitempty"`
 	FrequencyPenalty    *float64  `json:"frequency_penalty,omitempty"`
 	LogitBias           any       `json:"logit_bias,omitempty"`
@@ -37,13 +36,16 @@ type GeneralOpenAIRequest struct {
 	MaxTokens           int       `json:"max_tokens,omitempty"`
 	MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
 	N                   int       `json:"n,omitempty"`
 	// ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
 	ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
 	// Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
 	Modalities       []string        `json:"modalities,omitempty"`
 	Prediction       any             `json:"prediction,omitempty"`
 	Audio            *Audio          `json:"audio,omitempty"`
 	PresencePenalty  *float64        `json:"presence_penalty,omitempty"`
 	ResponseFormat   *ResponseFormat `json:"response_format,omitempty"`
 	Seed             float64         `json:"seed,omitempty"`
-	ServiceTier         *string         `json:"service_tier,omitempty"`
+	ServiceTier      *string         `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"`
 	Stop             any             `json:"stop,omitempty"`
 	Stream           bool            `json:"stream,omitempty"`
 	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
@@ -61,10 +63,12 @@ type GeneralOpenAIRequest struct {
 	EncodingFormat string `json:"encoding_format,omitempty"`
 	Dimensions     int    `json:"dimensions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/images/create
-	Prompt  any     `json:"prompt,omitempty"`
+	Prompt           string            `json:"prompt,omitempty"`
 	Quality          *string           `json:"quality,omitempty"`
 	Size             string            `json:"size,omitempty"`
 	Style            *string           `json:"style,omitempty"`
 	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
 	// Others
 	Instruction string `json:"instruction,omitempty"`
 	NumCtx      int    `json:"num_ctx,omitempty"`
@@ -79,6 +83,34 @@ type GeneralOpenAIRequest struct {
 	Thinking *Thinking `json:"thinking,omitempty"`
 }
 // WebSearchOptions holds the options of the web search tool, which searches
 // the web for relevant results to use in a response.
 type WebSearchOptions struct {
 	// SearchContextSize is the high level guidance for the amount of context window space to use for the search.
 	// The binding tag only accepts "low", "medium" or "high"; default is "medium".
 	SearchContextSize *string       `json:"search_context_size,omitempty" binding:"omitempty,oneof=low medium high"`
 	// UserLocation is the optional location of the user; it is omitted from JSON when nil.
 	UserLocation      *UserLocation `json:"user_location,omitempty"`
 }
 // UserLocation describes the location of the user for web search.
 // Both fields are marked required by their binding tags.
 type UserLocation struct {
 	// Approximate is the approximate location parameters for the search.
 	Approximate UserLocationApproximate `json:"approximate" binding:"required"`
 	// Type is the type of location approximation; the binding tag only accepts "approximate".
 	Type string `json:"type" binding:"required,oneof=approximate"`
 }
 // UserLocationApproximate contains the approximate location of the user.
 // Every field is an optional pointer and is omitted from JSON when nil.
 type UserLocationApproximate struct {
 	// City is the city of the user, e.g. San Francisco.
 	City *string `json:"city,omitempty"`
 	// Country is the country of the user, e.g. US.
 	Country *string `json:"country,omitempty"`
 	// Region is the region of the user, e.g. California.
 	Region *string `json:"region,omitempty"`
 	// Timezone is the IANA timezone of the user, e.g. America/Los_Angeles.
 	Timezone *string `json:"timezone,omitempty"`
 }
 // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking
 type Thinking struct {
 	Type         string `json:"type"`
--- a/relay/model/message.go
+++ b/relay/model/message.go
@@ -1,5 +1,33 @@
 package model
 import (
 	"context"
 	"strings"
 	"github.com/songquanpeng/one-api/common/logger"
 )
 // ReasoningFormat is the format of reasoning content,
 // can be set by the reasoning_format parameter in the request url.
 type ReasoningFormat string
 const (
 	// ReasoningFormatUnspecified is the empty format; SetReasoningContent
 	// treats it the same as ReasoningFormatReasoning.
 	ReasoningFormatUnspecified ReasoningFormat = ""
 	// ReasoningFormatReasoningContent is the reasoning format used by deepseek official API
 	ReasoningFormatReasoningContent ReasoningFormat = "reasoning_content"
 	// ReasoningFormatReasoning is the reasoning format used by openrouter
 	ReasoningFormatReasoning ReasoningFormat = "reasoning"

 	// ReasoningFormatThinkTag was the reasoning format used by 3rd party deepseek-r1 providers.
 	// It is no longer supported: <think> is a very poor format, especially in stream mode,
 	// where it is difficult to extract and convert, and only a few deepseek-r1
 	// third-party providers use it. The constant is kept commented out for reference.
 	// ReasoningFormatThinkTag ReasoningFormat = "think-tag"

 	// ReasoningFormatThinking is the reasoning format used by anthropic
 	ReasoningFormatThinking ReasoningFormat = "thinking"
 )
 type Message struct {
 	Role string `json:"role,omitempty"`
 	// Content is a string or a list of objects
@@ -8,6 +36,8 @@ type Message struct {
 	ToolCalls  []Tool           `json:"tool_calls,omitempty"`
 	ToolCallId string           `json:"tool_call_id,omitempty"`
 	Audio      *messageAudio    `json:"audio,omitempty"`
 	Annotation []AnnotationItem `json:"annotation,omitempty"`
 	// -------------------------------------
 	// Deepseek 专有的一些字段
 	// https://api-docs.deepseek.com/api/create-chat-completion
@@ -18,11 +48,52 @@ type Message struct {
 	// Prefix Completion feature as the input for the CoT in the last assistant message.
 	// When using this feature, the prefix parameter must be set to true.
 	ReasoningContent *string `json:"reasoning_content,omitempty"`
 	// -------------------------------------
 	// Openrouter
 	// -------------------------------------
 	Reasoning *string `json:"reasoning,omitempty"`
 	Refusal   *bool   `json:"refusal,omitempty"`
 	// -------------------------------------
 	// Anthropic
 	// -------------------------------------
 	Thinking  *string `json:"thinking,omitempty"`
 	Signature *string `json:"signature,omitempty"`
 }
 // AnnotationItem is a single annotation attached to a message.
 type AnnotationItem struct {
 	// Type is the kind of annotation; the binding tag only accepts "url_citation".
 	Type        string      `json:"type" binding:"oneof=url_citation"`
 	// UrlCitation carries the citation details for the annotation.
 	UrlCitation UrlCitation `json:"url_citation"`
 }
 // UrlCitation is a URL citation when using web search.
 type UrlCitation struct {
 	// EndIndex is the index of the last character of the URL citation in the message.
 	EndIndex int `json:"end_index"`
 	// StartIndex is the index of the first character of the URL citation in the message.
 	StartIndex int `json:"start_index"`
 	// Title is the title of the web resource.
 	Title string `json:"title"`
 	// Url is the URL of the web resource.
 	Url string `json:"url"`
 }
 // SetReasoningContent stores reasoningContent in the message field selected
 // by format.
 //
 // format is trimmed and lower-cased before it is matched against the
 // ReasoningFormat constants. An empty format is handled like
 // ReasoningFormatReasoning. An unrecognised format leaves the message
 // untouched and only logs a warning.
 func (m *Message) SetReasoningContent(format string, reasoningContent string) {
 	kind := ReasoningFormat(strings.ToLower(strings.TrimSpace(format)))
 	target := &reasoningContent

 	if kind == ReasoningFormatReasoning || kind == ReasoningFormatUnspecified {
 		m.Reasoning = target
 		return
 	}
 	if kind == ReasoningFormatReasoningContent {
 		m.ReasoningContent = target
 		return
 	}
 	// The former think-tag format is intentionally not handled any more:
 	// case ReasoningFormatThinkTag:
 	// 	m.Content = fmt.Sprintf("<think>%s</think>%s", reasoningContent, m.Content)
 	if kind == ReasoningFormatThinking {
 		m.Thinking = target
 		return
 	}

 	logger.Warnf(context.TODO(), "unknown reasoning format: %q", format)
 }
 type messageAudio struct {
@@ -50,6 +121,7 @@ func (m Message) StringContent() string {
 			if !ok {
 				continue
 			}
 			if contentMap["type"] == ContentTypeText {
 				if subStr, ok := contentMap["text"].(string); ok {
 					contentStr += subStr
@@ -58,6 +130,7 @@ func (m Message) StringContent() string {
 		}
 		return contentStr
 	}
 	return ""
 }
@@ -71,6 +144,7 @@ func (m Message) ParseContent() []MessageContent {
 		})
 		return contentList
 	}
 	anyList, ok := m.Content.([]any)
 	if ok {
 		for _, contentItem := range anyList {
@@ -95,8 +169,21 @@ func (m Message) ParseContent() []MessageContent {
 						},
 					})
 				}
 			case ContentTypeInputAudio:
 				if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
 					// The payload comes from the request body, so "data" and
 					// "format" may be missing or not strings. Comma-ok
 					// assertions fall back to "" instead of panicking.
 					audioData, _ := subObj["data"].(string)
 					audioFormat, _ := subObj["format"].(string)
 					contentList = append(contentList, MessageContent{
 						Type: ContentTypeInputAudio,
 						InputAudio: &InputAudio{
 							Data:   audioData,
 							Format: audioFormat,
 						},
 					})
 				}
 			default:
 				logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"])
 			}
 		}
 		return contentList
 	}
 	return nil
@@ -108,7 +195,23 @@ type ImageURL struct {
 }
 // MessageContent is one typed part of a message's content list.
 type MessageContent struct {
 	// Type should be one of the following: text/input_audio
 	// NOTE(review): an ImageURL field exists below, so an image type is
 	// presumably valid as well — confirm against the ContentType constants.
 	Type       string      `json:"type,omitempty"`
 	// Text is the text of the part; it has no omitempty, so it is always serialized.
 	Text       string      `json:"text"`
 	// ImageURL is the optional image reference of the part.
 	ImageURL   *ImageURL   `json:"image_url,omitempty"`
 	// InputAudio is the optional audio payload of the part.
 	InputAudio *InputAudio `json:"input_audio,omitempty"`
 	// -------------------------------------
 	// Anthropic
 	// -------------------------------------
 	Thinking  *string `json:"thinking,omitempty"`
 	Signature *string `json:"signature,omitempty"`
 }
 // InputAudio is the audio payload carried by an input_audio content part.
 type InputAudio struct {
 	// Data is the base64 encoded audio data
 	Data string `json:"data" binding:"required"`
 	// Format is the audio format, should be one of the
 	// following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16.
 	// When stream=true, format should be pcm16
 	Format string `json:"format"`
 }
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,15 +1,22 @@
 package model
 // Usage is the token usage information returned by OpenAI API,
 // extended with custom billing fields.
 type Usage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
 	// PromptTokensDetails may be empty for some models
-	PromptTokensDetails *usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	PromptTokensDetails *usagePromptTokensDetails `json:"prompt_tokens_details,omitempty"`
 	// CompletionTokensDetails may be empty for some models
-	CompletionTokensDetails *usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
+	CompletionTokensDetails *usageCompletionTokensDetails `json:"completion_tokens_details,omitempty"`
-	ServiceTier             string                        `gorm:"-" json:"service_tier,omitempty"`
+	ServiceTier             string                        `json:"service_tier,omitempty"`
-	SystemFingerprint       string                        `gorm:"-" json:"system_fingerprint,omitempty"`
+	SystemFingerprint       string                        `json:"system_fingerprint,omitempty"`
 	// -------------------------------------
 	// Custom fields
 	// -------------------------------------
 	// ToolsCost is the cost of using tools, in quota.
 	// It is omitted from JSON when zero.
 	ToolsCost int64 `json:"tools_cost,omitempty"`
 }
 type Error struct {