From 76e81990260e320412c78288b26dcc1c340257ce Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Thu, 13 Mar 2025 08:22:04 +0000
Subject: [PATCH] fix: add support for OpenAI web search models in documentation and request handling

---
 relay/adaptor/openai/adaptor.go | 37 +++++++++++++++++++++++++++++++++
 relay/adaptor/palm/palm.go      | 14 +++++++++----
 relay/adaptor/xunfei/main.go    |  7 ++++++-
 relay/model/general.go          | 37 ++++++++++++++++++++-------------
 4 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index f6fdfe7d..a340833c 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -120,6 +120,43 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
         }
         request.StreamOptions.IncludeUsage = true
     }
+
+    // o1/o3 series models do not support system prompt/max_tokens/temperature
+    if strings.HasPrefix(meta.ActualModelName, "o1") ||
+        strings.HasPrefix(meta.ActualModelName, "o3") {
+        temperature := float64(1)
+        request.Temperature = &temperature // Only the default (1) value is supported
+
+        request.MaxTokens = 0
+        request.Messages = func(raw []model.Message) (filtered []model.Message) {
+            for i := range raw {
+                if raw[i].Role != "system" {
+                    filtered = append(filtered, raw[i])
+                }
+            }
+
+            return
+        }(request.Messages)
+    }
+
+    // web search models do not support temperature/top_p/n/penalty parameters
+    if strings.HasPrefix(meta.ActualModelName, "gpt-4o-search") ||
+        strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search") {
+        request.Temperature = nil
+        request.TopP = nil
+        request.PresencePenalty = nil
+        request.N = nil
+        request.FrequencyPenalty = nil
+    }
+
+    if request.Stream && !config.EnforceIncludeUsage &&
+        (strings.HasPrefix(request.Model, "gpt-4o-audio") ||
+            strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
+        // TODO: Since it is not yet clear how to implement billing in stream mode,
+        // streaming is temporarily not supported for these models.
+        return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
+    }
+
     return request, nil
 }
 
diff --git a/relay/adaptor/palm/palm.go b/relay/adaptor/palm/palm.go
index d31784ec..f3875417 100644
--- a/relay/adaptor/palm/palm.go
+++ b/relay/adaptor/palm/palm.go
@@ -25,11 +25,17 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
         Prompt: Prompt{
             Messages: make([]ChatMessage, 0, len(textRequest.Messages)),
         },
-        Temperature:    textRequest.Temperature,
-        CandidateCount: textRequest.N,
-        TopP:           textRequest.TopP,
-        TopK:           textRequest.MaxTokens,
+        Temperature: textRequest.Temperature,
+        TopP:        textRequest.TopP,
+        TopK:        textRequest.MaxTokens,
     }
+
+    if textRequest.N != nil {
+        palmRequest.CandidateCount = *textRequest.N
+    } else {
+        palmRequest.CandidateCount = 1
+    }
+
     for _, message := range textRequest.Messages {
         palmMessage := ChatMessage{
             Content: message.StringContent(),
diff --git a/relay/adaptor/xunfei/main.go b/relay/adaptor/xunfei/main.go
index 9a8aef15..b02462fa 100644
--- a/relay/adaptor/xunfei/main.go
+++ b/relay/adaptor/xunfei/main.go
@@ -41,10 +41,15 @@ func requestOpenAI2Xunfei(request model.GeneralOpenAIRequest, xunfeiAppId string
     xunfeiRequest.Header.AppId = xunfeiAppId
     xunfeiRequest.Parameter.Chat.Domain = domain
     xunfeiRequest.Parameter.Chat.Temperature = request.Temperature
-    xunfeiRequest.Parameter.Chat.TopK = request.N
     xunfeiRequest.Parameter.Chat.MaxTokens = request.MaxTokens
     xunfeiRequest.Payload.Message.Text = messages
 
+    if request.N != nil {
+        xunfeiRequest.Parameter.Chat.TopK = *request.N
+    } else {
+        xunfeiRequest.Parameter.Chat.TopK = 1
+    }
+
     if strings.HasPrefix(domain, "generalv3") || domain == "4.0Ultra" {
         functions := make([]model.Function, len(request.Tools))
         for i, tool := range request.Tools {
diff --git a/relay/model/general.go b/relay/model/general.go
index 1b09012f..fd4e5641 100644
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -25,24 +25,31 @@ type StreamOptions struct {
 
 type GeneralOpenAIRequest struct {
     // https://platform.openai.com/docs/api-reference/chat/create
-    Messages            []Message `json:"messages,omitempty"`
-    Model               string    `json:"model,omitempty"`
-    Store               *bool     `json:"store,omitempty"`
-    Metadata            any       `json:"metadata,omitempty"`
-    FrequencyPenalty    *float64  `json:"frequency_penalty,omitempty"`
-    LogitBias           any       `json:"logit_bias,omitempty"`
-    Logprobs            *bool     `json:"logprobs,omitempty"`
-    TopLogprobs         *int      `json:"top_logprobs,omitempty"`
-    MaxTokens           int       `json:"max_tokens,omitempty"`
-    MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
-    N                   int       `json:"n,omitempty"`
+    Messages []Message `json:"messages,omitempty"`
+    Model    string    `json:"model,omitempty"`
+    Store    *bool     `json:"store,omitempty"`
+    Metadata any       `json:"metadata,omitempty"`
+    // FrequencyPenalty is a number between -2.0 and 2.0 that penalizes
+    // new tokens based on their existing frequency in the text so far,
+    // default is 0.
+    FrequencyPenalty    *float64 `json:"frequency_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
+    LogitBias           any      `json:"logit_bias,omitempty"`
+    Logprobs            *bool    `json:"logprobs,omitempty"`
+    TopLogprobs         *int     `json:"top_logprobs,omitempty"`
+    MaxTokens           int      `json:"max_tokens,omitempty"`
+    MaxCompletionTokens *int     `json:"max_completion_tokens,omitempty"`
+    // N is how many chat completion choices to generate for each input message,
+    // defaults to 1.
+    N *int `json:"n,omitempty" binding:"omitempty,min=1"`
     // ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
     ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
     // Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
-    Modalities      []string `json:"modalities,omitempty"`
-    Prediction      any      `json:"prediction,omitempty"`
-    Audio           *Audio   `json:"audio,omitempty"`
-    PresencePenalty *float64 `json:"presence_penalty,omitempty"`
+    Modalities []string `json:"modalities,omitempty"`
+    Prediction any      `json:"prediction,omitempty"`
+    Audio      *Audio   `json:"audio,omitempty"`
+    // PresencePenalty is a number between -2.0 and 2.0 that penalizes
+    // new tokens based on whether they appear in the text so far, default is 0.
+    PresencePenalty *float64 `json:"presence_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
     ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
     Seed           float64         `json:"seed,omitempty"`
     ServiceTier    *string         `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"`
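
Reviewer note, not applied by this patch: the switch of N from int to *int in relay/model/general.go is what lets the adaptors distinguish "client omitted n" from "client sent n: 0", with the palm and xunfei adaptors falling back to 1 when the pointer is nil. A minimal standalone sketch of that behavior, using a trimmed-down stand-in struct rather than the real GeneralOpenAIRequest:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // request is a trimmed-down stand-in for GeneralOpenAIRequest,
    // keeping only the field relevant to this sketch.
    type request struct {
        Model string `json:"model,omitempty"`
        N     *int   `json:"n,omitempty"`
    }

    // candidateCount mirrors the fallback added in the palm and xunfei
    // adaptors: dereference N when the client sent it, otherwise use 1.
    func candidateCount(r request) int {
        if r.N != nil {
            return *r.N
        }
        return 1
    }

    func main() {
        var withN, withoutN request
        _ = json.Unmarshal([]byte(`{"model":"gpt-4o","n":3}`), &withN)
        _ = json.Unmarshal([]byte(`{"model":"gpt-4o"}`), &withoutN)

        fmt.Println(candidateCount(withN))    // 3
        fmt.Println(candidateCount(withoutN)) // 1: "n" omitted, pointer stays nil
    }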
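
Similarly, for reference only: the o1/o3 branch added in relay/adaptor/openai/adaptor.go removes system messages with an inline filter. A rough standalone sketch of that filtering, using a hypothetical msg type in place of model.Message:

    package main

    import "fmt"

    // msg is a hypothetical stand-in for model.Message.
    type msg struct {
        Role    string
        Content string
    }

    // dropSystem reproduces the inline filter from the patch: every message
    // except those with the "system" role is kept, in the original order.
    func dropSystem(raw []msg) (filtered []msg) {
        for i := range raw {
            if raw[i].Role != "system" {
                filtered = append(filtered, raw[i])
            }
        }
        return
    }

    func main() {
        out := dropSystem([]msg{
            {Role: "system", Content: "You are a helpful assistant."},
            {Role: "user", Content: "Hello"},
        })
        fmt.Println(out) // only the user message survives
    }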