From 76e81990260e320412c78288b26dcc1c340257ce Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Thu, 13 Mar 2025 08:22:04 +0000
Subject: [PATCH] fix: add support for OpenAI web search models in documentation and request handling

---
 relay/adaptor/openai/adaptor.go | 37 +++++++++++++++++++++++++++++++++
 relay/adaptor/palm/palm.go      | 14 +++++++++----
 relay/adaptor/xunfei/main.go    |  7 ++++++-
 relay/model/general.go          | 37 ++++++++++++++++++++-------------
 4 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index f6fdfe7d..a340833c 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -120,6 +120,43 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
         }
         request.StreamOptions.IncludeUsage = true
     }
+
+    // o1/o3 series models do not support system prompt/max_tokens/temperature
+    if strings.HasPrefix(meta.ActualModelName, "o1") ||
+        strings.HasPrefix(meta.ActualModelName, "o3") {
+        temperature := float64(1)
+        request.Temperature = &temperature // Only the default (1) value is supported
+
+        request.MaxTokens = 0
+        request.Messages = func(raw []model.Message) (filtered []model.Message) {
+            for i := range raw {
+                if raw[i].Role != "system" {
+                    filtered = append(filtered, raw[i])
+                }
+            }
+
+            return
+        }(request.Messages)
+    }
+
+    // web search models do not support temperature/top_p/n/penalty parameters
+    if strings.HasPrefix(meta.ActualModelName, "gpt-4o-search") ||
+        strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search") {
+        request.Temperature = nil
+        request.TopP = nil
+        request.PresencePenalty = nil
+        request.N = nil
+        request.FrequencyPenalty = nil
+    }
+
+    if request.Stream && !config.EnforceIncludeUsage &&
+        (strings.HasPrefix(request.Model, "gpt-4o-audio") ||
+            strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
+        // TODO: Since it is not yet clear how to implement billing in stream mode,
+        // streaming is temporarily not supported for these models.
+        return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
+    }
+
     return request, nil
 }
 
diff --git a/relay/adaptor/palm/palm.go b/relay/adaptor/palm/palm.go
index d31784ec..f3875417 100644
--- a/relay/adaptor/palm/palm.go
+++ b/relay/adaptor/palm/palm.go
@@ -25,11 +25,17 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
         Prompt: Prompt{
             Messages: make([]ChatMessage, 0, len(textRequest.Messages)),
         },
-        Temperature:    textRequest.Temperature,
-        CandidateCount: textRequest.N,
-        TopP:           textRequest.TopP,
-        TopK:           textRequest.MaxTokens,
+        Temperature: textRequest.Temperature,
+        TopP:        textRequest.TopP,
+        TopK:        textRequest.MaxTokens,
     }
+
+    if textRequest.N != nil {
+        palmRequest.CandidateCount = *textRequest.N
+    } else {
+        palmRequest.CandidateCount = 1
+    }
+
     for _, message := range textRequest.Messages {
         palmMessage := ChatMessage{
             Content: message.StringContent(),
diff --git a/relay/adaptor/xunfei/main.go b/relay/adaptor/xunfei/main.go
index 9a8aef15..b02462fa 100644
--- a/relay/adaptor/xunfei/main.go
+++ b/relay/adaptor/xunfei/main.go
@@ -41,10 +41,15 @@ func requestOpenAI2Xunfei(request model.GeneralOpenAIRequest, xunfeiAppId string
     xunfeiRequest.Header.AppId = xunfeiAppId
     xunfeiRequest.Parameter.Chat.Domain = domain
     xunfeiRequest.Parameter.Chat.Temperature = request.Temperature
-    xunfeiRequest.Parameter.Chat.TopK = request.N
     xunfeiRequest.Parameter.Chat.MaxTokens = request.MaxTokens
     xunfeiRequest.Payload.Message.Text = messages
 
+    if request.N != nil {
+        xunfeiRequest.Parameter.Chat.TopK = *request.N
+    } else {
+        xunfeiRequest.Parameter.Chat.TopK = 1
+    }
+
     if strings.HasPrefix(domain, "generalv3") || domain == "4.0Ultra" {
         functions := make([]model.Function, len(request.Tools))
         for i, tool := range request.Tools {
diff --git a/relay/model/general.go b/relay/model/general.go
index 1b09012f..fd4e5641 100644
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -25,24 +25,31 @@ type StreamOptions struct {
 
 type GeneralOpenAIRequest struct {
     // https://platform.openai.com/docs/api-reference/chat/create
-    Messages            []Message `json:"messages,omitempty"`
-    Model               string    `json:"model,omitempty"`
-    Store               *bool     `json:"store,omitempty"`
-    Metadata            any       `json:"metadata,omitempty"`
-    FrequencyPenalty    *float64  `json:"frequency_penalty,omitempty"`
-    LogitBias           any       `json:"logit_bias,omitempty"`
-    Logprobs            *bool     `json:"logprobs,omitempty"`
-    TopLogprobs         *int      `json:"top_logprobs,omitempty"`
-    MaxTokens           int       `json:"max_tokens,omitempty"`
-    MaxCompletionTokens *int      `json:"max_completion_tokens,omitempty"`
-    N                   int       `json:"n,omitempty"`
+    Messages []Message `json:"messages,omitempty"`
+    Model    string    `json:"model,omitempty"`
+    Store    *bool     `json:"store,omitempty"`
+    Metadata any       `json:"metadata,omitempty"`
+    // FrequencyPenalty is a number between -2.0 and 2.0 that penalizes
+    // new tokens based on their existing frequency in the text so far,
+    // default is 0.
+    FrequencyPenalty    *float64 `json:"frequency_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
+    LogitBias           any      `json:"logit_bias,omitempty"`
+    Logprobs            *bool    `json:"logprobs,omitempty"`
+    TopLogprobs         *int     `json:"top_logprobs,omitempty"`
+    MaxTokens           int      `json:"max_tokens,omitempty"`
+    MaxCompletionTokens *int     `json:"max_completion_tokens,omitempty"`
+    // N is how many chat completion choices to generate for each input message,
+    // defaults to 1.
+    N *int `json:"n,omitempty" binding:"omitempty,min=1"`
     // ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
     ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
     // Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
-    Modalities      []string `json:"modalities,omitempty"`
-    Prediction      any      `json:"prediction,omitempty"`
-    Audio           *Audio   `json:"audio,omitempty"`
-    PresencePenalty *float64 `json:"presence_penalty,omitempty"`
+    Modalities []string `json:"modalities,omitempty"`
+    Prediction any      `json:"prediction,omitempty"`
+    Audio      *Audio   `json:"audio,omitempty"`
+    // PresencePenalty is a number between -2.0 and 2.0 that penalizes
+    // new tokens based on whether they appear in the text so far, default is 0.
+    PresencePenalty *float64 `json:"presence_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
     ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
     Seed           float64         `json:"seed,omitempty"`
     ServiceTier    *string         `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"`
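
Reviewer note, not applied by this patch: the switch of N from int to *int in relay/model/general.go is what lets the adaptors distinguish "client omitted n" from "client sent n: 0", with the palm and xunfei adaptors falling back to 1 when the pointer is nil. A minimal standalone sketch of that behavior, using a trimmed-down stand-in struct rather than the real GeneralOpenAIRequest:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // request is a trimmed-down stand-in for GeneralOpenAIRequest,
    // keeping only the field relevant to this sketch.
    type request struct {
        Model string `json:"model,omitempty"`
        N     *int   `json:"n,omitempty"`
    }

    // candidateCount mirrors the fallback added in the palm and xunfei
    // adaptors: dereference N when the client sent it, otherwise use 1.
    func candidateCount(r request) int {
        if r.N != nil {
            return *r.N
        }
        return 1
    }

    func main() {
        var withN, withoutN request
        _ = json.Unmarshal([]byte(`{"model":"gpt-4o","n":3}`), &withN)
        _ = json.Unmarshal([]byte(`{"model":"gpt-4o"}`), &withoutN)

        fmt.Println(candidateCount(withN))    // 3
        fmt.Println(candidateCount(withoutN)) // 1: "n" omitted, pointer stays nil
    }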
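
Similarly, for reference only: the o1/o3 branch added in relay/adaptor/openai/adaptor.go removes system messages with an inline filter. A rough standalone sketch of that filtering, using a hypothetical msg type in place of model.Message:

    package main

    import "fmt"

    // msg is a hypothetical stand-in for model.Message.
    type msg struct {
        Role    string
        Content string
    }

    // dropSystem reproduces the inline filter from the patch: every message
    // except those with the "system" role is kept, in the original order.
    func dropSystem(raw []msg) (filtered []msg) {
        for i := range raw {
            if raw[i].Role != "system" {
                filtered = append(filtered, raw[i])
            }
        }
        return
    }

    func main() {
        out := dropSystem([]msg{
            {Role: "system", Content: "You are a helpful assistant."},
            {Role: "user", Content: "Hello"},
        })
        fmt.Println(out) // only the user message survives
    }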