From 3a8924d7aff617c79fde8cbd7156947d3e03ba38 Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Tue, 25 Feb 2025 02:57:37 +0000 Subject: [PATCH] feat: add support for extended reasoning in Claude 3.7 model --- relay/adaptor/anthropic/adaptor.go | 4 ++-- relay/adaptor/anthropic/main.go | 38 ++++++++++++++++++++++++++---- relay/adaptor/anthropic/model.go | 8 +++++++ relay/model/general.go | 10 ++++++++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/relay/adaptor/anthropic/adaptor.go b/relay/adaptor/anthropic/adaptor.go index bd0949be..fe4e2ef0 100644 --- a/relay/adaptor/anthropic/adaptor.go +++ b/relay/adaptor/anthropic/adaptor.go @@ -36,8 +36,8 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me // https://x.com/alexalbert__/status/1812921642143900036 // claude-3-5-sonnet can support 8k context - if strings.HasPrefix(meta.ActualModelName, "claude-3-5-sonnet") { - req.Header.Set("anthropic-beta", "max-tokens-3-5-sonnet-2024-07-15") + if strings.HasPrefix(meta.ActualModelName, "claude-3-7-sonnet") { + req.Header.Set("anthropic-beta", "output-128k-2025-02-19") } return nil diff --git a/relay/adaptor/anthropic/main.go b/relay/adaptor/anthropic/main.go index d3e306c8..9601164b 100644 --- a/relay/adaptor/anthropic/main.go +++ b/relay/adaptor/anthropic/main.go @@ -4,11 +4,12 @@ import ( "bufio" "encoding/json" "fmt" - "github.com/songquanpeng/one-api/common/render" "io" "net/http" "strings" + "github.com/songquanpeng/one-api/common/render" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/helper" @@ -61,6 +62,7 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { TopK: textRequest.TopK, Stream: textRequest.Stream, Tools: claudeTools, + Thinking: textRequest.Thinking, } if len(claudeTools) > 0 { claudeToolChoice := struct { @@ -149,6 +151,7 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { func 
StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCompletionsStreamResponse, *Response) { var response *Response var responseText string + var reasoningText string var stopReason string tools := make([]model.Tool, 0) @@ -158,6 +161,10 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo case "content_block_start": if claudeResponse.ContentBlock != nil { responseText = claudeResponse.ContentBlock.Text + if claudeResponse.ContentBlock.Thinking != nil { + reasoningText = *claudeResponse.ContentBlock.Thinking + } + if claudeResponse.ContentBlock.Type == "tool_use" { tools = append(tools, model.Tool{ Id: claudeResponse.ContentBlock.Id, @@ -172,6 +179,10 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo case "content_block_delta": if claudeResponse.Delta != nil { responseText = claudeResponse.Delta.Text + if claudeResponse.Delta.Thinking != nil { + reasoningText = *claudeResponse.Delta.Thinking + } + if claudeResponse.Delta.Type == "input_json_delta" { tools = append(tools, model.Tool{ Function: model.Function{ @@ -189,9 +200,20 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil { stopReason = *claudeResponse.Delta.StopReason } + case "thinking_delta": + if claudeResponse.Delta != nil && claudeResponse.Delta.Thinking != nil { + reasoningText = *claudeResponse.Delta.Thinking + } + case "ping", + "message_stop", + "content_block_stop": + default: + logger.SysErrorf("unknown stream response type %q", claudeResponse.Type) } + var choice openai.ChatCompletionsStreamResponseChoice choice.Delta.Content = responseText + choice.Delta.Reasoning = &reasoningText if len(tools) > 0 { choice.Delta.Content = nil // compatible with other OpenAI derivative applications, like LobeOpenAICompatibleFactory ... 
choice.Delta.ToolCalls = tools @@ -209,11 +231,15 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo func ResponseClaude2OpenAI(claudeResponse *Response) *openai.TextResponse { var responseText string - if len(claudeResponse.Content) > 0 { - responseText = claudeResponse.Content[0].Text - } + var reasoningText string + tools := make([]model.Tool, 0) for _, v := range claudeResponse.Content { + responseText += v.Text + if v.Thinking != nil { + reasoningText += *v.Thinking + } + if v.Type == "tool_use" { args, _ := json.Marshal(v.Input) tools = append(tools, model.Tool{ @@ -231,6 +257,7 @@ func ResponseClaude2OpenAI(claudeResponse *Response) *openai.TextResponse { Message: model.Message{ Role: "assistant", Content: responseText, + Reasoning: &reasoningText, Name: nil, ToolCalls: tools, }, @@ -277,6 +304,8 @@ func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusC data = strings.TrimPrefix(data, "data:") data = strings.TrimSpace(data) + logger.Debugf(c.Request.Context(), "stream <- %q\n", data) + var claudeResponse StreamResponse err := json.Unmarshal([]byte(data), &claudeResponse) if err != nil { @@ -344,6 +373,7 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st if err != nil { return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil } + var claudeResponse Response err = json.Unmarshal(responseBody, &claudeResponse) if err != nil { diff --git a/relay/adaptor/anthropic/model.go b/relay/adaptor/anthropic/model.go index 47f193fa..6dd299c4 100644 --- a/relay/adaptor/anthropic/model.go +++ b/relay/adaptor/anthropic/model.go @@ -1,5 +1,7 @@ package anthropic +import "github.com/songquanpeng/one-api/relay/model" + // https://docs.anthropic.com/claude/reference/messages_post type Metadata struct { @@ -22,6 +24,9 @@ type Content struct { Input any `json:"input,omitempty"` Content string `json:"content,omitempty"` ToolUseId string 
`json:"tool_use_id,omitempty"` + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking + Thinking *string `json:"thinking,omitempty"` + Signature *string `json:"signature,omitempty"` } type Message struct { @@ -54,6 +59,7 @@ type Request struct { Tools []Tool `json:"tools,omitempty"` ToolChoice any `json:"tool_choice,omitempty"` //Metadata `json:"metadata,omitempty"` + Thinking *model.Thinking `json:"thinking,omitempty"` } type Usage struct { @@ -84,6 +90,8 @@ type Delta struct { PartialJson string `json:"partial_json,omitempty"` StopReason *string `json:"stop_reason"` StopSequence *string `json:"stop_sequence"` + Thinking *string `json:"thinking,omitempty"` + Signature *string `json:"signature,omitempty"` } type StreamResponse struct { diff --git a/relay/model/general.go b/relay/model/general.go index c26688cd..a87928bd 100644 --- a/relay/model/general.go +++ b/relay/model/general.go @@ -73,6 +73,16 @@ type GeneralOpenAIRequest struct { // ------------------------------------- Provider *openrouter.RequestProvider `json:"provider,omitempty"` IncludeReasoning *bool `json:"include_reasoning,omitempty"` + // ------------------------------------- + // Anthropic + // ------------------------------------- + Thinking *Thinking `json:"thinking,omitempty"` +} + +// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking +type Thinking struct { + Type string `json:"type"` + BudgetTokens int `json:"budget_tokens" binding:"omitempty,min=1024"` } func (r GeneralOpenAIRequest) ParseInput() []string {