diff --git a/Dockerfile b/Dockerfile index 346d9c5b..4a48bdc2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,28 @@ FROM --platform=$BUILDPLATFORM node:16 AS builder +# 配置npm使用淘宝镜像源 +RUN npm config set registry https://registry.npmmirror.com && \ + npm config set disturl https://npmmirror.com/dist && \ + npm config set sass_binary_site https://npmmirror.com/mirrors/node-sass/ && \ + npm config set electron_mirror https://npmmirror.com/mirrors/electron/ && \ + npm config set puppeteer_download_host https://npmmirror.com/mirrors && \ + npm config set chromedriver_cdnurl https://npmmirror.com/mirrors/chromedriver && \ + npm config set operadriver_cdnurl https://npmmirror.com/mirrors/operadriver && \ + npm config set phantomjs_cdnurl https://npmmirror.com/mirrors/phantomjs && \ + npm config set selenium_cdnurl https://npmmirror.com/mirrors/selenium && \ + npm config set node_inspector_cdnurl https://npmmirror.com/mirrors/node-inspector + WORKDIR /web COPY ./VERSION . COPY ./web . -RUN npm install --prefix /web/default & \ - npm install --prefix /web/berry & \ - npm install --prefix /web/air & \ +# 并行安装npm依赖,提高构建速度 +RUN npm install --prefix /web/default --prefer-offline --no-audit & \ + npm install --prefix /web/berry --prefer-offline --no-audit & \ + npm install --prefix /web/air --prefer-offline --no-audit & \ wait +# 并行构建前端项目,提高构建速度 RUN DISABLE_ESLINT_PLUGIN='true' REACT_APP_VERSION=$(cat ./VERSION) npm run build --prefix /web/default & \ DISABLE_ESLINT_PLUGIN='true' REACT_APP_VERSION=$(cat ./VERSION) npm run build --prefix /web/berry & \ DISABLE_ESLINT_PLUGIN='true' REACT_APP_VERSION=$(cat ./VERSION) npm run build --prefix /web/air & \ @@ -16,16 +30,22 @@ RUN DISABLE_ESLINT_PLUGIN='true' REACT_APP_VERSION=$(cat ./VERSION) npm run buil FROM golang:alpine AS builder2 -RUN apk add --no-cache \ +# 配置Go使用国内镜像源 +ENV GOPROXY=https://goproxy.cn,direct \ + GOSUMDB=sum.golang.google.cn \ + GO111MODULE=on \ + CGO_ENABLED=1 \ + GOOS=linux + +# 使用阿里云 Alpine 源以加速 apk 包安装 +RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories && \ + apk update && \ + apk add --no-cache \ gcc \ musl-dev \ sqlite-dev \ build-base -ENV GO111MODULE=on \ - CGO_ENABLED=1 \ - GOOS=linux - WORKDIR /build ADD go.mod go.sum ./ @@ -38,7 +58,10 @@ RUN go build -trimpath -ldflags "-s -w -X 'github.com/songquanpeng/one-api/commo FROM alpine:latest -RUN apk add --no-cache ca-certificates tzdata +# 使用阿里云 Alpine 源以加速 apk 包安装 +RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories && \ + apk update && \ + apk add --no-cache ca-certificates tzdata COPY --from=builder2 /build/one-api / diff --git a/controller/relay.go b/controller/relay.go index 038123b3..1376623a 100644 --- a/controller/relay.go +++ b/controller/relay.go @@ -36,6 +36,8 @@ func relayHelper(c *gin.Context, relayMode int) *model.ErrorWithStatusCode { err = controller.RelayAudioHelper(c, relayMode) case relaymode.Proxy: err = controller.RelayProxyHelper(c, relayMode) + case relaymode.AnthropicMessages: + err = controller.RelayAnthropicHelper(c) default: err = controller.RelayTextHelper(c) } diff --git a/docker-compose.yml b/docker-compose.yml index 1325a818..ab8a1673 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,9 @@ -version: '3.4' - services: one-api: - image: "${REGISTRY:-docker.io}/justsong/one-api:latest" + # image: "${REGISTRY:-docker.io}/justsong/one-api:latest" + build: . container_name: one-api + platform: linux/amd64 restart: always command: --log-dir /app/logs ports: diff --git a/relay/adaptor/anthropic/adaptor.go b/relay/adaptor/anthropic/adaptor.go index bd0949be..8dbb20c0 100644 --- a/relay/adaptor/anthropic/adaptor.go +++ b/relay/adaptor/anthropic/adaptor.go @@ -11,6 +11,14 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/meta" "github.com/songquanpeng/one-api/relay/model" + "github.com/songquanpeng/one-api/relay/relaymode" +) + +const ( + // NativeAnthropicEndpoint is the endpoint for native Anthropic API + NativeAnthropicEndpoint = "/v1/messages" + // ThirdPartyAnthropicEndpoint is the endpoint for third-party providers supporting Anthropic protocol + ThirdPartyAnthropicEndpoint = "/anthropic/v1/messages" ) type Adaptor struct { @@ -21,7 +29,15 @@ func (a *Adaptor) Init(meta *meta.Meta) { } func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) { - return fmt.Sprintf("%s/v1/messages", meta.BaseURL), nil + // For native Anthropic API + if strings.Contains(meta.BaseURL, "api.anthropic.com") { + return fmt.Sprintf("%s%s", meta.BaseURL, NativeAnthropicEndpoint), nil + } + + // For third-party providers supporting Anthropic protocol (like DeepSeek) + // They typically expose the endpoint at /anthropic/v1/messages + baseURL := strings.TrimSuffix(meta.BaseURL, "/") + return fmt.Sprintf("%s%s", baseURL, ThirdPartyAnthropicEndpoint), nil } func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error { @@ -47,6 +63,15 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G if request == nil { return nil, errors.New("request is nil") } + + // For native Anthropic protocol requests, return the request as-is (no conversion needed) + if relayMode == relaymode.AnthropicMessages { + // The request should already be in Anthropic format, so we pass it through + // This will be handled by the caller which already has the anthropic request + return request, nil + } + + // For OpenAI to Anthropic conversion (existing functionality) return ConvertRequest(*request), nil } @@ -62,6 +87,17 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Read } func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { + // For native Anthropic protocol requests, handle response directly without conversion + if meta.Mode == relaymode.AnthropicMessages { + if meta.IsStream { + err, usage = DirectStreamHandler(c, resp) + } else { + err, usage = DirectHandler(c, resp, meta.PromptTokens, meta.ActualModelName) + } + return + } + + // For OpenAI to Anthropic conversion (existing functionality) if meta.IsStream { err, usage = StreamHandler(c, resp) } else { diff --git a/relay/adaptor/anthropic/main.go b/relay/adaptor/anthropic/main.go index d3e306c8..a52d97d5 100644 --- a/relay/adaptor/anthropic/main.go +++ b/relay/adaptor/anthropic/main.go @@ -4,11 +4,12 @@ import ( "bufio" "encoding/json" "fmt" - "github.com/songquanpeng/one-api/common/render" "io" "net/http" "strings" + "github.com/songquanpeng/one-api/common/render" + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/helper" @@ -89,8 +90,18 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { claudeRequest.Model = "claude-2.1" } for _, message := range textRequest.Messages { - if message.Role == "system" && claudeRequest.System == "" { - claudeRequest.System = message.StringContent() + if message.Role == "system" && claudeRequest.System.IsEmpty() { + // Create a SystemPrompt from the string content + systemPrompt := SystemPrompt{} + systemData, err := json.Marshal(message.StringContent()) // Safely escape string for JSON + if err != nil { + logger.SysError(fmt.Sprintf("Failed to marshal system prompt: %v", err)) + } else { + if err := systemPrompt.UnmarshalJSON(systemData); err != nil { + logger.SysError(fmt.Sprintf("Failed to unmarshal system prompt: %v", err)) + } + claudeRequest.System = systemPrompt + } continue } claudeMessage := Message{ @@ -377,3 +388,128 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st _, err = c.Writer.Write(jsonResponse) return nil, &usage } + +// DirectHandler handles native Anthropic API responses without conversion to OpenAI format +func DirectHandler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) { + ctx := c.Request.Context() + logger.Debugf(ctx, "DirectHandler - Response status: %d", resp.StatusCode) + + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + logger.Errorf(ctx, "Failed to read response body: %s", err.Error()) + return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil + } + err = resp.Body.Close() + if err != nil { + logger.Errorf(ctx, "Failed to close response body: %s", err.Error()) + return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil + } + + logger.Debugf(ctx, "Raw response body: %s", string(responseBody)) + + var claudeResponse Response + err = json.Unmarshal(responseBody, &claudeResponse) + if err != nil { + logger.Errorf(ctx, "Failed to unmarshal response: %s", err.Error()) + // If we can't parse as Anthropic response, maybe it's an error response + // Let's try to write it directly and see what happens + c.Writer.Header().Set("Content-Type", "application/json") + c.Writer.WriteHeader(resp.StatusCode) + _, writeErr := c.Writer.Write(responseBody) + if writeErr != nil { + logger.Errorf(ctx, "Failed to write raw response: %s", writeErr.Error()) + return openai.ErrorWrapper(writeErr, "write_response_failed", http.StatusInternalServerError), nil + } + // Return a minimal usage for tracking + usage := &model.Usage{PromptTokens: promptTokens, CompletionTokens: 0, TotalTokens: promptTokens} + return nil, usage + } + + logger.Debugf(ctx, "Parsed response - ID: %s, Model: %s, Usage: %+v", + claudeResponse.Id, claudeResponse.Model, claudeResponse.Usage) + + if claudeResponse.Error.Type != "" { + logger.Errorf(ctx, "Anthropic API error: %s - %s", claudeResponse.Error.Type, claudeResponse.Error.Message) + return &model.ErrorWithStatusCode{ + Error: model.Error{ + Message: claudeResponse.Error.Message, + Type: claudeResponse.Error.Type, + Param: "", + Code: claudeResponse.Error.Type, + }, + StatusCode: resp.StatusCode, + }, nil + } + + // For direct mode, return the response as-is without conversion + usage := model.Usage{ + PromptTokens: claudeResponse.Usage.InputTokens, + CompletionTokens: claudeResponse.Usage.OutputTokens, + TotalTokens: claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens, + } + + logger.Debugf(ctx, "Usage calculated: %+v", usage) + + // Write the original Anthropic response directly + c.Writer.Header().Set("Content-Type", "application/json") + c.Writer.WriteHeader(resp.StatusCode) + _, err = c.Writer.Write(responseBody) + if err != nil { + logger.Errorf(ctx, "Failed to write response: %s", err.Error()) + return openai.ErrorWrapper(err, "write_response_failed", http.StatusInternalServerError), nil + } + + logger.Debugf(ctx, "Response written successfully") + return nil, &usage +} + +// DirectStreamHandler handles native Anthropic API streaming responses without conversion +func DirectStreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) { + defer resp.Body.Close() + + // Set headers for streaming + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.Header().Set("Cache-Control", "no-cache") + c.Writer.Header().Set("Connection", "keep-alive") + c.Writer.Header().Set("Access-Control-Allow-Origin", "*") + c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type") + c.Writer.WriteHeader(resp.StatusCode) + + // Stream the response directly without conversion + var usage model.Usage + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + data := scanner.Text() + if len(data) < 6 || !strings.HasPrefix(data, "data:") { + continue + } + + // Parse usage information if available + if strings.Contains(data, "\"usage\":") { + var eventData map[string]interface{} + jsonData := strings.TrimPrefix(data, "data:") + jsonData = strings.TrimSpace(jsonData) + if err := json.Unmarshal([]byte(jsonData), &eventData); err == nil { + if usageData, ok := eventData["usage"].(map[string]interface{}); ok { + if inputTokens, ok := usageData["input_tokens"].(float64); ok { + usage.PromptTokens = int(inputTokens) + } + if outputTokens, ok := usageData["output_tokens"].(float64); ok { + usage.CompletionTokens = int(outputTokens) + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + } + } + } + } + + // Write data directly to the response + c.Writer.WriteString(data + "\n") + c.Writer.Flush() + } + + if err := scanner.Err(); err != nil { + return openai.ErrorWrapper(err, "stream_read_failed", http.StatusInternalServerError), nil + } + + return nil, &usage +} diff --git a/relay/adaptor/anthropic/model.go b/relay/adaptor/anthropic/model.go index 47f193fa..3078ecbc 100644 --- a/relay/adaptor/anthropic/model.go +++ b/relay/adaptor/anthropic/model.go @@ -1,5 +1,10 @@ package anthropic +import ( + "encoding/json" + "fmt" +) + // https://docs.anthropic.com/claude/reference/messages_post type Metadata struct { @@ -41,18 +46,92 @@ type InputSchema struct { Required any `json:"required,omitempty"` } +// SystemPrompt can handle both string and array formats for the system field +type SystemPrompt struct { + value interface{} +} + +// UnmarshalJSON implements json.Unmarshaler to handle both string and array formats +func (s *SystemPrompt) UnmarshalJSON(data []byte) error { + // Try to unmarshal as string first + var str string + if err := json.Unmarshal(data, &str); err == nil { + s.value = str + return nil + } + + // If that fails, try to unmarshal as array + var arr []interface{} + if err := json.Unmarshal(data, &arr); err == nil { + s.value = arr + return nil + } + + return fmt.Errorf("system field must be either a string or an array") +} + +// MarshalJSON implements json.Marshaler +func (s SystemPrompt) MarshalJSON() ([]byte, error) { + return json.Marshal(s.value) +} + +// String returns the system prompt as a string +func (s SystemPrompt) String() string { + if s.value == nil { + return "" + } + + switch v := s.value.(type) { + case string: + return v + case []interface{}: + // Convert array to string by concatenating text content + var result string + for _, item := range v { + if itemMap, ok := item.(map[string]interface{}); ok { + if text, exists := itemMap["text"]; exists { + if textStr, ok := text.(string); ok { + result += textStr + " " + } + } + } else if str, ok := item.(string); ok { + result += str + " " + } + } + return result + default: + return fmt.Sprintf("%v", v) + } +} + +// IsEmpty returns true if the system prompt is empty +func (s SystemPrompt) IsEmpty() bool { + if s.value == nil { + return true + } + + switch v := s.value.(type) { + case string: + return v == "" + case []interface{}: + return len(v) == 0 + default: + return false + } +} + type Request struct { - Model string `json:"model"` - Messages []Message `json:"messages"` - System string `json:"system,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` - StopSequences []string `json:"stop_sequences,omitempty"` - Stream bool `json:"stream,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - TopK int `json:"top_k,omitempty"` - Tools []Tool `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` + Model string `json:"model"` + Messages []Message `json:"messages"` + System SystemPrompt `json:"system,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + StopSequences []string `json:"stop_sequences,omitempty"` + Stream bool `json:"stream,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` //Metadata `json:"metadata,omitempty"` } diff --git a/relay/adaptor/vertexai/claude/adapter.go b/relay/adaptor/vertexai/claude/adapter.go index cb911cfe..05f767f4 100644 --- a/relay/adaptor/vertexai/claude/adapter.go +++ b/relay/adaptor/vertexai/claude/adapter.go @@ -36,7 +36,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G AnthropicVersion: anthropicVersion, // Model: claudeReq.Model, Messages: claudeReq.Messages, - System: claudeReq.System, + System: claudeReq.System.String(), // Convert SystemPrompt to string MaxTokens: claudeReq.MaxTokens, Temperature: claudeReq.Temperature, TopP: claudeReq.TopP, diff --git a/relay/controller/anthropic.go b/relay/controller/anthropic.go new file mode 100644 index 00000000..641eb0f2 --- /dev/null +++ b/relay/controller/anthropic.go @@ -0,0 +1,224 @@ +package controller + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/gin-gonic/gin" + + "github.com/songquanpeng/one-api/common" + "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/logger" + dbmodel "github.com/songquanpeng/one-api/model" + "github.com/songquanpeng/one-api/relay" + "github.com/songquanpeng/one-api/relay/adaptor/anthropic" + "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/billing" + billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" + "github.com/songquanpeng/one-api/relay/meta" + "github.com/songquanpeng/one-api/relay/model" +) + +// RelayAnthropicHelper handles native Anthropic API requests (anthropic -> anthropic passthrough) +func RelayAnthropicHelper(c *gin.Context) *model.ErrorWithStatusCode { + ctx := c.Request.Context() + meta := meta.GetByContext(c) + + logger.Infof(ctx, "Anthropic request received - URL: %s", c.Request.URL.String()) + + // get & validate anthropic request + anthropicRequest, err := getAndValidateAnthropicRequest(c) + if err != nil { + logger.Errorf(ctx, "getAndValidateAnthropicRequest failed: %s", err.Error()) + return openai.ErrorWrapper(err, "invalid_anthropic_request", http.StatusBadRequest) + } + logger.Debugf(ctx, "Parsed anthropic request - Model: %s, Stream: %v, Messages: %d", + anthropicRequest.Model, anthropicRequest.Stream, len(anthropicRequest.Messages)) + meta.IsStream = anthropicRequest.Stream + + // map model name + meta.OriginModelName = anthropicRequest.Model + mappedModel, _ := getMappedModelName(anthropicRequest.Model, meta.ModelMapping) + anthropicRequest.Model = mappedModel + meta.ActualModelName = anthropicRequest.Model + + // estimate token usage for anthropic request + promptTokens := estimateAnthropicTokens(anthropicRequest) + meta.PromptTokens = promptTokens + + // get model ratio & group ratio + modelRatio := billingratio.GetModelRatio(anthropicRequest.Model, meta.ChannelType) + groupRatio := billingratio.GetGroupRatio(meta.Group) + ratio := modelRatio * groupRatio + + // pre-consume quota + preConsumedQuota, bizErr := preConsumeQuotaForAnthropic(ctx, anthropicRequest, promptTokens, ratio, meta) + if bizErr != nil { + logger.Warnf(ctx, "preConsumeQuota failed: %+v", *bizErr) + return bizErr + } + + logger.Debugf(ctx, "Meta info - APIType: %d, ChannelType: %d, BaseURL: %s", meta.APIType, meta.ChannelType, meta.BaseURL) + + adaptor := relay.GetAdaptor(meta.APIType) + if adaptor == nil { + logger.Errorf(ctx, "Failed to get adaptor for API type: %d", meta.APIType) + return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest) + } + logger.Debugf(ctx, "Using adaptor: %s", adaptor.GetChannelName()) + adaptor.Init(meta) + + // get request body - for anthropic passthrough, we directly use the request body + requestBody, err := getAnthropicRequestBody(c, anthropicRequest) + if err != nil { + return openai.ErrorWrapper(err, "convert_anthropic_request_failed", http.StatusInternalServerError) + } + + // do request + resp, err := adaptor.DoRequest(c, meta, requestBody) + if err != nil { + logger.Errorf(ctx, "DoRequest failed: %s", err.Error()) + return openai.ErrorWrapper(err, "do_request_failed", http.StatusInternalServerError) + } + logger.Debugf(ctx, "Received response - Status: %d", resp.StatusCode) + + if isErrorHappened(meta, resp) { + logger.Errorf(ctx, "Error detected in response") + billing.ReturnPreConsumedQuota(ctx, preConsumedQuota, meta.TokenId) + return RelayErrorHandler(resp) + } + + // do response - for anthropic native requests, we need to handle the response directly + usage, respErr := handleAnthropicResponse(c, resp, meta) + if respErr != nil { + logger.Errorf(ctx, "respErr is not nil: %+v", respErr) + billing.ReturnPreConsumedQuota(ctx, preConsumedQuota, meta.TokenId) + return respErr + } + + logger.Infof(ctx, "Anthropic request completed - Usage: %+v", usage) + + // post-consume quota - for anthropic, we create a placeholder GeneralOpenAIRequest + placeholderRequest := &model.GeneralOpenAIRequest{ + Model: anthropicRequest.Model, + } + go postConsumeQuota(ctx, usage, meta, placeholderRequest, ratio, preConsumedQuota, modelRatio, groupRatio, false) + return nil +} + +func getAndValidateAnthropicRequest(c *gin.Context) (*anthropic.Request, error) { + anthropicRequest := &anthropic.Request{} + err := common.UnmarshalBodyReusable(c, anthropicRequest) + if err != nil { + return nil, err + } + + // Basic validation + if anthropicRequest.Model == "" { + return nil, fmt.Errorf("model is required") + } + if len(anthropicRequest.Messages) == 0 { + return nil, fmt.Errorf("messages are required") + } + if anthropicRequest.MaxTokens == 0 { + anthropicRequest.MaxTokens = 4096 // default max tokens + } + + return anthropicRequest, nil +} + +func getAnthropicRequestBody(c *gin.Context, anthropicRequest *anthropic.Request) (io.Reader, error) { + // For anthropic native requests, we marshal the request back to JSON + jsonData, err := json.Marshal(anthropicRequest) + if err != nil { + logger.Debugf(c.Request.Context(), "anthropic request json_marshal_failed: %s\n", err.Error()) + return nil, err + } + logger.Debugf(c.Request.Context(), "anthropic request: \n%s", string(jsonData)) + return bytes.NewBuffer(jsonData), nil +} + +const ( + // CHARS_PER_TOKEN represents the rough character-to-token ratio for Anthropic models + // This is a conservative estimate: approximately 1 token per 4 characters + CHARS_PER_TOKEN = 4 +) + +func estimateAnthropicTokens(request *anthropic.Request) int { + // Simple token estimation for Anthropic requests + // This is a rough estimation, actual implementation might need more sophisticated logic + totalTokens := 0 + + // Count tokens in system prompt + if !request.System.IsEmpty() { + systemText := request.System.String() + totalTokens += len(systemText) / CHARS_PER_TOKEN // rough estimate: 1 token per 4 characters + } + + // Count tokens in messages + for _, message := range request.Messages { + for _, content := range message.Content { + if content.Type == "text" { + totalTokens += len(content.Text) / CHARS_PER_TOKEN + } + } + } + + return totalTokens +} + +func handleAnthropicResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (*model.Usage, *model.ErrorWithStatusCode) { + // For anthropic native requests, use direct handlers to maintain Anthropic format + if meta.IsStream { + // Handle streaming response - note: DirectStreamHandler returns (error, usage) + err, usage := anthropic.DirectStreamHandler(c, resp) + return usage, err + } else { + // Handle non-streaming response - note: DirectHandler returns (error, usage) + err, usage := anthropic.DirectHandler(c, resp, meta.PromptTokens, meta.ActualModelName) + return usage, err + } +} + +func preConsumeQuotaForAnthropic(ctx context.Context, request *anthropic.Request, promptTokens int, ratio float64, meta *meta.Meta) (int64, *model.ErrorWithStatusCode) { + // Use the same quota logic as text requests but adapted for Anthropic + preConsumedTokens := config.PreConsumedQuota + int64(promptTokens) + if request.MaxTokens != 0 { + preConsumedTokens += int64(request.MaxTokens) + } + preConsumedQuota := int64(float64(preConsumedTokens) * ratio) + + userQuota, err := dbmodel.CacheGetUserQuota(ctx, meta.UserId) + if err != nil { + return preConsumedQuota, openai.ErrorWrapper(err, "get_user_quota_failed", http.StatusInternalServerError) + } + + if userQuota-preConsumedQuota < 0 { + return preConsumedQuota, openai.ErrorWrapper(fmt.Errorf("user quota is not enough"), "insufficient_user_quota", http.StatusForbidden) + } + + err = dbmodel.CacheDecreaseUserQuota(meta.UserId, preConsumedQuota) + if err != nil { + return preConsumedQuota, openai.ErrorWrapper(err, "decrease_user_quota_failed", http.StatusInternalServerError) + } + + if userQuota > 100*preConsumedQuota { + // in this case, we do not pre-consume quota + // because the user has enough quota + preConsumedQuota = 0 + logger.Info(ctx, fmt.Sprintf("user %d has enough quota %d, trusted and no need to pre-consume", meta.UserId, userQuota)) + } + + if preConsumedQuota > 0 { + err := dbmodel.PreConsumeTokenQuota(meta.TokenId, preConsumedQuota) + if err != nil { + return preConsumedQuota, openai.ErrorWrapper(err, "pre_consume_token_quota_failed", http.StatusForbidden) + } + } + + return preConsumedQuota, nil +} diff --git a/relay/meta/relay_meta.go b/relay/meta/relay_meta.go index 8c74ef8a..53c4365a 100644 --- a/relay/meta/relay_meta.go +++ b/relay/meta/relay_meta.go @@ -8,6 +8,7 @@ import ( "github.com/songquanpeng/one-api/common/ctxkey" "github.com/songquanpeng/one-api/model" + "github.com/songquanpeng/one-api/relay/apitype" "github.com/songquanpeng/one-api/relay/channeltype" "github.com/songquanpeng/one-api/relay/relaymode" ) @@ -62,5 +63,11 @@ func GetByContext(c *gin.Context) *Meta { meta.BaseURL = channeltype.ChannelBaseURLs[meta.ChannelType] } meta.APIType = channeltype.ToAPIType(meta.ChannelType) + + // Force Anthropic API type for native Anthropic protocol requests + if meta.Mode == relaymode.AnthropicMessages { + meta.APIType = apitype.Anthropic + } + return &meta } diff --git a/relay/relaymode/define.go b/relay/relaymode/define.go index aa771205..a453e997 100644 --- a/relay/relaymode/define.go +++ b/relay/relaymode/define.go @@ -13,4 +13,6 @@ const ( AudioTranslation // Proxy is a special relay mode for proxying requests to custom upstream Proxy + // AnthropicMessages is for native Anthropic API messages endpoint + AnthropicMessages ) diff --git a/relay/relaymode/helper.go b/relay/relaymode/helper.go index 2cde5b85..e97768f8 100644 --- a/relay/relaymode/helper.go +++ b/relay/relaymode/helper.go @@ -26,6 +26,8 @@ func GetByPath(path string) int { relayMode = AudioTranslation } else if strings.HasPrefix(path, "/v1/oneapi/proxy") { relayMode = Proxy + } else if strings.HasPrefix(path, "/anthropic/v1/messages") { + relayMode = AnthropicMessages } return relayMode } diff --git a/router/relay.go b/router/relay.go index 8f3c7303..b6551766 100644 --- a/router/relay.go +++ b/router/relay.go @@ -71,4 +71,16 @@ func SetRelayRouter(router *gin.Engine) { relayV1Router.GET("/threads/:id/runs/:runsId/steps/:stepId", controller.RelayNotImplemented) relayV1Router.GET("/threads/:id/runs/:runsId/steps", controller.RelayNotImplemented) } + + // Anthropic API compatibility - https://docs.anthropic.com/claude/reference/ + anthropicRouter := router.Group("/anthropic") + anthropicRouter.Use(middleware.RelayPanicRecover(), middleware.TokenAuth(), middleware.Distribute()) + { + // Models API + anthropicRouter.GET("/v1/models", controller.ListModels) + anthropicRouter.GET("/v1/models/:model", controller.RetrieveModel) + + // Messages API - main endpoint for chat completions + anthropicRouter.POST("/v1/messages", controller.Relay) + } }