Merge remote-tracking branch 'origin/upstream/main'

2025-12-26 17:55:58 +08:00 · 2024-10-29 01:15:54 +00:00
parent 8160de80ef 7e51b04221
commit 2e5e6faad7
19 changed files with 112 additions and 37 deletions
--- a/relay/adaptor/anthropic/constants.go
+++ b/relay/adaptor/anthropic/constants.go
@@ -6,4 +6,5 @@ var ModelList = []string{
 	"claude-3-sonnet-20240229",
 	"claude-3-opus-20240229",
 	"claude-3-5-sonnet-20240620",
+	"claude-3-5-sonnet-20241022",
 }
--- a/relay/adaptor/gemini/main.go
+++ b/relay/adaptor/gemini/main.go
@@ -4,18 +4,19 @@ import (
 	"bufio"
 	"encoding/json"
 	"fmt"
-	"github.com/songquanpeng/one-api/common/render"
 	"io"
 	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
+
 	"github.com/songquanpeng/one-api/common"
 	"github.com/songquanpeng/one-api/common/config"
 	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/common/image"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/common/random"
+	"github.com/songquanpeng/one-api/common/render"
 	"github.com/songquanpeng/one-api/relay/adaptor/openai"
 	"github.com/songquanpeng/one-api/relay/constant"
 	"github.com/songquanpeng/one-api/relay/model"
@@ -27,6 +28,11 @@ const (
 	VisionMaxImageNum = 16
 )

+var mimeTypeMap = map[string]string{
+	"json_object": "application/json",
+	"text":        "text/plain",
+}
+
 // Setting safety to the lowest possible values since Gemini is already powerless enough
 func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
 	geminiRequest := ChatRequest{
@@ -55,6 +61,15 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
 			MaxOutputTokens: textRequest.MaxTokens,
 		},
 	}
+	if textRequest.ResponseFormat != nil {
+		if mimeType, ok := mimeTypeMap[textRequest.ResponseFormat.Type]; ok {
+			geminiRequest.GenerationConfig.ResponseMimeType = mimeType
+		}
+		if textRequest.ResponseFormat.JsonSchema != nil {
+			geminiRequest.GenerationConfig.ResponseSchema = textRequest.ResponseFormat.JsonSchema.Schema
+			geminiRequest.GenerationConfig.ResponseMimeType = mimeTypeMap["json_object"]
+		}
+	}
 	if textRequest.Tools != nil {
 		functions := make([]model.Function, 0, len(textRequest.Tools))
 		for _, tool := range textRequest.Tools {
--- a/relay/adaptor/gemini/model.go
+++ b/relay/adaptor/gemini/model.go
@@ -65,10 +65,12 @@ type ChatTools struct {
 }

 type ChatGenerationConfig struct {
-	Temperature     float64  `json:"temperature,omitempty"`
-	TopP            float64  `json:"topP,omitempty"`
-	TopK            float64  `json:"topK,omitempty"`
-	MaxOutputTokens int      `json:"maxOutputTokens,omitempty"`
-	CandidateCount  int      `json:"candidateCount,omitempty"`
-	StopSequences   []string `json:"stopSequences,omitempty"`
+	ResponseMimeType string   `json:"responseMimeType,omitempty"`
+	ResponseSchema   any      `json:"responseSchema,omitempty"`
+	Temperature      float64  `json:"temperature,omitempty"`
+	TopP             float64  `json:"topP,omitempty"`
+	TopK             float64  `json:"topK,omitempty"`
+	MaxOutputTokens  int      `json:"maxOutputTokens,omitempty"`
+	CandidateCount   int      `json:"candidateCount,omitempty"`
+	StopSequences    []string `json:"stopSequences,omitempty"`
 }
--- a/relay/adaptor/groq/constants.go
+++ b/relay/adaptor/groq/constants.go
@@ -4,14 +4,21 @@ package groq

 var ModelList = []string{
 	"gemma-7b-it",
-	"mixtral-8x7b-32768",
-	"llama3-8b-8192",
-	"llama3-70b-8192",
 	"gemma2-9b-it",
-	"llama-3.1-405b-reasoning",
 	"llama-3.1-70b-versatile",
 	"llama-3.1-8b-instant",
+	"llama-3.2-11b-text-preview",
+	"llama-3.2-11b-vision-preview",
+	"llama-3.2-1b-preview",
+	"llama-3.2-3b-preview",
+	"llama-3.2-90b-text-preview",
+	"llama-guard-3-8b",
+	"llama3-70b-8192",
+	"llama3-8b-8192",
 	"llama3-groq-70b-8192-tool-use-preview",
 	"llama3-groq-8b-8192-tool-use-preview",
+	"llava-v1.5-7b-4096-preview",
+	"mixtral-8x7b-32768",
+	"distil-whisper-large-v3-en",
 	"whisper-large-v3",
 }
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -75,6 +75,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
+	if request.Stream {
+		// always return usage in stream mode
+		if request.StreamOptions == nil {
+			request.StreamOptions = &model.StreamOptions{}
+		}
+		request.StreamOptions.IncludeUsage = true
+	}
 	return request, nil
 }

--- a/relay/adaptor/xunfei/constants.go
+++ b/relay/adaptor/xunfei/constants.go
@@ -7,5 +7,6 @@ var ModelList = []string{
 	"SparkDesk-v3.1",
 	"SparkDesk-v3.1-128K",
 	"SparkDesk-v3.5",
+	"SparkDesk-v3.5-32K",
 	"SparkDesk-v4.0",
 }
--- a/relay/adaptor/xunfei/main.go
+++ b/relay/adaptor/xunfei/main.go
@@ -292,6 +292,8 @@ func apiVersion2domain(apiVersion string) string {
 		return "pro-128k"
 	case "v3.5":
 		return "generalv3.5"
+	case "v3.5-32K":
+		return "max-32k"
 	case "v4.0":
 		return "4.0Ultra"
 	}
@@ -303,7 +305,10 @@ func getXunfeiAuthUrl(apiVersion string, apiKey string, apiSecret string) (strin
 	domain := apiVersion2domain(apiVersion)
 	switch apiVersion {
 	case "v3.1-128K":
-		authUrl = buildXunfeiAuthUrl(fmt.Sprintf("wss://spark-api.xf-yun.com/%s/pro-128k", apiVersion), apiKey, apiSecret)
+		authUrl = buildXunfeiAuthUrl(fmt.Sprintf("wss://spark-api.xf-yun.com/chat/pro-128k"), apiKey, apiSecret)
+		break
+	case "v3.5-32K":
+		authUrl = buildXunfeiAuthUrl(fmt.Sprintf("wss://spark-api.xf-yun.com/chat/max-32k"), apiKey, apiSecret)
 		break
 	default:
 		authUrl = buildXunfeiAuthUrl(fmt.Sprintf("wss://spark-api.xf-yun.com/%s/chat", apiVersion), apiKey, apiSecret)
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -81,6 +81,7 @@ var ModelRatio = map[string]float64{
 	"claude-3-haiku-20240307":    0.25 / 1000 * USD,
 	"claude-3-sonnet-20240229":   3.0 / 1000 * USD,
 	"claude-3-5-sonnet-20240620": 3.0 / 1000 * USD,
+	"claude-3-5-sonnet-20241022": 3.0 / 1000 * USD,
 	"claude-3-opus-20240229":     15.0 / 1000 * USD,
 	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
 	"ERNIE-4.0-8K":       0.120 * RMB,
@@ -130,6 +131,7 @@ var ModelRatio = map[string]float64{
 	"SparkDesk-v3.1":            1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v3.1-128K":       1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v3.5":            1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v3.5-32K":        1.2858, // ￥0.018 / 1k tokens
 	"SparkDesk-v4.0":            1.2858, // ￥0.018 / 1k tokens
 	"360GPT_S2_V9":              0.8572, // ¥0.012 / 1k tokens
 	"embedding-bert-512-v1":     0.0715, // ¥0.001 / 1k tokens
@@ -161,15 +163,21 @@ var ModelRatio = map[string]float64{
 	"mistral-embed":         0.1 / 1000 * USD,
 	// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
 	"gemma-7b-it":                           0.07 / 1000000 * USD,
-	"mixtral-8x7b-32768":                    0.24 / 1000000 * USD,
-	"llama3-8b-8192":                        0.05 / 1000000 * USD,
-	"llama3-70b-8192":                       0.59 / 1000000 * USD,
 	"gemma2-9b-it":                          0.20 / 1000000 * USD,
-	"llama-3.1-405b-reasoning":              0.89 / 1000000 * USD,
 	"llama-3.1-70b-versatile":               0.59 / 1000000 * USD,
 	"llama-3.1-8b-instant":                  0.05 / 1000000 * USD,
+	"llama-3.2-11b-text-preview":            0.05 / 1000000 * USD,
+	"llama-3.2-11b-vision-preview":          0.05 / 1000000 * USD,
+	"llama-3.2-1b-preview":                  0.05 / 1000000 * USD,
+	"llama-3.2-3b-preview":                  0.05 / 1000000 * USD,
+	"llama-3.2-90b-text-preview":            0.59 / 1000000 * USD,
+	"llama-guard-3-8b":                      0.05 / 1000000 * USD,
+	"llama3-70b-8192":                       0.59 / 1000000 * USD,
+	"llama3-8b-8192":                        0.05 / 1000000 * USD,
 	"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD,
 	"llama3-groq-8b-8192-tool-use-preview":  0.19 / 1000000 * USD,
+	"mixtral-8x7b-32768":                    0.24 / 1000000 * USD,
+
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	"yi-34b-chat-0205": 2.5 / 1000 * RMB,
 	"yi-34b-chat-200k": 12.0 / 1000 * RMB,
--- a/relay/model/constant.go
+++ b/relay/model/constant.go
@@ -1,6 +1,7 @@
 package model

 const (
-	ContentTypeText     = "text"
-	ContentTypeImageURL = "image_url"
+	ContentTypeText       = "text"
+	ContentTypeImageURL   = "image_url"
+	ContentTypeInputAudio = "input_audio"
 )
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -12,9 +12,20 @@ type JSONSchema struct {
 	Strict      *bool                  `json:"strict,omitempty"`
 }

+type Audio struct {
+	Voice  string `json:"voice,omitempty"`
+	Format string `json:"format,omitempty"`
+}
+
+type StreamOptions struct {
+	IncludeUsage bool `json:"include_usage,omitempty"`
+}
+
 type GeneralOpenAIRequest struct {
 	Messages         []Message       `json:"messages,omitempty"`
 	Model            string          `json:"model,omitempty"`
+	Modalities       []string        `json:"modalities,omitempty"`
+	Audio            *Audio          `json:"audio,omitempty"`
 	FrequencyPenalty float64         `json:"frequency_penalty,omitempty"`
 	MaxTokens        int             `json:"max_tokens,omitempty"`
 	N                int             `json:"n,omitempty"`
@@ -23,6 +34,7 @@ type GeneralOpenAIRequest struct {
 	Seed             float64         `json:"seed,omitempty"`
 	Stop             any             `json:"stop,omitempty"`
 	Stream           bool            `json:"stream,omitempty"`
+	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
 	Temperature      float64         `json:"temperature,omitempty"`
 	TopP             float64         `json:"top_p,omitempty"`
 	TopK             int             `json:"top_k,omitempty"`
@@ -37,7 +49,7 @@ type GeneralOpenAIRequest struct {
 	Dimensions       int             `json:"dimensions,omitempty"`
 	Instruction      string          `json:"instruction,omitempty"`
 	Size             string          `json:"size,omitempty"`
-	NumCtx           int         	 `json:"num_ctx,omitempty"`
+	NumCtx           int             `json:"num_ctx,omitempty"`
 }

 func (r GeneralOpenAIRequest) ParseInput() []string {