diff --git a/.gitignore b/.gitignore
index e1e018ea..8282d4b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,5 @@ cmd.md
 .env
 /one-api
 temp
-.DS_Store
\ No newline at end of file
+.DS_Store
+/__debug_bin*
diff --git a/Dockerfile b/Dockerfile
index 346d9c5b..66edb360 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,4 +44,4 @@ COPY --from=builder2 /build/one-api /
 EXPOSE 3000
 
 WORKDIR /data
-ENTRYPOINT ["/one-api"]
\ No newline at end of file
+ENTRYPOINT ["/one-api"]
diff --git a/README.md b/README.md
index 5decf662..33ffb385 100644
--- a/README.md
+++ b/README.md
@@ -385,7 +385,7 @@ graph LR
    + 例子:`NODE_TYPE=slave`
 9. `CHANNEL_UPDATE_FREQUENCY`:设置之后将定期更新渠道余额,单位为分钟,未设置则不进行更新。
    + 例子:`CHANNEL_UPDATE_FREQUENCY=1440`
-10. `CHANNEL_TEST_FREQUENCY`:设置之后将定期检查渠道,单位为分钟,未设置则不进行检查。
+10. `CHANNEL_TEST_FREQUENCY`:设置之后将定期检查渠道,单位为分钟,未设置则不进行检查。
+例子:`CHANNEL_TEST_FREQUENCY=1440`
 11. `POLLING_INTERVAL`:批量更新渠道余额以及测试可用性时的请求间隔,单位为秒,默认无间隔。
    + 例子:`POLLING_INTERVAL=5`
diff --git a/common/config/config.go b/common/config/config.go
index a235a8df..c881e097 100644
--- a/common/config/config.go
+++ b/common/config/config.go
@@ -109,6 +109,9 @@ var RequestInterval = time.Duration(requestInterval) * time.Second
 
 var SyncFrequency = env.Int("SYNC_FREQUENCY", 10*60) // unit is second
 
+// ForceEmailTLSVerify is used to determine whether to force TLS verification for email
+var ForceEmailTLSVerify = env.Bool("FORCE_EMAIL_TLS_VERIFY", false)
+
 var BatchUpdateEnabled = false
 var BatchUpdateInterval = env.Int("BATCH_UPDATE_INTERVAL", 5)
@@ -164,3 +167,6 @@ var UserContentRequestTimeout = env.Int("USER_CONTENT_REQUEST_TIMEOUT", 30)
 
 var EnforceIncludeUsage = env.Bool("ENFORCE_INCLUDE_USAGE", false)
 var TestPrompt = env.String("TEST_PROMPT", "Output only your specific model name with no additional text.")
+
+// OpenrouterProviderSort determines the provider ordering for OpenRouter requests
+var OpenrouterProviderSort = env.String("OPENROUTER_PROVIDER_SORT", "")
diff --git a/common/conv/any.go b/common/conv/any.go
index 467e8bb7..33d34aa7 100644
--- a/common/conv/any.go
+++ b/common/conv/any.go
@@ -1,6 +1,9 @@
 package conv
 
 func AsString(v any) string {
-    str, _ := v.(string)
-    return str
+    if str, ok := v.(string); ok {
+        return str
+    }
+
+    return ""
 }
diff --git a/common/env/helper.go b/common/env/helper.go
index fdb9f827..44af5efd 100644
--- a/common/env/helper.go
+++ b/common/env/helper.go
@@ -3,13 +3,14 @@ package env
 import (
     "os"
     "strconv"
+    "strings"
 )
 
 func Bool(env string, defaultValue bool) bool {
     if env == "" || os.Getenv(env) == "" {
         return defaultValue
     }
-    return os.Getenv(env) == "true"
+    return strings.ToLower(os.Getenv(env)) == "true"
 }
 
 func Int(env string, defaultValue int) int {
diff --git a/common/helper/audio.go b/common/helper/audio.go
new file mode 100644
index 00000000..9e99a947
--- /dev/null
+++ b/common/helper/audio.go
@@ -0,0 +1,62 @@
+package helper
+
+import (
+    "bytes"
+    "context"
+    "io"
+    "os"
+    "os/exec"
+    "strconv"
+
+    "github.com/pkg/errors"
+)
+
+// SaveTmpFile saves data to a temporary file. The filename will be appended with a random string.
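+// The caller is responsible for removing the returned file when it is no longer needed.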
+func SaveTmpFile(filename string, data io.Reader) (string, error) {
+    if data == nil {
+        return "", errors.New("data is nil")
+    }
+
+    f, err := os.CreateTemp("", "*-"+filename)
+    if err != nil {
+        return "", errors.Wrapf(err, "failed to create temporary file %s", filename)
+    }
+    defer f.Close()
+
+    _, err = io.Copy(f, data)
+    if err != nil {
+        return "", errors.Wrapf(err, "failed to copy data to temporary file %s", filename)
+    }
+
+    return f.Name(), nil
+}
+
+// GetAudioTokens returns the number of tokens in an audio file.
+func GetAudioTokens(ctx context.Context, audio io.Reader, tokensPerSecond float64) (float64, error) {
+    filename, err := SaveTmpFile("audio", audio)
+    if err != nil {
+        return 0, errors.Wrap(err, "failed to save audio to temporary file")
+    }
+    defer os.Remove(filename)
+
+    duration, err := GetAudioDuration(ctx, filename)
+    if err != nil {
+        return 0, errors.Wrap(err, "failed to get audio duration")
+    }
+
+    return duration * tokensPerSecond, nil
+}
+
+// GetAudioDuration returns the duration of an audio file in seconds.
+func GetAudioDuration(ctx context.Context, filename string) (float64, error) {
+    // ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {{input}}
+    c := exec.CommandContext(ctx, "/usr/bin/ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filename)
+    output, err := c.Output()
+    if err != nil {
+        return 0, errors.Wrap(err, "failed to get audio duration")
+    }
+
+    // Actually gpt-4-audio calculates tokens with 0.1s precision,
+    // while whisper calculates tokens with 1s precision
+    return strconv.ParseFloat(string(bytes.TrimSpace(output)), 64)
+}
diff --git a/common/helper/audio_test.go b/common/helper/audio_test.go
new file mode 100644
index 00000000..2fdede4d
--- /dev/null
+++ b/common/helper/audio_test.go
@@ -0,0 +1,68 @@
+package helper
+
+import (
+    "context"
+    "io"
+    "net/http"
+    "os"
+    "os/exec"
+    "testing"
+
+    "github.com/stretchr/testify/require"
+)
+
+func TestGetAudioDuration(t *testing.T) {
+    // skip if ffprobe is not installed
+    _, err := exec.LookPath("ffprobe")
+    if err != nil {
+        t.Skip("ffprobe not installed, skipping test")
+    }
+
+    t.Run("should return correct duration for a valid audio file", func(t *testing.T) {
+        tmpFile, err := os.CreateTemp("", "test_audio*.mp3")
+        require.NoError(t, err)
+        defer os.Remove(tmpFile.Name())
+
+        // download test audio file
+        resp, err := http.Get("https://s3.laisky.com/uploads/2025/01/audio-sample.m4a")
+        require.NoError(t, err)
+        defer resp.Body.Close()
+
+        _, err = io.Copy(tmpFile, resp.Body)
+        require.NoError(t, err)
+        require.NoError(t, tmpFile.Close())
+
+        duration, err := GetAudioDuration(context.Background(), tmpFile.Name())
+        require.NoError(t, err)
+        require.Equal(t, 3.904, duration)
+    })
+
+    t.Run("should return an error for a non-existent file", func(t *testing.T) {
+        _, err := GetAudioDuration(context.Background(), "non_existent_file.mp3")
+        require.Error(t, err)
+    })
+}
+
+func TestGetAudioTokens(t *testing.T) {
+    // skip if ffprobe is not installed
+    _, err := exec.LookPath("ffprobe")
+    if err != nil {
+        t.Skip("ffprobe not installed, skipping test")
+    }
+
+    t.Run("should return correct tokens for a valid audio file", func(t *testing.T) {
+        // download test audio file
+        resp, err := http.Get("https://s3.laisky.com/uploads/2025/01/audio-sample.m4a")
+        require.NoError(t, err)
+        defer resp.Body.Close()
+
+        tokens, err := GetAudioTokens(context.Background(), resp.Body, 50)
+        require.NoError(t, err)
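+        // expected: 3.904 seconds of audio at 50 tokens/second = 195.2 tokens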
+        require.Equal(t, 195.2, tokens)
+    })
+
+    t.Run("should return an error for a non-existent file", func(t *testing.T) {
+        _, err := GetAudioTokens(context.Background(), nil, 1)
+        require.Error(t, err)
+    })
+}
diff --git a/common/helper/helper.go b/common/helper/helper.go
index 65f4fd29..c80ee0a3 100644
--- a/common/helper/helper.go
+++ b/common/helper/helper.go
@@ -6,13 +6,13 @@ import (
     "html/template"
     "log"
     "net"
+    "net/http"
     "os/exec"
     "runtime"
     "strconv"
     "strings"
 
     "github.com/gin-gonic/gin"
-
     "github.com/songquanpeng/one-api/common/random"
 )
@@ -32,6 +32,14 @@ func OpenBrowser(url string) {
     }
 }
 
+// RespondError replies with HTTP 200 and a JSON body carrying success=false and the error message.
+func RespondError(c *gin.Context, err error) {
+    c.JSON(http.StatusOK, gin.H{
+        "success": false,
+        "message": err.Error(),
+    })
+}
+
 func GetIp() (ip string) {
     ips, err := net.InterfaceAddrs()
     if err != nil {
diff --git a/common/helper/time.go b/common/helper/time.go
index f0bc6021..c1cda8d1 100644
--- a/common/helper/time.go
+++ b/common/helper/time.go
@@ -5,6 +5,7 @@ import (
     "time"
 )
 
+// GetTimestamp returns the current Unix timestamp in seconds
 func GetTimestamp() int64 {
     return time.Now().Unix()
 }
diff --git a/common/image/image.go b/common/image/image.go
index beebd0c6..14488627 100644
--- a/common/image/image.go
+++ b/common/image/image.go
@@ -3,7 +3,6 @@ package image
 import (
     "bytes"
     "encoding/base64"
-    "github.com/songquanpeng/one-api/common/client"
     "image"
     _ "image/gif"
     _ "image/jpeg"
@@ -13,6 +12,8 @@ import (
     "strings"
     "sync"
 
+    "github.com/pkg/errors"
+    "github.com/songquanpeng/one-api/common/client"
     _ "golang.org/x/image/webp"
 )
 
@@ -22,27 +23,54 @@ var dataURLPattern = regexp.MustCompile(`data:image/([^;]+);base64,(.*)`)
 
 func IsImageUrl(url string) (bool, error) {
     resp, err := client.UserContentRequestHTTPClient.Head(url)
     if err != nil {
-        return false, err
+        return false, errors.Wrapf(err, "failed to fetch image URL: %s", url)
     }
-    if !strings.HasPrefix(resp.Header.Get("Content-Type"), "image/") {
-        return false, nil
+    defer resp.Body.Close()
+
+    if resp.StatusCode == http.StatusNotFound {
+        // this file may not support HEAD method
+        resp, err = client.UserContentRequestHTTPClient.Get(url)
+        if err != nil {
+            return false, errors.Wrapf(err, "failed to fetch image URL: %s", url)
+        }
+        defer resp.Body.Close()
     }
+
+    if resp.StatusCode != http.StatusOK {
+        return false, errors.Errorf("failed to fetch image URL: %s, status code: %d", url, resp.StatusCode)
+    }
+
+    if resp.ContentLength > 10*1024*1024 {
+        return false, errors.Errorf("image size should not exceed 10MB: %s, size: %d", url, resp.ContentLength)
+    }
+
+    contentType := strings.ToLower(resp.Header.Get("Content-Type"))
+    if !strings.HasPrefix(contentType, "image/") &&
+        !strings.Contains(contentType, "application/octet-stream") {
+        return false,
+            errors.Errorf("invalid content type: %s, expected image type", contentType)
+    }
+
     return true, nil
 }
 
 func GetImageSizeFromUrl(url string) (width int, height int, err error) {
     isImage, err := IsImageUrl(url)
+    if err != nil {
+        return 0, 0, errors.Wrap(err, "failed to fetch image URL")
+    }
     if !isImage {
-        return
+        return 0, 0, errors.New("not an image URL")
     }
     resp, err := client.UserContentRequestHTTPClient.Get(url)
     if err != nil {
-        return
+        return 0, 0, errors.Wrap(err, "failed to get image from URL")
     }
     defer resp.Body.Close()
+
     img, _, err := image.DecodeConfig(resp.Body)
     if err != nil {
-        return
+        return 0, 0, errors.Wrap(err, "failed to decode image")
     }
     return img.Width, img.Height, nil
 }
@@ -58,22 +86,35 @@ func GetImageFromUrl(url string) (mimeType string, data string, err error) {
     }
     isImage, err := IsImageUrl(url)
-    if !isImage {
-        return
+    if err != nil {
+        return mimeType, data, errors.Wrap(err, "failed to fetch image URL")
     }
+    if !isImage {
+        return mimeType, data, errors.New("not an image URL")
+    }
+
     resp, err := http.Get(url)
     if err != nil {
-        return
+        return mimeType, data, errors.Wrap(err, "failed to get image from URL")
     }
     defer resp.Body.Close()
+
+    if resp.StatusCode != http.StatusOK {
+        return mimeType, data, errors.Errorf("failed to fetch image URL: %s, status code: %d", url, resp.StatusCode)
+    }
+
+    if resp.ContentLength > 10*1024*1024 {
+        return mimeType, data, errors.Errorf("image size should not exceed 10MB: %s, size: %d", url, resp.ContentLength)
+    }
+
     buffer := bytes.NewBuffer(nil)
     _, err = buffer.ReadFrom(resp.Body)
     if err != nil {
-        return
+        return mimeType, data, errors.Wrap(err, "failed to read image data from response")
     }
+
     mimeType = resp.Header.Get("Content-Type")
     data = base64.StdEncoding.EncodeToString(buffer.Bytes())
-    return
+    return mimeType, data, nil
 }
 
 var (
diff --git a/common/image/image_test.go b/common/image/image_test.go
index 5b669b51..5774ef1d 100644
--- a/common/image/image_test.go
+++ b/common/image/image_test.go
@@ -1,8 +1,8 @@
 package image_test
 
 import (
+    "bytes"
     "encoding/base64"
-    "github.com/songquanpeng/one-api/common/client"
     "image"
     _ "image/gif"
     _ "image/jpeg"
@@ -13,8 +13,9 @@ import (
     "strings"
     "testing"
 
+    "github.com/songquanpeng/one-api/common/client"
     img "github.com/songquanpeng/one-api/common/image"
     "github.com/stretchr/testify/assert"
     _ "golang.org/x/image/webp"
 )
@@ -51,6 +51,8 @@ func TestMain(m *testing.M) {
 }
 
 func TestDecode(t *testing.T) {
+    t.Parallel()
+
     // Bytes read: varies sometimes
     // jpeg: 1063892
     // png: 294462
@@ -96,6 +98,8 @@ func TestDecode(t *testing.T) {
 }
 
 func TestBase64(t *testing.T) {
+    t.Parallel()
+
     // Bytes read:
     // jpeg: 1063892
     // png: 294462
@@ -149,6 +153,8 @@ func TestBase64(t *testing.T) {
 }
 
 func TestGetImageSize(t *testing.T) {
+    t.Parallel()
+
     for i, c := range cases {
         t.Run("Decode:"+strconv.Itoa(i), func(t *testing.T) {
             width, height, err := img.GetImageSize(c.url)
@@ -160,6 +166,8 @@ func TestGetImageSize(t *testing.T) {
 }
 
 func TestGetImageSizeFromBase64(t *testing.T) {
+    t.Parallel()
+
     for i, c := range cases {
         t.Run("Decode:"+strconv.Itoa(i), func(t *testing.T) {
             resp, err := http.Get(c.url)
@@ -175,3 +183,83 @@ func TestGetImageSizeFromBase64(t *testing.T) {
         })
     }
 }
+
+func TestGetImageFromUrl(t *testing.T) {
+    t.Parallel()
+
+    tests := []struct {
+        name       string
+        input      string
+        wantMime   string
+        wantErr    bool
+        errMessage string
+    }{
+        {
+            name:     "Valid JPEG URL",
+            input:    cases[0].url, // Using the existing JPEG test case
+            wantMime: "image/jpeg",
+            wantErr:  false,
+        },
+        {
+            name:     "Valid PNG URL",
+            input:    cases[1].url, // Using the existing PNG test case
+            wantMime: "image/png",
+            wantErr:  false,
+        },
+        {
+            name:     "Valid Data URL",
+            input:    "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==",
+            wantMime: "image/png",
+            wantErr:  false,
+        },
+        {
+            name:       "Invalid URL",
+            input:      "https://invalid.example.com/nonexistent.jpg",
+            wantErr:    true,
+            errMessage: "failed to fetch image URL",
+        },
+        {
+            name:       "Non-image URL",
+            input:      "https://ario.laisky.com/alias/doc",
+            wantErr:    true,
+            errMessage: "invalid content type",
+        },
+    }
+
+    for _, tt := range tests {
+        tt := tt // capture range variable
+        t.Run(tt.name, func(t *testing.T) {
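+            // tt is re-bound above, so this parallel subtest captures its own copy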
+            t.Parallel()
+
+            mimeType, data, err := img.GetImageFromUrl(tt.input)
+
+            if tt.wantErr {
+                assert.Error(t, err)
+                if tt.errMessage != "" {
+                    assert.Contains(t, err.Error(), tt.errMessage)
+                }
+                return
+            }
+
+            assert.NoError(t, err)
+            assert.NotEmpty(t, data)
+
+            // For data URLs, we should verify the mime type matches the input
+            if strings.HasPrefix(tt.input, "data:image/") {
+                assert.Equal(t, tt.wantMime, mimeType)
+                return
+            }
+
+            // For regular URLs, verify the base64 data is valid and can be decoded
+            decoded, err := base64.StdEncoding.DecodeString(data)
+            assert.NoError(t, err)
+            assert.NotEmpty(t, decoded)
+
+            // Verify the decoded data is a valid image
+            reader := bytes.NewReader(decoded)
+            _, format, err := image.DecodeConfig(reader)
+            assert.NoError(t, err)
+            assert.Equal(t, strings.TrimPrefix(tt.wantMime, "image/"), format)
+        })
+    }
+}
diff --git a/common/message/email.go b/common/message/email.go
index 85a83d6d..02074e8b 100644
--- a/common/message/email.go
+++ b/common/message/email.go
@@ -10,6 +10,7 @@ import (
     "strings"
     "time"
 
+    "github.com/pkg/errors"
     "github.com/songquanpeng/one-api/common/config"
     "github.com/songquanpeng/one-api/common/logger"
 )
@@ -27,17 +28,17 @@ func SendEmail(subject string, receiver string, content string) error {
     }
     encodedSubject := fmt.Sprintf("=?UTF-8?B?%s?=", base64.StdEncoding.EncodeToString([]byte(subject)))
 
-    // Extract domain from SMTPFrom
+    // Extract domain from SMTPFrom with fallback
+    domain := "localhost"
     parts := strings.Split(config.SMTPFrom, "@")
-    var domain string
-    if len(parts) > 1 {
+    if len(parts) > 1 && parts[1] != "" {
         domain = parts[1]
     }
+
+    // Generate a unique Message-ID
     buf := make([]byte, 16)
-    _, err := rand.Read(buf)
-    if err != nil {
-        return err
+    if _, err := rand.Read(buf); err != nil {
+        return errors.Wrap(err, "failed to generate random bytes for Message-ID")
     }
     messageId := fmt.Sprintf("<%x@%s>", buf, domain)
 
@@ -50,59 +51,85 @@ func SendEmail(subject string, receiver string, content string) error {
         receiver, config.SystemName, config.SMTPFrom, encodedSubject, messageId,
         time.Now().Format(time.RFC1123Z), content))
 
     auth := smtp.PlainAuth("", config.SMTPAccount, config.SMTPToken, config.SMTPServer)
-    addr := fmt.Sprintf("%s:%d", config.SMTPServer, config.SMTPPort)
-    to := strings.Split(receiver, ";")
+    addr := net.JoinHostPort(config.SMTPServer, fmt.Sprintf("%d", config.SMTPPort))
+
+    // Clean up recipient addresses
+    receiverEmails := []string{}
+    for _, email := range strings.Split(receiver, ";") {
+        email = strings.TrimSpace(email)
+        if email != "" {
+            receiverEmails = append(receiverEmails, email)
+        }
+    }
+
+    if len(receiverEmails) == 0 {
+        return errors.New("no valid recipient email addresses")
+    }
 
     if config.SMTPPort == 465 || !shouldAuth() {
         // need advanced client
         var conn net.Conn
         var err error
+
+        // Add connection timeout
+        dialer := &net.Dialer{
+            Timeout: 30 * time.Second,
+        }
+
         if config.SMTPPort == 465 {
             tlsConfig := &tls.Config{
-                InsecureSkipVerify: true,
+                InsecureSkipVerify: !config.ForceEmailTLSVerify,
                 ServerName:         config.SMTPServer,
             }
-            conn, err = tls.Dial("tcp", fmt.Sprintf("%s:%d", config.SMTPServer, config.SMTPPort), tlsConfig)
+            conn, err = tls.DialWithDialer(dialer, "tcp", addr, tlsConfig)
         } else {
-            conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", config.SMTPServer, config.SMTPPort))
+            conn, err = dialer.Dial("tcp", addr)
         }
+
         if err != nil {
-            return err
+            return errors.Wrap(err, "failed to connect to SMTP server")
        }
+
        client, err := smtp.NewClient(conn, config.SMTPServer)
        if err != nil {
-            return err
+            return errors.Wrap(err, "failed to create SMTP client")
        }
        defer client.Close()
+
        if shouldAuth() {
            if err = client.Auth(auth); err != nil {
-                return err
+                return errors.Wrap(err, "SMTP authentication failed")
            }
        }
+
        if err = client.Mail(config.SMTPFrom); err != nil {
-            return err
+            return errors.Wrap(err, "failed to set MAIL FROM")
        }
-        receiverEmails := strings.Split(receiver, ";")
+
        for _, receiver := range receiverEmails {
            if err = client.Rcpt(receiver); err != nil {
-                return err
+                return errors.Wrapf(err, "failed to add recipient: %s", receiver)
            }
        }
+
        w, err := client.Data()
        if err != nil {
-            return err
+            return errors.Wrap(err, "failed to create message data writer")
        }
-        _, err = w.Write(mail)
-        if err != nil {
-            return err
+
+        if _, err = w.Write(mail); err != nil {
+            return errors.Wrap(err, "failed to write email content")
        }
-        err = w.Close()
-        if err != nil {
-            return err
+
+        if err = w.Close(); err != nil {
+            return errors.Wrap(err, "failed to close message data writer")
        }
+
        return nil
    }
-    err = smtp.SendMail(addr, auth, config.SMTPAccount, to, mail)
+
+    // Use the same sender address in the SMTP protocol as in the From header
+    err := smtp.SendMail(addr, auth, config.SMTPFrom, receiverEmails, mail)
    if err != nil && strings.Contains(err.Error(), "short response") {
        // 部分提供商返回该错误,但实际上邮件已经发送成功
        logger.SysWarnf("short response from SMTP server, return nil instead of error: %s", err.Error())
        return nil
diff --git a/controller/channel-test.go b/controller/channel-test.go
index 3894691c..849aaef5 100644
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -106,6 +106,8 @@ func testChannel(ctx context.Context, channel *model.Channel, request *relaymode
     if err != nil {
         return "", err, nil
     }
+    c.Set(ctxkey.ConvertedRequest, convertedRequest)
+
     jsonData, err := json.Marshal(convertedRequest)
     if err != nil {
         return "", err, nil
diff --git a/model/ability.go b/model/ability.go
index 5cfb9949..6180fbe0 100644
--- a/model/ability.go
+++ b/model/ability.go
@@ -7,6 +7,7 @@ import (
 
     "gorm.io/gorm"
 
+    "github.com/pkg/errors"
     "github.com/songquanpeng/one-api/common"
     "github.com/songquanpeng/one-api/common/utils"
 )
@@ -42,7 +43,7 @@ func GetRandomSatisfiedChannel(group string, model string, ignoreFirstPriority b
         err = channelQuery.Order("RAND()").First(&ability).Error
     }
     if err != nil {
-        return nil, err
+        return nil, errors.Wrap(err, "get random satisfied channel")
     }
     channel := Channel{}
     channel.Id = ability.ChannelId
diff --git a/relay/adaptor/aiproxy/adaptor.go b/relay/adaptor/aiproxy/adaptor.go
index 42d49c0a..d85f42d1 100644
--- a/relay/adaptor/aiproxy/adaptor.go
+++ b/relay/adaptor/aiproxy/adaptor.go
@@ -38,7 +38,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     return aiProxyLibraryRequest, nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/ali/adaptor.go b/relay/adaptor/ali/adaptor.go
index 4aa8a11a..79b51ac3 100644
--- a/relay/adaptor/ali/adaptor.go
+++ b/relay/adaptor/ali/adaptor.go
@@ -67,7 +67,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     }
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
errors.New("request is nil") } diff --git a/relay/adaptor/anthropic/adaptor.go b/relay/adaptor/anthropic/adaptor.go index bd0949be..cd005b7c 100644 --- a/relay/adaptor/anthropic/adaptor.go +++ b/relay/adaptor/anthropic/adaptor.go @@ -36,8 +36,8 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me // https://x.com/alexalbert__/status/1812921642143900036 // claude-3-5-sonnet can support 8k context - if strings.HasPrefix(meta.ActualModelName, "claude-3-5-sonnet") { - req.Header.Set("anthropic-beta", "max-tokens-3-5-sonnet-2024-07-15") + if strings.HasPrefix(meta.ActualModelName, "claude-3-7-sonnet") { + req.Header.Set("anthropic-beta", "output-128k-2025-02-19") } return nil @@ -47,10 +47,10 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G if request == nil { return nil, errors.New("request is nil") } - return ConvertRequest(*request), nil + return ConvertRequest(c, *request) } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/adaptor/anthropic/constants.go b/relay/adaptor/anthropic/constants.go index 9b515c1c..b3a05ee8 100644 --- a/relay/adaptor/anthropic/constants.go +++ b/relay/adaptor/anthropic/constants.go @@ -3,11 +3,13 @@ package anthropic var ModelList = []string{ "claude-instant-1.2", "claude-2.0", "claude-2.1", "claude-3-haiku-20240307", - "claude-3-5-haiku-20241022", "claude-3-5-haiku-latest", + "claude-3-5-haiku-20241022", "claude-3-sonnet-20240229", "claude-3-opus-20240229", + "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", - "claude-3-5-sonnet-latest", + "claude-3-7-sonnet-latest", + "claude-3-7-sonnet-20250219", } diff --git a/relay/adaptor/anthropic/main.go b/relay/adaptor/anthropic/main.go index d3e306c8..78765ed6 100644 --- a/relay/adaptor/anthropic/main.go +++ b/relay/adaptor/anthropic/main.go @@ -2,18 +2,21 @@ package anthropic import ( "bufio" + "context" "encoding/json" "fmt" - "github.com/songquanpeng/one-api/common/render" "io" + "math" "net/http" "strings" "github.com/gin-gonic/gin" + "github.com/pkg/errors" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/helper" "github.com/songquanpeng/one-api/common/image" "github.com/songquanpeng/one-api/common/logger" + "github.com/songquanpeng/one-api/common/render" "github.com/songquanpeng/one-api/relay/adaptor/openai" "github.com/songquanpeng/one-api/relay/model" ) @@ -36,7 +39,16 @@ func stopReasonClaude2OpenAI(reason *string) string { } } -func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { +// isModelSupportThinking is used to check if the model supports extended thinking +func isModelSupportThinking(model string) bool { + if strings.Contains(model, "claude-3-7-sonnet") { + return true + } + + return false +} + +func ConvertRequest(c *gin.Context, textRequest model.GeneralOpenAIRequest) (*Request, error) { claudeTools := make([]Tool, 0, len(textRequest.Tools)) for _, tool := range textRequest.Tools { @@ -61,7 +73,27 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { TopK: textRequest.TopK, Stream: textRequest.Stream, Tools: claudeTools, + Thinking: textRequest.Thinking, } + + if isModelSupportThinking(textRequest.Model) && + c.Request.URL.Query().Has("thinking") && claudeRequest.Thinking == nil { + claudeRequest.Thinking = 
&model.Thinking{ + Type: "enabled", + BudgetTokens: int(math.Min(1024, float64(claudeRequest.MaxTokens/2))), + } + } + + if isModelSupportThinking(textRequest.Model) && + claudeRequest.Thinking != nil { + if claudeRequest.MaxTokens <= 1024 { + return nil, errors.New("max_tokens must be greater than 1024 when using extended thinking") + } + + // top_p must be nil when using extended thinking + claudeRequest.TopP = nil + } + if len(claudeTools) > 0 { claudeToolChoice := struct { Type string `json:"type"` @@ -127,7 +159,9 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { var content Content if part.Type == model.ContentTypeText { content.Type = "text" - content.Text = part.Text + if part.Text != nil { + content.Text = *part.Text + } } else if part.Type == model.ContentTypeImageURL { content.Type = "image" content.Source = &ImageSource{ @@ -142,13 +176,14 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *Request { claudeMessage.Content = contents claudeRequest.Messages = append(claudeRequest.Messages, claudeMessage) } - return &claudeRequest + return &claudeRequest, nil } // https://docs.anthropic.com/claude/reference/messages-streaming func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCompletionsStreamResponse, *Response) { var response *Response var responseText string + var reasoningText string var stopReason string tools := make([]model.Tool, 0) @@ -158,6 +193,10 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo case "content_block_start": if claudeResponse.ContentBlock != nil { responseText = claudeResponse.ContentBlock.Text + if claudeResponse.ContentBlock.Thinking != nil { + reasoningText = *claudeResponse.ContentBlock.Thinking + } + if claudeResponse.ContentBlock.Type == "tool_use" { tools = append(tools, model.Tool{ Id: claudeResponse.ContentBlock.Id, @@ -172,6 +211,10 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo case "content_block_delta": if claudeResponse.Delta != nil { responseText = claudeResponse.Delta.Text + if claudeResponse.Delta.Thinking != nil { + reasoningText = *claudeResponse.Delta.Thinking + } + if claudeResponse.Delta.Type == "input_json_delta" { tools = append(tools, model.Tool{ Function: model.Function{ @@ -189,9 +232,20 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCo if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil { stopReason = *claudeResponse.Delta.StopReason } + case "thinking_delta": + if claudeResponse.Delta != nil && claudeResponse.Delta.Thinking != nil { + reasoningText = *claudeResponse.Delta.Thinking + } + case "ping", + "message_stop", + "content_block_stop": + default: + logger.SysErrorf("unknown stream response type %q", claudeResponse.Type) } + var choice openai.ChatCompletionsStreamResponseChoice choice.Delta.Content = responseText + choice.Delta.Reasoning = &reasoningText if len(tools) > 0 { choice.Delta.Content = nil // compatible with other OpenAI derivative applications, like LobeOpenAICompatibleFactory ... 
         choice.Delta.ToolCalls = tools
@@ -209,11 +263,23 @@ func StreamResponseClaude2OpenAI(claudeResponse *StreamResponse) (*openai.ChatCompletionsStreamResponse, *Response) {
 
 func ResponseClaude2OpenAI(claudeResponse *Response) *openai.TextResponse {
     var responseText string
-    if len(claudeResponse.Content) > 0 {
-        responseText = claudeResponse.Content[0].Text
-    }
+    var reasoningText string
+
     tools := make([]model.Tool, 0)
     for _, v := range claudeResponse.Content {
+        switch v.Type {
+        case "thinking":
+            if v.Thinking != nil {
+                reasoningText += *v.Thinking
+            } else {
+                logger.Errorf(context.Background(), "thinking is nil in response")
+            }
+        case "text":
+            responseText += v.Text
+        default:
+            logger.Warnf(context.Background(), "unknown response type %q", v.Type)
+        }
+
         if v.Type == "tool_use" {
             args, _ := json.Marshal(v.Input)
             tools = append(tools, model.Tool{
@@ -226,11 +292,13 @@ func ResponseClaude2OpenAI(claudeResponse *Response) *openai.TextResponse {
             })
         }
     }
+
     choice := openai.TextResponseChoice{
         Index: 0,
         Message: model.Message{
             Role:      "assistant",
             Content:   responseText,
+            Reasoning: &reasoningText,
             Name:      nil,
             ToolCalls: tools,
         },
@@ -277,6 +345,8 @@ func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusC
         data = strings.TrimPrefix(data, "data:")
         data = strings.TrimSpace(data)
 
+        logger.Debugf(c.Request.Context(), "stream <- %q\n", data)
+
         var claudeResponse StreamResponse
         err := json.Unmarshal([]byte(data), &claudeResponse)
         if err != nil {
@@ -344,6 +414,9 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
     if err != nil {
         return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
     }
+
+    logger.Debugf(c.Request.Context(), "response <- %s\n", string(responseBody))
+
     var claudeResponse Response
     err = json.Unmarshal(responseBody, &claudeResponse)
     if err != nil {
diff --git a/relay/adaptor/anthropic/model.go b/relay/adaptor/anthropic/model.go
index 47f193fa..6dd299c4 100644
--- a/relay/adaptor/anthropic/model.go
+++ b/relay/adaptor/anthropic/model.go
@@ -1,5 +1,7 @@
 package anthropic
 
+import "github.com/songquanpeng/one-api/relay/model"
+
 // https://docs.anthropic.com/claude/reference/messages_post
 
 type Metadata struct {
@@ -22,6 +24,9 @@ type Content struct {
     Input     any    `json:"input,omitempty"`
     Content   string `json:"content,omitempty"`
     ToolUseId string `json:"tool_use_id,omitempty"`
+    // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking
+    Thinking  *string `json:"thinking,omitempty"`
+    Signature *string `json:"signature,omitempty"`
 }
 
 type Message struct {
@@ -54,6 +59,7 @@ type Request struct {
     Tools         []Tool `json:"tools,omitempty"`
     ToolChoice    any    `json:"tool_choice,omitempty"`
     //Metadata    `json:"metadata,omitempty"`
+    Thinking      *model.Thinking `json:"thinking,omitempty"`
 }
 
 type Usage struct {
@@ -84,6 +90,8 @@ type Delta struct {
     PartialJson  string  `json:"partial_json,omitempty"`
     StopReason   *string `json:"stop_reason"`
     StopSequence *string `json:"stop_sequence"`
+    Thinking     *string `json:"thinking,omitempty"`
+    Signature    *string `json:"signature,omitempty"`
 }
 
 type StreamResponse struct {
diff --git a/relay/adaptor/aws/adaptor.go b/relay/adaptor/aws/adaptor.go
index 62221346..45bfbdf6 100644
--- a/relay/adaptor/aws/adaptor.go
+++ b/relay/adaptor/aws/adaptor.go
@@ -72,7 +72,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me
     return nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/aws/claude/adapter.go b/relay/adaptor/aws/claude/adapter.go
index eb3c9fb8..2f6a4cc5 100644
--- a/relay/adaptor/aws/claude/adapter.go
+++ b/relay/adaptor/aws/claude/adapter.go
@@ -21,7 +21,11 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
         return nil, errors.New("request is nil")
     }
 
-    claudeReq := anthropic.ConvertRequest(*request)
+    claudeReq, err := anthropic.ConvertRequest(c, *request)
+    if err != nil {
+        return nil, errors.Wrap(err, "convert request")
+    }
+
     c.Set(ctxkey.RequestModel, request.Model)
     c.Set(ctxkey.ConvertedRequest, claudeReq)
     return claudeReq, nil
diff --git a/relay/adaptor/aws/claude/main.go b/relay/adaptor/aws/claude/main.go
index 3fe3dfd8..c20827b0 100644
--- a/relay/adaptor/aws/claude/main.go
+++ b/relay/adaptor/aws/claude/main.go
@@ -36,6 +36,8 @@ var AwsModelIDMap = map[string]string{
     "claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "claude-3-5-sonnet-latest":   "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "claude-3-5-haiku-20241022":  "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "claude-3-7-sonnet-latest":   "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "claude-3-7-sonnet-20250219": "anthropic.claude-3-7-sonnet-20250219-v1:0",
 }
 
 func awsModelID(requestModel string) (string, error) {
@@ -47,13 +49,14 @@ func awsModelID(requestModel string) (string, error) {
 }
 
 func Handler(c *gin.Context, awsCli *bedrockruntime.Client, modelName string) (*relaymodel.ErrorWithStatusCode, *relaymodel.Usage) {
-    awsModelId, err := awsModelID(c.GetString(ctxkey.RequestModel))
+    awsModelID, err := awsModelID(c.GetString(ctxkey.RequestModel))
     if err != nil {
         return utils.WrapErr(errors.Wrap(err, "awsModelID")), nil
     }
+    awsModelID = utils.ConvertModelID2CrossRegionProfile(awsModelID, awsCli.Options().Region)
 
     awsReq := &bedrockruntime.InvokeModelInput{
-        ModelId:     aws.String(awsModelId),
+        ModelId:     aws.String(awsModelID),
         Accept:      aws.String("application/json"),
         ContentType: aws.String("application/json"),
     }
@@ -101,13 +104,14 @@ func Handler(c *gin.Context, awsCli *bedrockruntime.Client, modelName string) (*
 
 func StreamHandler(c *gin.Context, awsCli *bedrockruntime.Client) (*relaymodel.ErrorWithStatusCode, *relaymodel.Usage) {
     createdTime := helper.GetTimestamp()
-    awsModelId, err := awsModelID(c.GetString(ctxkey.RequestModel))
+    awsModelID, err := awsModelID(c.GetString(ctxkey.RequestModel))
     if err != nil {
         return utils.WrapErr(errors.Wrap(err, "awsModelID")), nil
     }
+    awsModelID = utils.ConvertModelID2CrossRegionProfile(awsModelID, awsCli.Options().Region)
 
     awsReq := &bedrockruntime.InvokeModelWithResponseStreamInput{
-        ModelId:     aws.String(awsModelId),
+        ModelId:     aws.String(awsModelID),
         Accept:      aws.String("application/json"),
         ContentType: aws.String("application/json"),
     }
diff --git a/relay/adaptor/aws/claude/model.go b/relay/adaptor/aws/claude/model.go
index 10622887..b0dd6800 100644
--- a/relay/adaptor/aws/claude/model.go
+++ b/relay/adaptor/aws/claude/model.go
@@ -1,6 +1,9 @@
 package aws
 
-import "github.com/songquanpeng/one-api/relay/adaptor/anthropic"
+import (
+    "github.com/songquanpeng/one-api/relay/adaptor/anthropic"
+    "github.com/songquanpeng/one-api/relay/model"
+)
 
 // Request is the request to AWS Claude
 //
@@ -17,4 +20,5 @@ type Request struct {
     StopSequences []string         `json:"stop_sequences,omitempty"`
     Tools         []anthropic.Tool `json:"tools,omitempty"`
`json:"tool_choice,omitempty"` + Thinking *model.Thinking `json:"thinking,omitempty"` } diff --git a/relay/adaptor/aws/llama3/main.go b/relay/adaptor/aws/llama3/main.go index e5fcd89f..76b06f91 100644 --- a/relay/adaptor/aws/llama3/main.go +++ b/relay/adaptor/aws/llama3/main.go @@ -70,13 +70,14 @@ func ConvertRequest(textRequest relaymodel.GeneralOpenAIRequest) *Request { } func Handler(c *gin.Context, awsCli *bedrockruntime.Client, modelName string) (*relaymodel.ErrorWithStatusCode, *relaymodel.Usage) { - awsModelId, err := awsModelID(c.GetString(ctxkey.RequestModel)) + awsModelID, err := awsModelID(c.GetString(ctxkey.RequestModel)) if err != nil { return utils.WrapErr(errors.Wrap(err, "awsModelID")), nil } + awsModelID = utils.ConvertModelID2CrossRegionProfile(awsModelID, awsCli.Options().Region) awsReq := &bedrockruntime.InvokeModelInput{ - ModelId: aws.String(awsModelId), + ModelId: aws.String(awsModelID), Accept: aws.String("application/json"), ContentType: aws.String("application/json"), } @@ -140,13 +141,14 @@ func ResponseLlama2OpenAI(llamaResponse *Response) *openai.TextResponse { func StreamHandler(c *gin.Context, awsCli *bedrockruntime.Client) (*relaymodel.ErrorWithStatusCode, *relaymodel.Usage) { createdTime := helper.GetTimestamp() - awsModelId, err := awsModelID(c.GetString(ctxkey.RequestModel)) + awsModelID, err := awsModelID(c.GetString(ctxkey.RequestModel)) if err != nil { return utils.WrapErr(errors.Wrap(err, "awsModelID")), nil } + awsModelID = utils.ConvertModelID2CrossRegionProfile(awsModelID, awsCli.Options().Region) awsReq := &bedrockruntime.InvokeModelWithResponseStreamInput{ - ModelId: aws.String(awsModelId), + ModelId: aws.String(awsModelID), Accept: aws.String("application/json"), ContentType: aws.String("application/json"), } diff --git a/relay/adaptor/aws/utils/adaptor.go b/relay/adaptor/aws/utils/adaptor.go index 4cb880f2..f5fc0038 100644 --- a/relay/adaptor/aws/utils/adaptor.go +++ b/relay/adaptor/aws/utils/adaptor.go @@ -39,7 +39,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me return nil } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/adaptor/aws/utils/consts.go b/relay/adaptor/aws/utils/consts.go new file mode 100644 index 00000000..c91f342e --- /dev/null +++ b/relay/adaptor/aws/utils/consts.go @@ -0,0 +1,75 @@ +package utils + +import ( + "context" + "slices" + "strings" + + "github.com/songquanpeng/one-api/common/logger" +) + +// CrossRegionInferences is a list of model IDs that support cross-region inference. 
+//
+// https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html
+//
+// document.querySelectorAll('pre.programlisting code').forEach((e) => {console.log(e.innerHTML)})
+var CrossRegionInferences = []string{
+    "us.amazon.nova-lite-v1:0",
+    "us.amazon.nova-micro-v1:0",
+    "us.amazon.nova-pro-v1:0",
+    "us.anthropic.claude-3-5-haiku-20241022-v1:0",
+    "us.anthropic.claude-3-5-sonnet-20240620-v1:0",
+    "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "us.anthropic.claude-3-haiku-20240307-v1:0",
+    "us.anthropic.claude-3-opus-20240229-v1:0",
+    "us.anthropic.claude-3-sonnet-20240229-v1:0",
+    "us.meta.llama3-1-405b-instruct-v1:0",
+    "us.meta.llama3-1-70b-instruct-v1:0",
+    "us.meta.llama3-1-8b-instruct-v1:0",
+    "us.meta.llama3-2-11b-instruct-v1:0",
+    "us.meta.llama3-2-1b-instruct-v1:0",
+    "us.meta.llama3-2-3b-instruct-v1:0",
+    "us.meta.llama3-2-90b-instruct-v1:0",
+    "us.meta.llama3-3-70b-instruct-v1:0",
+    "us-gov.anthropic.claude-3-5-sonnet-20240620-v1:0",
+    "us-gov.anthropic.claude-3-haiku-20240307-v1:0",
+    "eu.amazon.nova-lite-v1:0",
+    "eu.amazon.nova-micro-v1:0",
+    "eu.amazon.nova-pro-v1:0",
+    "eu.anthropic.claude-3-5-sonnet-20240620-v1:0",
+    "eu.anthropic.claude-3-haiku-20240307-v1:0",
+    "eu.anthropic.claude-3-sonnet-20240229-v1:0",
+    "eu.meta.llama3-2-1b-instruct-v1:0",
+    "eu.meta.llama3-2-3b-instruct-v1:0",
+    "apac.amazon.nova-lite-v1:0",
+    "apac.amazon.nova-micro-v1:0",
+    "apac.amazon.nova-pro-v1:0",
+    "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
+    "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "apac.anthropic.claude-3-haiku-20240307-v1:0",
+    "apac.anthropic.claude-3-sonnet-20240229-v1:0",
+}
+
+// ConvertModelID2CrossRegionProfile converts the model ID to a cross-region profile ID.
+func ConvertModelID2CrossRegionProfile(model, region string) string {
+    var regionPrefix string
+    switch prefix := strings.Split(region, "-")[0]; prefix {
+    case "us", "eu":
+        regionPrefix = prefix
+    case "ap":
+        regionPrefix = "apac"
+    default:
+        // not supported, return original model
+        return model
+    }
+
+    newModelID := regionPrefix + "." + model
+    if slices.Contains(CrossRegionInferences, newModelID) {
+        logger.Debugf(context.TODO(), "convert model %s to cross-region profile %s", model, newModelID)
+        return newModelID
+    }
+
+    // not found, return original model
+    return model
+}
diff --git a/relay/adaptor/baidu/adaptor.go b/relay/adaptor/baidu/adaptor.go
index 15306b95..664e0e77 100644
--- a/relay/adaptor/baidu/adaptor.go
+++ b/relay/adaptor/baidu/adaptor.go
@@ -109,7 +109,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     }
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/cloudflare/adaptor.go b/relay/adaptor/cloudflare/adaptor.go
index 97e3dbb2..8958466d 100644
--- a/relay/adaptor/cloudflare/adaptor.go
+++ b/relay/adaptor/cloudflare/adaptor.go
@@ -19,7 +19,7 @@ type Adaptor struct {
 }
 
 // ConvertImageRequest implements adaptor.Adaptor.
-func (*Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (*Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     return nil, errors.New("not implemented")
 }
diff --git a/relay/adaptor/cloudflare/main.go b/relay/adaptor/cloudflare/main.go
index 980a2891..e164d473 100644
--- a/relay/adaptor/cloudflare/main.go
+++ b/relay/adaptor/cloudflare/main.go
@@ -19,9 +19,8 @@ import (
 )
 
 func ConvertCompletionsRequest(textRequest model.GeneralOpenAIRequest) *Request {
-    p, _ := textRequest.Prompt.(string)
     return &Request{
-        Prompt:      p,
+        Prompt:      textRequest.Prompt,
         MaxTokens:   textRequest.MaxTokens,
         Stream:      textRequest.Stream,
         Temperature: textRequest.Temperature,
diff --git a/relay/adaptor/cohere/adaptor.go b/relay/adaptor/cohere/adaptor.go
index 6fdb1b04..dd90bd7b 100644
--- a/relay/adaptor/cohere/adaptor.go
+++ b/relay/adaptor/cohere/adaptor.go
@@ -15,7 +15,7 @@ import (
 
 type Adaptor struct{}
 
 // ConvertImageRequest implements adaptor.Adaptor.
-func (*Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (*Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     return nil, errors.New("not implemented")
 }
diff --git a/relay/adaptor/coze/adaptor.go b/relay/adaptor/coze/adaptor.go
index 44f560e8..21d91e76 100644
--- a/relay/adaptor/coze/adaptor.go
+++ b/relay/adaptor/coze/adaptor.go
@@ -38,7 +38,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     return ConvertRequest(*request), nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/deepl/adaptor.go b/relay/adaptor/deepl/adaptor.go
index d018a096..5a03c261 100644
--- a/relay/adaptor/deepl/adaptor.go
+++ b/relay/adaptor/deepl/adaptor.go
@@ -39,7 +39,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     return convertedRequest, nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/gemini/adaptor.go b/relay/adaptor/gemini/adaptor.go
index 84083f60..a1b3f2fa 100644
--- a/relay/adaptor/gemini/adaptor.go
+++ b/relay/adaptor/gemini/adaptor.go
@@ -25,7 +25,7 @@ func (a *Adaptor) Init(meta *meta.Meta) {
 
 func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
     defaultVersion := config.GeminiVersion
-    if strings.Contains(meta.ActualModelName, "gemini-2.0") ||
+    if strings.Contains(meta.ActualModelName, "gemini-2") ||
         strings.Contains(meta.ActualModelName, "gemini-1.5") {
         defaultVersion = "v1beta"
     }
@@ -66,7 +66,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     }
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/gemini/constants.go b/relay/adaptor/gemini/constants.go
index d220b25f..424cf637 100644
--- a/relay/adaptor/gemini/constants.go
+++ b/relay/adaptor/gemini/constants.go
@@ -19,6 +19,9 @@ var ModelsSupportSystemInstruction = []string{
     // "gemini-1.5-pro-experimental",
"gemini-2.0-flash", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-lite", + // "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-pro-exp-02-05", } // IsModelSupportSystemInstruction check if the model support system instruction. diff --git a/relay/adaptor/gemini/main.go b/relay/adaptor/gemini/main.go index 29637296..26788ae7 100644 --- a/relay/adaptor/gemini/main.go +++ b/relay/adaptor/gemini/main.go @@ -8,19 +8,18 @@ import ( "net/http" "strings" - "github.com/songquanpeng/one-api/common/render" - + "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/config" "github.com/songquanpeng/one-api/common/helper" "github.com/songquanpeng/one-api/common/image" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/common/random" + "github.com/songquanpeng/one-api/common/render" + "github.com/songquanpeng/one-api/relay/adaptor/geminiv2" "github.com/songquanpeng/one-api/relay/adaptor/openai" "github.com/songquanpeng/one-api/relay/constant" "github.com/songquanpeng/one-api/relay/model" - - "github.com/gin-gonic/gin" ) // https://ai.google.dev/docs/gemini_api_overview?hl=zh-cn @@ -61,9 +60,10 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest { }, }, GenerationConfig: ChatGenerationConfig{ - Temperature: textRequest.Temperature, - TopP: textRequest.TopP, - MaxOutputTokens: textRequest.MaxTokens, + Temperature: textRequest.Temperature, + TopP: textRequest.TopP, + MaxOutputTokens: textRequest.MaxTokens, + ResponseModalities: geminiv2.GetModelModalities(textRequest.Model), }, } if textRequest.ResponseFormat != nil { @@ -106,9 +106,9 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest { var parts []Part imageNum := 0 for _, part := range openaiContent { - if part.Type == model.ContentTypeText { + if part.Type == model.ContentTypeText && part.Text != nil && *part.Text != "" { parts = append(parts, Part{ - Text: part.Text, + Text: *part.Text, }) } else if part.Type == model.ContentTypeImageURL { imageNum += 1 @@ -258,19 +258,52 @@ func responseGeminiChat2OpenAI(response *ChatResponse) *openai.TextResponse { if candidate.Content.Parts[0].FunctionCall != nil { choice.Message.ToolCalls = getToolCalls(&candidate) } else { + // Handle text and image content var builder strings.Builder + var contentItems []model.MessageContent + for _, part := range candidate.Content.Parts { - if i > 0 { - builder.WriteString("\n") + if part.Text != "" { + // For text parts + if i > 0 { + builder.WriteString("\n") + } + builder.WriteString(part.Text) + + // Add to content items + contentItems = append(contentItems, model.MessageContent{ + Type: model.ContentTypeText, + Text: &part.Text, + }) + } + + if part.InlineData != nil && part.InlineData.MimeType != "" && part.InlineData.Data != "" { + // For inline image data + imageURL := &model.ImageURL{ + // The data is already base64 encoded + Url: fmt.Sprintf("data:%s;base64,%s", part.InlineData.MimeType, part.InlineData.Data), + } + + contentItems = append(contentItems, model.MessageContent{ + Type: model.ContentTypeImageURL, + ImageURL: imageURL, + }) } - builder.WriteString(part.Text) } - choice.Message.Content = builder.String() + + // If we have multiple content types, use structured content format + if len(contentItems) > 1 || (len(contentItems) == 1 && contentItems[0].Type != model.ContentTypeText) { + choice.Message.Content = contentItems + } else { + // Otherwise use the simple string content format + 
+                    choice.Message.Content = builder.String()
+                }
             }
         } else {
             choice.Message.Content = ""
             choice.FinishReason = candidate.FinishReason
         }
+
         fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
     }
     return &fullTextResponse
@@ -278,14 +311,78 @@ func responseGeminiChat2OpenAI(response *ChatResponse) *openai.TextResponse {
 
 func streamResponseGeminiChat2OpenAI(geminiResponse *ChatResponse) *openai.ChatCompletionsStreamResponse {
     var choice openai.ChatCompletionsStreamResponseChoice
-    choice.Delta.Content = geminiResponse.GetResponseText()
-    //choice.FinishReason = &constant.StopFinishReason
+    choice.Delta.Role = "assistant"
+
+    // Check if we have any candidates
+    if len(geminiResponse.Candidates) == 0 {
+        return nil
+    }
+
+    // Get the first candidate
+    candidate := geminiResponse.Candidates[0]
+
+    // Check if there are parts in the content
+    if len(candidate.Content.Parts) == 0 {
+        return nil
+    }
+
+    // Handle different content types in the parts
+    for _, part := range candidate.Content.Parts {
+        // Handle text content
+        if part.Text != "" {
+            // Store as string for simple text responses
+            textContent := part.Text
+            choice.Delta.Content = textContent
+        }
+
+        // Handle image content
+        if part.InlineData != nil && part.InlineData.MimeType != "" && part.InlineData.Data != "" {
+            // Create a structured response for image content
+            imageUrl := fmt.Sprintf("data:%s;base64,%s", part.InlineData.MimeType, part.InlineData.Data)
+
+            // If we already have text content, create a mixed content response
+            if strContent, ok := choice.Delta.Content.(string); ok && strContent != "" {
+                // Convert the existing text content and add the image
+                messageContents := []model.MessageContent{
+                    {
+                        Type: model.ContentTypeText,
+                        Text: &strContent,
+                    },
+                    {
+                        Type: model.ContentTypeImageURL,
+                        ImageURL: &model.ImageURL{
+                            Url: imageUrl,
+                        },
+                    },
+                }
+                choice.Delta.Content = messageContents
+            } else {
+                // Only have image content
+                choice.Delta.Content = []model.MessageContent{
+                    {
+                        Type: model.ContentTypeImageURL,
+                        ImageURL: &model.ImageURL{
+                            Url: imageUrl,
+                        },
+                    },
+                }
+            }
+        }
+
+        // Handle function calls (if present)
+        if part.FunctionCall != nil {
+            choice.Delta.ToolCalls = getToolCalls(&candidate)
+        }
+    }
+
+    // Create response
     var response openai.ChatCompletionsStreamResponse
     response.Id = fmt.Sprintf("chatcmpl-%s", random.GetUUID())
     response.Created = helper.GetTimestamp()
     response.Object = "chat.completion.chunk"
     response.Model = "gemini"
     response.Choices = []openai.ChatCompletionsStreamResponseChoice{choice}
+
     return &response
 }
 
@@ -311,17 +408,23 @@ func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusC
     scanner := bufio.NewScanner(resp.Body)
     scanner.Split(bufio.ScanLines)
 
+    buffer := make([]byte, 1024*1024) // 1MB buffer
+    scanner.Buffer(buffer, len(buffer))
+
     common.SetEventStreamHeaders(c)
 
     for scanner.Scan() {
         data := scanner.Text()
         data = strings.TrimSpace(data)
+
         if !strings.HasPrefix(data, "data: ") {
             continue
         }
         data = strings.TrimPrefix(data, "data: ")
         data = strings.TrimSuffix(data, "\"")
+
+        logger.Debugf(c.Request.Context(), "gemini stream <- %q", data)
+
         var geminiResponse ChatResponse
         err := json.Unmarshal([]byte(data), &geminiResponse)
         if err != nil {
@@ -361,6 +464,7 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
     if err != nil {
         return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
     }
+
     err = resp.Body.Close()
     if err != nil {
         return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
diff --git a/relay/adaptor/gemini/model.go b/relay/adaptor/gemini/model.go
index c3acae60..3bf5fe8e 100644
--- a/relay/adaptor/gemini/model.go
+++ b/relay/adaptor/gemini/model.go
@@ -6,6 +6,19 @@ type ChatRequest struct {
     GenerationConfig  ChatGenerationConfig `json:"generation_config,omitempty"`
     Tools             []ChatTools          `json:"tools,omitempty"`
     SystemInstruction *ChatContent         `json:"system_instruction,omitempty"`
+    ModelVersion      string               `json:"model_version,omitempty"`
+    UsageMetadata     *UsageMetadata       `json:"usage_metadata,omitempty"`
+}
+
+type UsageMetadata struct {
+    PromptTokenCount    int                   `json:"promptTokenCount,omitempty"`
+    TotalTokenCount     int                   `json:"totalTokenCount,omitempty"`
+    PromptTokensDetails []PromptTokensDetails `json:"promptTokensDetails,omitempty"`
+}
+
+type PromptTokensDetails struct {
+    Modality   string `json:"modality,omitempty"`
+    TokenCount int    `json:"tokenCount,omitempty"`
 }
 
 type EmbeddingRequest struct {
@@ -66,12 +79,13 @@ type ChatTools struct {
 }
 
 type ChatGenerationConfig struct {
-    ResponseMimeType string   `json:"responseMimeType,omitempty"`
-    ResponseSchema   any      `json:"responseSchema,omitempty"`
-    Temperature      *float64 `json:"temperature,omitempty"`
-    TopP             *float64 `json:"topP,omitempty"`
-    TopK             float64  `json:"topK,omitempty"`
-    MaxOutputTokens  int      `json:"maxOutputTokens,omitempty"`
-    CandidateCount   int      `json:"candidateCount,omitempty"`
-    StopSequences    []string `json:"stopSequences,omitempty"`
+    ResponseMimeType   string   `json:"responseMimeType,omitempty"`
+    ResponseSchema     any      `json:"responseSchema,omitempty"`
+    Temperature        *float64 `json:"temperature,omitempty"`
+    TopP               *float64 `json:"topP,omitempty"`
+    TopK               float64  `json:"topK,omitempty"`
+    MaxOutputTokens    int      `json:"maxOutputTokens,omitempty"`
+    CandidateCount     int      `json:"candidateCount,omitempty"`
+    StopSequences      []string `json:"stopSequences,omitempty"`
+    ResponseModalities []string `json:"responseModalities,omitempty"`
 }
diff --git a/relay/adaptor/geminiv2/constants.go b/relay/adaptor/geminiv2/constants.go
index 73e7ad7d..e2712ddd 100644
--- a/relay/adaptor/geminiv2/constants.go
+++ b/relay/adaptor/geminiv2/constants.go
@@ -1,15 +1,42 @@
 package geminiv2
 
+import "strings"
+
 // https://ai.google.dev/models/gemini
 var ModelList = []string{
     "gemini-pro", "gemini-1.0-pro",
-    // "gemma-2-2b-it", "gemma-2-9b-it", "gemma-2-27b-it",
+    "gemma-2-2b-it", "gemma-2-9b-it", "gemma-2-27b-it",
+    "gemma-3-27b-it",
     "gemini-1.5-flash", "gemini-1.5-flash-8b",
     "gemini-1.5-pro", "gemini-1.5-pro-experimental",
     "text-embedding-004", "aqa",
     "gemini-2.0-flash", "gemini-2.0-flash-exp",
     "gemini-2.0-flash-lite-preview-02-05",
     "gemini-2.0-flash-thinking-exp-01-21",
+    "gemini-2.0-flash-exp-image-generation",
     "gemini-2.0-pro-exp-02-05",
+    "gemini-2.5-pro-exp-03-25",
 }
+
+const (
+    ModalityText  = "TEXT"
+    ModalityImage = "IMAGE"
+)
+
+// GetModelModalities returns the modalities of the model.
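+// Image-generation variants report both TEXT and IMAGE, models that reject the
+// responseModalities field report nil, and everything else reports TEXT only.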
+func GetModelModalities(model string) []string {
+    if strings.Contains(model, "-image-generation") {
+        return []string{ModalityText, ModalityImage}
+    }
+
+    // Until 2025-03-26, the following models do not accept the responseModalities field
+    if model == "gemini-2.5-pro-exp-03-25" ||
+        model == "aqa" ||
+        strings.HasPrefix(model, "gemma") ||
+        strings.HasPrefix(model, "text-embed") {
+        return nil
+    }
+
+    return []string{ModalityText}
+}
diff --git a/relay/adaptor/groq/constants.go b/relay/adaptor/groq/constants.go
index 2a26b28b..0b319bc4 100644
--- a/relay/adaptor/groq/constants.go
+++ b/relay/adaptor/groq/constants.go
@@ -1,27 +1,32 @@
 package groq
 
+// ModelList is a list of models that can be used with Groq.
+//
 // https://console.groq.com/docs/models
-
 var ModelList = []string{
+    // Regular Models
+    "distil-whisper-large-v3-en",
     "gemma2-9b-it",
-    "llama-3.1-70b-versatile",
+    "llama-3.3-70b-versatile",
     "llama-3.1-8b-instant",
-    "llama-3.2-11b-text-preview",
-    "llama-3.2-11b-vision-preview",
-    "llama-3.2-1b-preview",
-    "llama-3.2-3b-preview",
-    "llama-3.2-90b-text-preview",
-    "llama-3.2-90b-vision-preview",
     "llama-guard-3-8b",
     "llama3-70b-8192",
     "llama3-8b-8192",
-    "llama3-groq-70b-8192-tool-use-preview",
-    "llama3-groq-8b-8192-tool-use-preview",
-    "llava-v1.5-7b-4096-preview",
     "mixtral-8x7b-32768",
-    "distil-whisper-large-v3-en",
     "whisper-large-v3",
     "whisper-large-v3-turbo",
+
+    // Preview Models
+    "qwen-qwq-32b",
+    "mistral-saba-24b",
+    "qwen-2.5-coder-32b",
+    "qwen-2.5-32b",
+    "deepseek-r1-distill-qwen-32b",
     "deepseek-r1-distill-llama-70b-specdec",
     "deepseek-r1-distill-llama-70b",
+    "llama-3.2-1b-preview",
+    "llama-3.2-3b-preview",
+    "llama-3.2-11b-vision-preview",
+    "llama-3.2-90b-vision-preview",
+    "llama-3.3-70b-specdec",
 }
diff --git a/relay/adaptor/interface.go b/relay/adaptor/interface.go
index 01b2e2cb..88667561 100644
--- a/relay/adaptor/interface.go
+++ b/relay/adaptor/interface.go
@@ -13,7 +13,7 @@ type Adaptor interface {
     GetRequestURL(meta *meta.Meta) (string, error)
     SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error
     ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error)
-    ConvertImageRequest(request *model.ImageRequest) (any, error)
+    ConvertImageRequest(c *gin.Context, request *model.ImageRequest) (any, error)
     DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error)
     DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode)
     GetModelList() []string
diff --git a/relay/adaptor/ollama/adaptor.go b/relay/adaptor/ollama/adaptor.go
index ad1f8983..9305340d 100644
--- a/relay/adaptor/ollama/adaptor.go
+++ b/relay/adaptor/ollama/adaptor.go
@@ -48,7 +48,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
     }
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
     if request == nil {
         return nil, errors.New("request is nil")
     }
diff --git a/relay/adaptor/ollama/main.go b/relay/adaptor/ollama/main.go
index fa1b05f0..2bb49767 100644
--- a/relay/adaptor/ollama/main.go
+++ b/relay/adaptor/ollama/main.go
@@ -43,7 +43,9 @@ func ConvertRequest(request model.GeneralOpenAIRequest) *ChatRequest {
         for _, part := range openaiContent {
             switch part.Type {
             case model.ContentTypeText:
-                contentText = part.Text
+                if part.Text != nil {
+                    contentText = *part.Text
+                }
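+            // image parts below are fetched and forwarded as base64 data;
+            // fetch errors are silently ignored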
model.ContentTypeImageURL:
 			_, data, _ := image.GetImageFromUrl(part.ImageURL.Url)
 			imageUrls = append(imageUrls, data)
diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index 8faf90a5..184b6736 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -1,14 +1,18 @@
 package openai
 
 import (
-	"errors"
 	"fmt"
 	"io"
+	"math"
 	"net/http"
 	"strings"
 
 	"github.com/gin-gonic/gin"
+	"github.com/pkg/errors"
+
+	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/ctxkey"
+	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/adaptor/alibailian"
 	"github.com/songquanpeng/one-api/relay/adaptor/baiduv2"
@@ -16,6 +20,8 @@ import (
 	"github.com/songquanpeng/one-api/relay/adaptor/geminiv2"
 	"github.com/songquanpeng/one-api/relay/adaptor/minimax"
 	"github.com/songquanpeng/one-api/relay/adaptor/novita"
+	"github.com/songquanpeng/one-api/relay/adaptor/openrouter"
+	"github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/channeltype"
 	"github.com/songquanpeng/one-api/relay/meta"
 	"github.com/songquanpeng/one-api/relay/model"
@@ -33,16 +39,24 @@ func (a *Adaptor) Init(meta *meta.Meta) {
 
 func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
 	switch meta.ChannelType {
 	case channeltype.Azure:
+		defaultVersion := meta.Config.APIVersion
+
+		// https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python#api--feature-support
+		if strings.HasPrefix(meta.ActualModelName, "o1") ||
+			strings.HasPrefix(meta.ActualModelName, "o3") {
+			defaultVersion = "2024-12-01-preview"
+		}
+
 		if meta.Mode == relaymode.ImagesGenerations {
 			// https://learn.microsoft.com/en-us/azure/ai-services/openai/dall-e-quickstart?tabs=dalle3%2Ccommand-line&pivots=rest-api
 			// https://{resource_name}.openai.azure.com/openai/deployments/dall-e-3/images/generations?api-version=2024-03-01-preview
-			fullRequestURL := fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", meta.BaseURL, meta.ActualModelName, meta.Config.APIVersion)
+			fullRequestURL := fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", meta.BaseURL, meta.ActualModelName, defaultVersion)
 			return fullRequestURL, nil
 		}
 
 		// https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?pivots=rest-api&tabs=command-line#rest-api
 		requestURL := strings.Split(meta.RequestURLPath, "?")[0]
-		requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, meta.Config.APIVersion)
+		requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, defaultVersion)
 		task := strings.TrimPrefix(requestURL, "/v1/")
 		model_ := meta.ActualModelName
 		model_ = strings.Replace(model_, ".", "", -1)
@@ -85,28 +99,92 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	if request.Stream {
+
+	meta := meta.GetByContext(c)
+	switch meta.ChannelType {
+	case channeltype.OpenRouter:
+		includeReasoning := true
+		request.IncludeReasoning = &includeReasoning
+		// Parenthesized explicitly: && binds tighter than || in Go.
+		if (request.Provider == nil || request.Provider.Sort == "") &&
+			config.OpenrouterProviderSort != "" {
+			if request.Provider == nil {
+				request.Provider = &openrouter.RequestProvider{}
+			}
+
+			request.Provider.Sort = config.OpenrouterProviderSort
+		}
+	default:
+	}
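+
+	// With OPENROUTER_PROVIDER_SORT set (e.g. "price"), OpenRouter requests
+	// that do not specify provider.sort fall back to that server-wide default above.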
+
+	if request.Stream && !config.EnforceIncludeUsage {
+		logger.Warn(c.Request.Context(),
+			"please set ENFORCE_INCLUDE_USAGE=true to ensure accurate billing in stream mode")
+	}
+
+	if config.EnforceIncludeUsage && request.Stream {
 		// always return usage in stream mode
 		if request.StreamOptions == nil {
 			request.StreamOptions = &model.StreamOptions{}
 		}
 		request.StreamOptions.IncludeUsage = true
 	}
+
+	// The o1/o3 series does not support system prompt, max_tokens, or
+	// temperature values other than the default (1)
+	if strings.HasPrefix(meta.ActualModelName, "o1") ||
+		strings.HasPrefix(meta.ActualModelName, "o3") {
+		temperature := float64(1)
+		request.Temperature = &temperature // Only the default (1) value is supported
+
+		request.MaxTokens = 0
+		request.Messages = func(raw []model.Message) (filtered []model.Message) {
+			for i := range raw {
+				if raw[i].Role != "system" {
+					filtered = append(filtered, raw[i])
+				}
+			}
+
+			return
+		}(request.Messages)
+	}
+
+	// Web-search models do not support the sampling parameters
+	// (temperature/top_p/penalties/n)
+	if strings.HasPrefix(meta.ActualModelName, "gpt-4o-search") ||
+		strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search") {
+		request.Temperature = nil
+		request.TopP = nil
+		request.PresencePenalty = nil
+		request.N = nil
+		request.FrequencyPenalty = nil
+	}
+
+	if request.Stream && !config.EnforceIncludeUsage &&
+		(strings.HasPrefix(request.Model, "gpt-4o-audio") ||
+			strings.HasPrefix(request.Model, "gpt-4o-mini-audio")) {
+		// TODO: Since it is not clear how to implement billing in stream mode,
+		// it is temporarily not supported
+		return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
+	}
+
 	return request, nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
 	return request, nil
 }
 
-func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
+func (a *Adaptor) DoRequest(c *gin.Context,
+	meta *meta.Meta,
+	requestBody io.Reader) (*http.Response, error) {
 	return adaptor.DoRequestHelper(a, c, meta, requestBody)
 }
 
-func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+func (a *Adaptor) DoResponse(c *gin.Context,
+	resp *http.Response,
+	meta *meta.Meta) (usage *model.Usage,
+	err *model.ErrorWithStatusCode) {
 	if meta.IsStream {
 		var responseText string
 		err, responseText, usage = StreamHandler(c, resp, meta.Mode)
@@ -121,10 +199,61 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
 		switch meta.Mode {
 		case relaymode.ImagesGenerations:
 			err, _ = ImageHandler(c, resp)
+		case relaymode.ImagesEdits:
+			err, _ = ImagesEditsHandler(c, resp)
 		default:
 			err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
 		}
 	}
+
+	// -------------------------------------
+	// calculate web-search tool cost
+	// -------------------------------------
+	if usage != nil {
+		searchContextSize := "medium"
+		var req *model.GeneralOpenAIRequest
+		if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+			if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+				if req != nil &&
+					req.WebSearchOptions != nil &&
+					req.WebSearchOptions.SearchContextSize != nil {
+					searchContextSize = *req.WebSearchOptions.SearchContextSize
+				}
+
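+				// Per-call search pricing, USD per 1K calls (low/medium/high
+				// context, per the values below): gpt-4o-search 30/35/40,
+				// gpt-4o-mini-search 25/27.5/30; quota = usd/1000 * QuotaPerUsd.
+				// Float literals are required below: the untyped constant
+				// expression 30 / 1000 would truncate to 0 in Go.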
+				switch {
+				case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
+					switch searchContextSize {
+					case "low":
+						usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.QuotaPerUsd))
+					case "medium":
+						usage.ToolsCost += int64(math.Ceil(35.0 / 1000 * ratio.QuotaPerUsd))
+					case "high":
+						usage.ToolsCost += int64(math.Ceil(40.0 / 1000 * ratio.QuotaPerUsd))
+					default:
+						return nil, ErrorWrapper(
+							errors.Errorf("invalid search context size %q", searchContextSize),
+							"invalid search context size: "+searchContextSize,
+							http.StatusBadRequest)
+					}
+				case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+					switch searchContextSize {
+					case "low":
+						usage.ToolsCost += int64(math.Ceil(25.0 / 1000 * ratio.QuotaPerUsd))
+					case "medium":
+						usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
+					case "high":
+						usage.ToolsCost += int64(math.Ceil(30.0 / 1000 * ratio.QuotaPerUsd))
+					default:
+						return nil, ErrorWrapper(
+							errors.Errorf("invalid search context size %q", searchContextSize),
+							"invalid search context size: "+searchContextSize,
+							http.StatusBadRequest)
+					}
+				}
+			}
+		}
+	}
+
+	return
 }
diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go
index 8a643bc6..f7518894 100644
--- a/relay/adaptor/openai/constants.go
+++ b/relay/adaptor/openai/constants.go
@@ -7,11 +7,10 @@ var ModelList = []string{
 	"gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-0125-preview",
 	"gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
 	"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
-	"gpt-4o", "gpt-4o-2024-05-13",
-	"gpt-4o-2024-08-06",
-	"gpt-4o-2024-11-20",
-	"chatgpt-4o-latest",
+	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
+	"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
+	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
 	"gpt-4-vision-preview",
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
 	"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
@@ -24,4 +23,8 @@ var ModelList = []string{
 	"o1", "o1-2024-12-17",
 	"o1-preview", "o1-preview-2024-09-12",
 	"o1-mini", "o1-mini-2024-09-12",
+	"o3-mini", "o3-mini-2025-01-31",
+	"gpt-4.5-preview", "gpt-4.5-preview-2025-02-27",
+	// https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat
+	"gpt-4o-search-preview", "gpt-4o-mini-search-preview",
 }
diff --git a/relay/adaptor/openai/image.go b/relay/adaptor/openai/image.go
index 0f89618a..433d9421 100644
--- a/relay/adaptor/openai/image.go
+++ b/relay/adaptor/openai/image.go
@@ -3,12 +3,30 @@ package openai
 import (
 	"bytes"
 	"encoding/json"
-	"github.com/gin-gonic/gin"
-	"github.com/songquanpeng/one-api/relay/model"
 	"io"
 	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/songquanpeng/one-api/relay/model"
 )
 
+// ImagesEditsHandler simply copies the upstream response body to the client.
+//
+// https://platform.openai.com/docs/api-reference/images/createEdit
+func ImagesEditsHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
+	defer resp.Body.Close()
+
+	// Headers must be copied before WriteHeader; anything set afterwards is ignored.
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0])
+	}
+	c.Writer.WriteHeader(resp.StatusCode)
+
+	if _, err := io.Copy(c.Writer, resp.Body); err != nil {
+		return ErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	return nil, nil
+}
+
 func ImageHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
 	var imageResponse ImageResponse
 	responseBody, err := io.ReadAll(resp.Body)
diff --git a/relay/adaptor/openai/main.go b/relay/adaptor/openai/main.go
index 97080738..1411980a 100644
---
a/relay/adaptor/openai/main.go +++ b/relay/adaptor/openai/main.go @@ -5,15 +5,16 @@ import ( "bytes" "encoding/json" "io" + "math" "net/http" "strings" - "github.com/songquanpeng/one-api/common/render" - "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/conv" "github.com/songquanpeng/one-api/common/logger" + "github.com/songquanpeng/one-api/common/render" + "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/model" "github.com/songquanpeng/one-api/relay/relaymode" ) @@ -24,128 +25,300 @@ const ( dataPrefixLength = len(dataPrefix) ) +// StreamHandler processes streaming responses from OpenAI API +// It handles incremental content delivery and accumulates the final response text +// Returns error (if any), accumulated response text, and token usage information func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.ErrorWithStatusCode, string, *model.Usage) { + // Initialize accumulators for the response responseText := "" - scanner := bufio.NewScanner(resp.Body) - scanner.Split(bufio.ScanLines) + reasoningText := "" var usage *model.Usage + // Set up scanner for reading the stream line by line + scanner := bufio.NewScanner(resp.Body) + buffer := make([]byte, 256*1024) // 256KB buffer for large messages + scanner.Buffer(buffer, len(buffer)) + scanner.Split(bufio.ScanLines) + + // Set response headers for SSE common.SetEventStreamHeaders(c) doneRendered := false + + // Process each line from the stream for scanner.Scan() { - data := scanner.Text() - if len(data) < dataPrefixLength { // ignore blank line or wrong format - continue + data := NormalizeDataLine(scanner.Text()) + + // logger.Debugf(c.Request.Context(), "stream response: %s", data) + + // Skip lines that don't match expected format + if len(data) < dataPrefixLength { + continue // Ignore blank line or wrong format } + + // Verify line starts with expected prefix if data[:dataPrefixLength] != dataPrefix && data[:dataPrefixLength] != done { continue } + + // Check for stream termination if strings.HasPrefix(data[dataPrefixLength:], done) { render.StringData(c, data) doneRendered = true continue } + + // Process based on relay mode switch relayMode { case relaymode.ChatCompletions: var streamResponse ChatCompletionsStreamResponse + + // Parse the JSON response err := json.Unmarshal([]byte(data[dataPrefixLength:]), &streamResponse) if err != nil { - logger.SysError("error unmarshalling stream response: " + err.Error()) - render.StringData(c, data) // if error happened, pass the data to client - continue // just ignore the error + logger.Errorf(c.Request.Context(), "unmarshalling stream data %q got %+v", data, err) + render.StringData(c, data) // Pass raw data to client if parsing fails + continue } + + // Skip empty choices (Azure specific behavior) if len(streamResponse.Choices) == 0 && streamResponse.Usage == nil { - // but for empty choice and no usage, we should not pass it to client, this is for azure - continue // just ignore empty choice + continue } - render.StringData(c, data) + + // Process each choice in the response for _, choice := range streamResponse.Choices { + // Extract reasoning content from different possible fields + currentReasoningChunk := extractReasoningContent(&choice.Delta) + + // Update accumulated reasoning text + if currentReasoningChunk != "" { + reasoningText += currentReasoningChunk + } + + // Set the reasoning content in the format requested by client + 
choice.Delta.SetReasoningContent(c.Query("reasoning_format"), currentReasoningChunk) + + // Accumulate response content responseText += conv.AsString(choice.Delta.Content) } + + // Send the processed data to the client + render.StringData(c, data) + + // Update usage information if available if streamResponse.Usage != nil { usage = streamResponse.Usage } + case relaymode.Completions: + // Send the data immediately for Completions mode render.StringData(c, data) + var streamResponse CompletionsStreamResponse err := json.Unmarshal([]byte(data[dataPrefixLength:]), &streamResponse) if err != nil { logger.SysError("error unmarshalling stream response: " + err.Error()) continue } + + // Accumulate text from all choices for _, choice := range streamResponse.Choices { responseText += choice.Text } } } + // Check for scanner errors if err := scanner.Err(); err != nil { logger.SysError("error reading stream: " + err.Error()) } + // Ensure stream termination is sent to client if !doneRendered { render.Done(c) } - err := resp.Body.Close() - if err != nil { + // Clean up resources + if err := resp.Body.Close(); err != nil { return ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), "", nil } - return nil, responseText, usage + // Return the complete response text (reasoning + content) and usage + return nil, reasoningText + responseText, usage } +// Helper function to extract reasoning content from message delta +func extractReasoningContent(delta *model.Message) string { + content := "" + + // Extract reasoning from different possible fields + if delta.Reasoning != nil { + content += *delta.Reasoning + delta.Reasoning = nil + } + + if delta.ReasoningContent != nil { + content += *delta.ReasoningContent + delta.ReasoningContent = nil + } + + return content +} + +// Handler processes non-streaming responses from OpenAI API +// Returns error (if any) and token usage information func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) { - var textResponse SlimTextResponse + // Read the entire response body responseBody, err := io.ReadAll(resp.Body) if err != nil { return ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil } - err = resp.Body.Close() - if err != nil { + + // Close the original response body + if err = resp.Body.Close(); err != nil { return ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil } - err = json.Unmarshal(responseBody, &textResponse) - if err != nil { + + // Parse the response JSON + var textResponse SlimTextResponse + if err = json.Unmarshal(responseBody, &textResponse); err != nil { return ErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil } + + // Check for API errors if textResponse.Error.Type != "" { return &model.ErrorWithStatusCode{ Error: textResponse.Error, StatusCode: resp.StatusCode, }, nil } - // Reset response body - resp.Body = io.NopCloser(bytes.NewBuffer(responseBody)) - // We shouldn't set the header before we parse the response body, because the parse part may fail. - // And then we will have to send an error response, but in this case, the header has already been set. - // So the HTTPClient will be confused by the response. - // For example, Postman will report error, and we cannot check the response at all. 
- for k, v := range resp.Header { - c.Writer.Header().Set(k, v[0]) + // Process reasoning content in each choice + for _, msg := range textResponse.Choices { + reasoningContent := processReasoningContent(&msg) + + // Set reasoning in requested format if content exists + if reasoningContent != "" { + msg.SetReasoningContent(c.Query("reasoning_format"), reasoningContent) + } } + + // Reset response body for forwarding to client + resp.Body = io.NopCloser(bytes.NewBuffer(responseBody)) + logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody)) + + // Forward all response headers (not just first value of each) + for k, values := range resp.Header { + for _, v := range values { + c.Writer.Header().Add(k, v) + } + } + + // Set response status and copy body to client c.Writer.WriteHeader(resp.StatusCode) - _, err = io.Copy(c.Writer, resp.Body) - if err != nil { + if _, err = io.Copy(c.Writer, resp.Body); err != nil { return ErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil } - err = resp.Body.Close() - if err != nil { + + // Close the reset body + if err = resp.Body.Close(); err != nil { return ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil } - if textResponse.Usage.TotalTokens == 0 || (textResponse.Usage.PromptTokens == 0 && textResponse.Usage.CompletionTokens == 0) { + // Calculate token usage if not provided by API + calculateTokenUsage(&textResponse, promptTokens, modelName) + + return nil, &textResponse.Usage +} + +// processReasoningContent is a helper function to extract and process reasoning content from the message +func processReasoningContent(msg *TextResponseChoice) string { + var reasoningContent string + + // Check different locations for reasoning content + switch { + case msg.Reasoning != nil: + reasoningContent = *msg.Reasoning + msg.Reasoning = nil + case msg.ReasoningContent != nil: + reasoningContent = *msg.ReasoningContent + msg.ReasoningContent = nil + case msg.Message.Reasoning != nil: + reasoningContent = *msg.Message.Reasoning + msg.Message.Reasoning = nil + case msg.Message.ReasoningContent != nil: + reasoningContent = *msg.Message.ReasoningContent + msg.Message.ReasoningContent = nil + } + + return reasoningContent +} + +// Helper function to calculate token usage +func calculateTokenUsage(response *SlimTextResponse, promptTokens int, modelName string) { + // Calculate tokens if not provided by the API + if response.Usage.TotalTokens == 0 || + (response.Usage.PromptTokens == 0 && response.Usage.CompletionTokens == 0) { + completionTokens := 0 - for _, choice := range textResponse.Choices { + for _, choice := range response.Choices { + // Count content tokens completionTokens += CountTokenText(choice.Message.StringContent(), modelName) + + // Count reasoning tokens in all possible locations + if choice.Message.Reasoning != nil { + completionTokens += CountToken(*choice.Message.Reasoning) + } + if choice.Message.ReasoningContent != nil { + completionTokens += CountToken(*choice.Message.ReasoningContent) + } + if choice.Reasoning != nil { + completionTokens += CountToken(*choice.Reasoning) + } + if choice.ReasoningContent != nil { + completionTokens += CountToken(*choice.ReasoningContent) + } } - textResponse.Usage = model.Usage{ + + // Set usage values + response.Usage = model.Usage{ PromptTokens: promptTokens, CompletionTokens: completionTokens, TotalTokens: promptTokens + completionTokens, } + } else if hasAudioTokens(response) { + // Handle audio tokens conversion + 
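+		// Audio tokens are billed at a multiple of the text-token price, so
+		// they are folded into usage as equivalent text tokens, e.g.
+		//   promptTokens = textTokens + ceil(audioTokens*GetAudioPromptRatio(model))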
+		calculateAudioTokens(response, modelName)
 	}
-	return nil, &textResponse.Usage
+}
+
+// Helper function to check if response has audio tokens
+func hasAudioTokens(response *SlimTextResponse) bool {
+	return (response.PromptTokensDetails != nil && response.PromptTokensDetails.AudioTokens > 0) ||
+		(response.CompletionTokensDetails != nil && response.CompletionTokensDetails.AudioTokens > 0)
+}
+
+// Helper function to calculate audio token usage
+func calculateAudioTokens(response *SlimTextResponse, modelName string) {
+	// Convert audio tokens for prompt
+	if response.PromptTokensDetails != nil {
+		response.Usage.PromptTokens = response.PromptTokensDetails.TextTokens +
+			int(math.Ceil(
+				float64(response.PromptTokensDetails.AudioTokens)*
+					ratio.GetAudioPromptRatio(modelName),
+			))
+	}
+
+	// Convert audio tokens for completion
+	if response.CompletionTokensDetails != nil {
+		response.Usage.CompletionTokens = response.CompletionTokensDetails.TextTokens +
+			int(math.Ceil(
+				float64(response.CompletionTokensDetails.AudioTokens)*
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
+			))
+	}
+
+	// Calculate total tokens
+	response.Usage.TotalTokens = response.Usage.PromptTokens + response.Usage.CompletionTokens
 }
diff --git a/relay/adaptor/openai/model.go b/relay/adaptor/openai/model.go
index 4c974de4..50a8e1da 100644
--- a/relay/adaptor/openai/model.go
+++ b/relay/adaptor/openai/model.go
@@ -1,6 +1,10 @@
 package openai
 
-import "github.com/songquanpeng/one-api/relay/model"
+import (
+	"mime/multipart"
+
+	"github.com/songquanpeng/one-api/relay/model"
+)
 
 type TextContent struct {
 	Type string `json:"type,omitempty"`
@@ -71,6 +75,24 @@ type TextToSpeechRequest struct {
 	ResponseFormat string `json:"response_format"`
 }
 
+type AudioTranscriptionRequest struct {
+	File                 *multipart.FileHeader `form:"file" binding:"required"`
+	Model                string                `form:"model" binding:"required"`
+	Language             string                `form:"language"`
+	Prompt               string                `form:"prompt"`
+	ResponseFormat       string                `form:"response_format" binding:"oneof=json text srt verbose_json vtt"`
+	Temperature          float64               `form:"temperature"`
+	TimestampGranularity []string              `form:"timestamp_granularity"`
+}
+
+type AudioTranslationRequest struct {
+	File           *multipart.FileHeader `form:"file" binding:"required"`
+	Model          string                `form:"model" binding:"required"`
+	Prompt         string                `form:"prompt"`
+	ResponseFormat string                `form:"response_format" binding:"oneof=json text srt verbose_json vtt"`
+	Temperature    float64               `form:"temperature"`
+}
+
 type UsageOrResponseText struct {
 	*model.Usage
 	ResponseText string
@@ -110,12 +132,14 @@ type EmbeddingResponse struct {
 	model.Usage `json:"usage"`
 }
 
+// ImageData represents an image in the response
 type ImageData struct {
 	Url           string `json:"url,omitempty"`
 	B64Json       string `json:"b64_json,omitempty"`
 	RevisedPrompt string `json:"revised_prompt,omitempty"`
 }
 
+// ImageResponse represents the response structure for image generations
 type ImageResponse struct {
 	Created int64       `json:"created"`
 	Data    []ImageData `json:"data"`
diff --git a/relay/adaptor/openai/token.go b/relay/adaptor/openai/token.go
index b50220e7..e3194439 100644
--- a/relay/adaptor/openai/token.go
+++ b/relay/adaptor/openai/token.go
@@ -1,16 +1,20 @@
 package openai
 
 import (
-	"errors"
+	"bytes"
+	"context"
+	"encoding/base64"
 	"fmt"
 	"math"
 	"strings"
 
+	"github.com/pkg/errors"
 	"github.com/pkoukk/tiktoken-go"
-	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/common/image"
"github.com/songquanpeng/one-api/common/logger" + "github.com/songquanpeng/one-api/relay/billing/ratio" billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/model" ) @@ -73,8 +77,10 @@ func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int { return len(tokenEncoder.Encode(text, nil, nil)) } -func CountTokenMessages(messages []model.Message, model string) int { - tokenEncoder := getTokenEncoder(model) +// CountTokenMessages counts the number of tokens in a list of messages. +func CountTokenMessages(ctx context.Context, + messages []model.Message, actualModel string) int { + tokenEncoder := getTokenEncoder(actualModel) // Reference: // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb // https://github.com/pkoukk/tiktoken-go/issues/6 @@ -82,47 +88,54 @@ func CountTokenMessages(messages []model.Message, model string) int { // Every message follows <|start|>{role/name}\n{content}<|end|>\n var tokensPerMessage int var tokensPerName int - if model == "gpt-3.5-turbo-0301" { + if actualModel == "gpt-3.5-turbo-0301" { tokensPerMessage = 4 tokensPerName = -1 // If there's a name, the role is omitted } else { tokensPerMessage = 3 tokensPerName = 1 } + tokenNum := 0 + var totalAudioTokens float64 for _, message := range messages { tokenNum += tokensPerMessage - switch v := message.Content.(type) { - case string: - tokenNum += getTokenNum(tokenEncoder, v) - case []any: - for _, it := range v { - m := it.(map[string]any) - switch m["type"] { - case "text": - if textValue, ok := m["text"]; ok { - if textString, ok := textValue.(string); ok { - tokenNum += getTokenNum(tokenEncoder, textString) - } - } - case "image_url": - imageUrl, ok := m["image_url"].(map[string]any) - if ok { - url := imageUrl["url"].(string) - detail := "" - if imageUrl["detail"] != nil { - detail = imageUrl["detail"].(string) - } - imageTokens, err := countImageTokens(url, detail, model) - if err != nil { - logger.SysError("error counting image tokens: " + err.Error()) - } else { - tokenNum += imageTokens - } - } + contents := message.ParseContent() + for _, content := range contents { + switch content.Type { + case model.ContentTypeText: + if content.Text != nil { + tokenNum += getTokenNum(tokenEncoder, *content.Text) + } + case model.ContentTypeImageURL: + imageTokens, err := countImageTokens( + content.ImageURL.Url, + content.ImageURL.Detail, + actualModel) + if err != nil { + logger.SysError("error counting image tokens: " + err.Error()) + } else { + tokenNum += imageTokens + } + case model.ContentTypeInputAudio: + audioData, err := base64.StdEncoding.DecodeString(content.InputAudio.Data) + if err != nil { + logger.SysError("error decoding audio data: " + err.Error()) + } + + audioTokens, err := helper.GetAudioTokens(ctx, + bytes.NewReader(audioData), + ratio.GetAudioPromptTokensPerSecond(actualModel)) + if err != nil { + logger.SysError("error counting audio tokens: " + err.Error()) + } else { + totalAudioTokens += audioTokens } } } + + tokenNum += int(math.Ceil(totalAudioTokens)) + tokenNum += getTokenNum(tokenEncoder, message.Role) if message.Name != nil { tokenNum += tokensPerName diff --git a/relay/adaptor/openai/util.go b/relay/adaptor/openai/util.go index 83beadba..ca5605c4 100644 --- a/relay/adaptor/openai/util.go +++ b/relay/adaptor/openai/util.go @@ -3,6 +3,7 @@ package openai import ( "context" "fmt" + "strings" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay/model" 
@@ -21,3 +22,11 @@ func ErrorWrapper(err error, code string, statusCode int) *model.ErrorWithStatus
 		StatusCode: statusCode,
 	}
 }
+
+// NormalizeDataLine normalizes an SSE line so that "data:foo" and
+// "data:   foo" both become "data: foo".
+func NormalizeDataLine(data string) string {
+	if strings.HasPrefix(data, "data:") {
+		content := strings.TrimLeft(data[len("data:"):], " ")
+		return "data: " + content
+	}
+	return data
+}
diff --git a/relay/adaptor/openrouter/model.go b/relay/adaptor/openrouter/model.go
new file mode 100644
index 00000000..581bc2cc
--- /dev/null
+++ b/relay/adaptor/openrouter/model.go
@@ -0,0 +1,22 @@
+package openrouter
+
+// RequestProvider customizes how requests are routed, via the provider object
+// in the request body for Chat Completions and Completions.
+//
+// https://openrouter.ai/docs/features/provider-routing
+type RequestProvider struct {
+	// Order is a list of provider names to try in order (e.g. ["Anthropic", "OpenAI"]). Default: empty
+	Order []string `json:"order,omitempty"`
+	// AllowFallbacks controls whether backup providers may be used when the primary is unavailable. Default: true
+	AllowFallbacks bool `json:"allow_fallbacks,omitempty"`
+	// RequireParameters restricts routing to providers that support all parameters in the request. Default: false
+	RequireParameters bool `json:"require_parameters,omitempty"`
+	// DataCollection controls whether to use providers that may store data ("allow" or "deny"). Default: "allow"
+	DataCollection string `json:"data_collection,omitempty" binding:"omitempty,oneof=allow deny"`
+	// Ignore is a list of provider names to skip for this request. Default: empty
+	Ignore []string `json:"ignore,omitempty"`
+	// Quantizations is a list of quantization levels to filter by (e.g. ["int4", "int8"]). Default: empty
+	Quantizations []string `json:"quantizations,omitempty"`
+	// Sort orders providers by "price", "throughput", or "latency". Default: empty
+	Sort string `json:"sort,omitempty" binding:"omitempty,oneof=price throughput latency"`
+}
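+
+// For example, a request that prefers the fastest provider might send
+// (illustrative JSON fragment):
+//
+//	"provider": {"sort": "throughput"}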
diff --git a/relay/adaptor/palm/adaptor.go b/relay/adaptor/palm/adaptor.go
index 98aa3e18..9b51562d 100644
--- a/relay/adaptor/palm/adaptor.go
+++ b/relay/adaptor/palm/adaptor.go
@@ -36,7 +36,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 	return ConvertRequest(*request), nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
diff --git a/relay/adaptor/palm/palm.go b/relay/adaptor/palm/palm.go
index d31784ec..f3875417 100644
--- a/relay/adaptor/palm/palm.go
+++ b/relay/adaptor/palm/palm.go
@@ -25,11 +25,17 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
 		Prompt: Prompt{
 			Messages: make([]ChatMessage, 0, len(textRequest.Messages)),
 		},
-		Temperature:    textRequest.Temperature,
-		CandidateCount: textRequest.N,
-		TopP:           textRequest.TopP,
-		TopK:           textRequest.MaxTokens,
+		Temperature: textRequest.Temperature,
+		TopP:        textRequest.TopP,
+		TopK:        textRequest.MaxTokens,
 	}
+
+	if textRequest.N != nil {
+		palmRequest.CandidateCount = *textRequest.N
+	} else {
+		palmRequest.CandidateCount = 1
+	}
+
 	for _, message := range textRequest.Messages {
 		palmMessage := ChatMessage{
 			Content: message.StringContent(),
diff --git a/relay/adaptor/proxy/adaptor.go b/relay/adaptor/proxy/adaptor.go
index 670c7628..32984fc7 100644
--- a/relay/adaptor/proxy/adaptor.go
+++ b/relay/adaptor/proxy/adaptor.go
@@ -80,7 +80,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me
 	return nil
 }
 
-func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) {
 	return nil, errors.Errorf("not implement")
 }
 
diff --git a/relay/adaptor/replicate/adaptor.go b/relay/adaptor/replicate/adaptor.go
index a60a7de3..2b4c9af7 100644
--- a/relay/adaptor/replicate/adaptor.go
+++ b/relay/adaptor/replicate/adaptor.go
@@ -23,7 +23,7 @@ type Adaptor struct {
 }
 
 // ConvertImageRequest implements adaptor.Adaptor.
-func (*Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (*Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { return DrawImageRequest{ Input: ImageInput{ Steps: 25, diff --git a/relay/adaptor/replicate/constant.go b/relay/adaptor/replicate/constant.go index 989142c9..6ac06bd0 100644 --- a/relay/adaptor/replicate/constant.go +++ b/relay/adaptor/replicate/constant.go @@ -33,9 +33,16 @@ var ModelList = []string{ // ------------------------------------- // language model // ------------------------------------- + "anthropic/claude-3.5-haiku", + "anthropic/claude-3.5-sonnet", + "anthropic/claude-3.7-sonnet", + "deepseek-ai/deepseek-r1", "ibm-granite/granite-20b-code-instruct-8k", "ibm-granite/granite-3.0-2b-instruct", "ibm-granite/granite-3.0-8b-instruct", + "ibm-granite/granite-3.1-2b-instruct", + "ibm-granite/granite-3.1-8b-instruct", + "ibm-granite/granite-3.2-8b-instruct", "ibm-granite/granite-8b-code-instruct-128k", "meta/llama-2-13b", "meta/llama-2-13b-chat", @@ -50,7 +57,6 @@ var ModelList = []string{ "meta/meta-llama-3-8b-instruct", "mistralai/mistral-7b-instruct-v0.2", "mistralai/mistral-7b-v0.1", - "mistralai/mixtral-8x7b-instruct-v0.1", // ------------------------------------- // video model // ------------------------------------- diff --git a/relay/adaptor/tencent/adaptor.go b/relay/adaptor/tencent/adaptor.go index b20d4279..ce964dab 100644 --- a/relay/adaptor/tencent/adaptor.go +++ b/relay/adaptor/tencent/adaptor.go @@ -69,7 +69,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G return convertedRequest, nil } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/adaptor/vertexai/adaptor.go b/relay/adaptor/vertexai/adaptor.go index 3fab4a45..3b9e1a7f 100644 --- a/relay/adaptor/vertexai/adaptor.go +++ b/relay/adaptor/vertexai/adaptor.go @@ -105,7 +105,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me return nil } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/adaptor/vertexai/claude/adapter.go b/relay/adaptor/vertexai/claude/adapter.go index cb911cfe..f591e447 100644 --- a/relay/adaptor/vertexai/claude/adapter.go +++ b/relay/adaptor/vertexai/claude/adapter.go @@ -19,6 +19,7 @@ var ModelList = []string{ "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-5-haiku@20241022", + "claude-3-7-sonnet@20250219", } const anthropicVersion = "vertex-2023-10-16" @@ -31,7 +32,11 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G return nil, errors.New("request is nil") } - claudeReq := anthropic.ConvertRequest(*request) + claudeReq, err := anthropic.ConvertRequest(c, *request) + if err != nil { + return nil, errors.Wrap(err, "convert request") + } + req := Request{ AnthropicVersion: anthropicVersion, // Model: claudeReq.Model, diff --git a/relay/adaptor/xunfei/adaptor.go b/relay/adaptor/xunfei/adaptor.go index b5967f26..404ec767 100644 --- a/relay/adaptor/xunfei/adaptor.go +++ b/relay/adaptor/xunfei/adaptor.go @@ -39,7 +39,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode 
int, request *model.G return nil, nil } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/adaptor/xunfei/main.go b/relay/adaptor/xunfei/main.go index 9a8aef15..b02462fa 100644 --- a/relay/adaptor/xunfei/main.go +++ b/relay/adaptor/xunfei/main.go @@ -41,10 +41,15 @@ func requestOpenAI2Xunfei(request model.GeneralOpenAIRequest, xunfeiAppId string xunfeiRequest.Header.AppId = xunfeiAppId xunfeiRequest.Parameter.Chat.Domain = domain xunfeiRequest.Parameter.Chat.Temperature = request.Temperature - xunfeiRequest.Parameter.Chat.TopK = request.N xunfeiRequest.Parameter.Chat.MaxTokens = request.MaxTokens xunfeiRequest.Payload.Message.Text = messages + if request.N != nil { + xunfeiRequest.Parameter.Chat.TopK = *request.N + } else { + xunfeiRequest.Parameter.Chat.TopK = 1 + } + if strings.HasPrefix(domain, "generalv3") || domain == "4.0Ultra" { functions := make([]model.Function, len(request.Tools)) for i, tool := range request.Tools { diff --git a/relay/adaptor/zhipu/adaptor.go b/relay/adaptor/zhipu/adaptor.go index 660bd379..1ae9e3f7 100644 --- a/relay/adaptor/zhipu/adaptor.go +++ b/relay/adaptor/zhipu/adaptor.go @@ -80,7 +80,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G } } -func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) { +func (a *Adaptor) ConvertImageRequest(_ *gin.Context, request *model.ImageRequest) (any, error) { if request == nil { return nil, errors.New("request is nil") } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index e8b3b615..0c3fbb08 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -9,11 +9,20 @@ import ( "github.com/songquanpeng/one-api/common/logger" ) +// Constants defining currency conversion and token pricing const ( - USD2RMB = 7 - USD = 500 // $0.002 = 1 -> $1 = 500 - MILLI_USD = 1.0 / 1000 * USD - RMB = USD / USD2RMB + USD2RMB float64 = 7 + // QuotaPerUsd is the number of tokens per USD + QuotaPerUsd float64 = 500000 // $0.002 / 1K tokens + // KiloTokensUsd multiply by the USD price per 1,000 tokens to get the quota cost per token + KiloTokensUsd float64 = QuotaPerUsd / 1000 + // MilliTokensUsd multiply by the USD price per 1 million tokens to get the quota cost per token + MilliTokensUsd float64 = KiloTokensUsd / 1000 + // KiloRmb multiply by the RMB price per 1,000 tokens to get the quota cost per token + KiloRmb float64 = KiloTokensUsd / USD2RMB + // MilliRmb multiply by the RMB price per 1 million tokens to get the quota cost per token + MilliRmb float64 = MilliTokensUsd / USD2RMB + ImageUsdPerPic float64 = QuotaPerUsd / 1000 ) var modelRatioLock sync.RWMutex @@ -25,305 +34,342 @@ var modelRatioLock sync.RWMutex // 1 === $0.002 / 1K tokens // 1 === ¥0.014 / 1k tokens var ModelRatio = map[string]float64{ + // ------------------------------------- + // OpenAI // https://openai.com/pricing - "gpt-4": 15, - "gpt-4-0314": 15, - "gpt-4-0613": 15, - "gpt-4-32k": 30, - "gpt-4-32k-0314": 30, - "gpt-4-32k-0613": 30, - "gpt-4-1106-preview": 5, // $0.01 / 1K tokens - "gpt-4-0125-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo": 5, // $0.01 / 1K tokens - "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens - "gpt-4o": 2.5, // $0.005 / 1K tokens - "chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens - 
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens - "gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens - "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens - "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens - "gpt-4-vision-preview": 5, // $0.01 / 1K tokens - "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens - "gpt-3.5-turbo-0301": 0.75, - "gpt-3.5-turbo-0613": 0.75, - "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens - "gpt-3.5-turbo-16k-0613": 1.5, - "gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens - "gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens - "gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens - "o1": 7.5, // $15.00 / 1M input tokens - "o1-2024-12-17": 7.5, - "o1-preview": 7.5, // $15.00 / 1M input tokens - "o1-preview-2024-09-12": 7.5, - "o1-mini": 1.5, // $3.00 / 1M input tokens - "o1-mini-2024-09-12": 1.5, - "o3-mini": 1.5, // $3.00 / 1M input tokens - "o3-mini-2025-01-31": 1.5, - "davinci-002": 1, // $0.002 / 1K tokens - "babbage-002": 0.2, // $0.0004 / 1K tokens - "text-ada-001": 0.2, - "text-babbage-001": 0.25, - "text-curie-001": 1, - "text-davinci-002": 10, - "text-davinci-003": 10, - "text-davinci-edit-001": 10, - "code-davinci-edit-001": 10, - "whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens - "tts-1": 7.5, // $0.015 / 1K characters - "tts-1-1106": 7.5, - "tts-1-hd": 15, // $0.030 / 1K characters - "tts-1-hd-1106": 15, - "davinci": 10, - "curie": 10, - "babbage": 10, - "ada": 10, - "text-embedding-ada-002": 0.05, - "text-embedding-3-small": 0.01, - "text-embedding-3-large": 0.065, - "text-search-ada-doc-001": 10, - "text-moderation-stable": 0.1, - "text-moderation-latest": 0.1, - "dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image - "dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image - // https://docs.anthropic.com/en/docs/about-claude/models - "claude-instant-1.2": 0.8 / 1000 * USD, - "claude-2.0": 8.0 / 1000 * USD, - "claude-2.1": 8.0 / 1000 * USD, - "claude-3-haiku-20240307": 0.25 / 1000 * USD, - "claude-3-5-haiku-20241022": 1.0 / 1000 * USD, - "claude-3-5-haiku-latest": 1.0 / 1000 * USD, - "claude-3-sonnet-20240229": 3.0 / 1000 * USD, - "claude-3-5-sonnet-20240620": 3.0 / 1000 * USD, - "claude-3-5-sonnet-20241022": 3.0 / 1000 * USD, - "claude-3-5-sonnet-latest": 3.0 / 1000 * USD, - "claude-3-opus-20240229": 15.0 / 1000 * USD, + // ------------------------------------- + "gpt-4.5-preview": 75 * MilliTokensUsd, + "gpt-4.5-preview-2025-02-27": 75 * MilliTokensUsd, + "gpt-4": 30 * MilliTokensUsd, + "gpt-4-0314": 30 * MilliTokensUsd, + "gpt-4-0613": 30 * MilliTokensUsd, + "gpt-4-32k": 60 * MilliTokensUsd, + "gpt-4-32k-0314": 60 * MilliTokensUsd, + "gpt-4-32k-0613": 60 * MilliTokensUsd, + "gpt-4-1106-preview": 10 * MilliTokensUsd, + "gpt-4-0125-preview": 10 * MilliTokensUsd, + "gpt-4-turbo-preview": 10 * MilliTokensUsd, + "gpt-4-turbo": 10 * MilliTokensUsd, + "gpt-4-turbo-2024-04-09": 10 * MilliTokensUsd, + "gpt-4o": 2.5 * MilliTokensUsd, + "chatgpt-4o-latest": 5 * MilliTokensUsd, + "gpt-4o-2024-05-13": 5 * MilliTokensUsd, + "gpt-4o-2024-08-06": 2.5 * MilliTokensUsd, + "gpt-4o-2024-11-20": 2.5 * MilliTokensUsd, + "gpt-4o-search-preview": 5 * MilliTokensUsd, + "gpt-4o-mini": 0.15 * MilliTokensUsd, + "gpt-4o-mini-2024-07-18": 0.15 * MilliTokensUsd, + "gpt-4o-mini-search-preview": 0.15 * MilliTokensUsd, + "gpt-4-vision-preview": 10 * MilliTokensUsd, + // Audio billing will mix text and audio tokens, the unit price is different. 
+ // Here records the cost of text, the cost multiplier of audio + // relative to text is in AudioRatio + "gpt-4o-audio-preview": 2.5 * MilliTokensUsd, + "gpt-4o-audio-preview-2024-12-17": 2.5 * MilliTokensUsd, + "gpt-4o-audio-preview-2024-10-01": 2.5 * MilliTokensUsd, + "gpt-4o-mini-audio-preview": 0.15 * MilliTokensUsd, + "gpt-4o-mini-audio-preview-2024-12-17": 0.15 * MilliTokensUsd, + "gpt-3.5-turbo": 0.5 * MilliTokensUsd, + "gpt-3.5-turbo-0301": 1.5 * MilliTokensUsd, + "gpt-3.5-turbo-0613": 1.5 * MilliTokensUsd, + "gpt-3.5-turbo-16k": 3 * MilliTokensUsd, + "gpt-3.5-turbo-16k-0613": 3 * MilliTokensUsd, + "gpt-3.5-turbo-instruct": 1.5 * MilliTokensUsd, + "gpt-3.5-turbo-1106": 1 * MilliTokensUsd, + "gpt-3.5-turbo-0125": 0.5 * MilliTokensUsd, + "o1": 15 * MilliTokensUsd, + "o1-2024-12-17": 15 * MilliTokensUsd, + "o1-preview": 15 * MilliTokensUsd, + "o1-preview-2024-09-12": 15 * MilliTokensUsd, + "o1-mini": 1.1 * MilliTokensUsd, + "o1-mini-2024-09-12": 1.1 * MilliTokensUsd, + "o3-mini": 1.1 * MilliTokensUsd, + "o3-mini-2025-01-31": 1.1 * MilliTokensUsd, + "davinci-002": 2 * MilliTokensUsd, + "babbage-002": 0.4 * MilliTokensUsd, + "text-ada-001": 0.4 * MilliTokensUsd, + "text-babbage-001": 0.5 * MilliTokensUsd, + "text-curie-001": 2 * MilliTokensUsd, + "text-davinci-002": 20 * MilliTokensUsd, + "text-davinci-003": 20 * MilliTokensUsd, + "text-davinci-edit-001": 20 * MilliTokensUsd, + "code-davinci-edit-001": 20 * MilliTokensUsd, + "whisper-1": 30 * MilliTokensUsd, + "tts-1": 15 * MilliTokensUsd, + "tts-1-1106": 15 * MilliTokensUsd, + "tts-1-hd": 30 * MilliTokensUsd, + "tts-1-hd-1106": 30 * MilliTokensUsd, + "davinci": 20 * MilliTokensUsd, + "curie": 20 * MilliTokensUsd, + "babbage": 20 * MilliTokensUsd, + "ada": 20 * MilliTokensUsd, + "text-embedding-ada-002": 0.1 * MilliTokensUsd, + "text-embedding-3-small": 0.02 * MilliTokensUsd, + "text-embedding-3-large": 0.13 * MilliTokensUsd, + "text-search-ada-doc-001": 20 * MilliTokensUsd, + "text-moderation-stable": 0.2 * MilliTokensUsd, + "text-moderation-latest": 0.2 * MilliTokensUsd, + "dall-e-2": 0.02 * ImageUsdPerPic, + "dall-e-3": 0.04 * ImageUsdPerPic, + // https://www.anthropic.com/api#pricing + "claude-instant-1.2": 0.8 * MilliTokensUsd, + "claude-2.0": 8.0 * MilliTokensUsd, + "claude-2.1": 8.0 * MilliTokensUsd, + "claude-3-haiku-20240307": 0.25 * MilliTokensUsd, + "claude-3-5-haiku-20241022": 1.0 * MilliTokensUsd, + "claude-3-5-haiku-latest": 1.0 * MilliTokensUsd, + "claude-3-sonnet-20240229": 3.0 * MilliTokensUsd, + "claude-3-5-sonnet-20240620": 3.0 * MilliTokensUsd, + "claude-3-5-sonnet-20241022": 3.0 * MilliTokensUsd, + "claude-3-5-sonnet-latest": 3.0 * MilliTokensUsd, + "claude-3-7-sonnet-20250219": 3.0 * MilliTokensUsd, + "claude-3-7-sonnet-latest": 3.0 * MilliTokensUsd, + "claude-3-opus-20240229": 15.0 * MilliTokensUsd, // https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 - "ERNIE-4.0-8K": 0.120 * RMB, - "ERNIE-3.5-8K": 0.012 * RMB, - "ERNIE-3.5-8K-0205": 0.024 * RMB, - "ERNIE-3.5-8K-1222": 0.012 * RMB, - "ERNIE-Bot-8K": 0.024 * RMB, - "ERNIE-3.5-4K-0205": 0.012 * RMB, - "ERNIE-Speed-8K": 0.004 * RMB, - "ERNIE-Speed-128K": 0.004 * RMB, - "ERNIE-Lite-8K-0922": 0.008 * RMB, - "ERNIE-Lite-8K-0308": 0.003 * RMB, - "ERNIE-Tiny-8K": 0.001 * RMB, - "BLOOMZ-7B": 0.004 * RMB, - "Embedding-V1": 0.002 * RMB, - "bge-large-zh": 0.002 * RMB, - "bge-large-en": 0.002 * RMB, - "tao-8k": 0.002 * RMB, + "ERNIE-4.0-8K": 0.120 * KiloRmb, + "ERNIE-3.5-8K": 0.012 * KiloRmb, + "ERNIE-3.5-8K-0205": 0.024 * KiloRmb, + "ERNIE-3.5-8K-1222": 0.012 * 
KiloRmb, + "ERNIE-Bot-8K": 0.024 * KiloRmb, + "ERNIE-3.5-4K-0205": 0.012 * KiloRmb, + "ERNIE-Speed-8K": 0.004 * KiloRmb, + "ERNIE-Speed-128K": 0.004 * KiloRmb, + "ERNIE-Lite-8K-0922": 0.008 * KiloRmb, + "ERNIE-Lite-8K-0308": 0.003 * KiloRmb, + "ERNIE-Tiny-8K": 0.001 * KiloRmb, + "BLOOMZ-7B": 0.004 * KiloRmb, + "Embedding-V1": 0.002 * KiloRmb, + "bge-large-zh": 0.002 * KiloRmb, + "bge-large-en": 0.002 * KiloRmb, + "tao-8k": 0.002 * KiloRmb, // https://ai.google.dev/pricing // https://cloud.google.com/vertex-ai/generative-ai/pricing - // "gemma-2-2b-it": 0, - // "gemma-2-9b-it": 0, - // "gemma-2-27b-it": 0, - "gemini-pro": 0.25 * MILLI_USD, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "gemini-1.0-pro": 0.125 * MILLI_USD, - "gemini-1.5-pro": 1.25 * MILLI_USD, - "gemini-1.5-pro-001": 1.25 * MILLI_USD, - "gemini-1.5-pro-experimental": 1.25 * MILLI_USD, - "gemini-1.5-flash": 0.075 * MILLI_USD, - "gemini-1.5-flash-001": 0.075 * MILLI_USD, - "gemini-1.5-flash-8b": 0.0375 * MILLI_USD, - "gemini-2.0-flash-exp": 0.075 * MILLI_USD, - "gemini-2.0-flash": 0.15 * MILLI_USD, - "gemini-2.0-flash-001": 0.15 * MILLI_USD, - "gemini-2.0-flash-lite-preview-02-05": 0.075 * MILLI_USD, - "gemini-2.0-flash-thinking-exp-01-21": 0.075 * MILLI_USD, - "gemini-2.0-pro-exp-02-05": 1.25 * MILLI_USD, - "aqa": 1, + "gemma-2-2b-it": 0, + "gemma-2-9b-it": 0, + "gemma-2-27b-it": 0, + "gemma-3-27b-it": 0, + "gemini-pro": 0.25 * MilliTokensUsd, // $0.00025 / 1k characters -> $0.001 / 1k tokens + "gemini-1.0-pro": 0.125 * MilliTokensUsd, + "gemini-1.0-pro-vision": 0.125 * MilliTokensUsd, + "gemini-1.5-pro": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-001": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-002": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-experimental": 1.25 * MilliTokensUsd, + "gemini-1.5-flash": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-001": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-002": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-8b": 0.0375 * MilliTokensUsd, + "gemini-2.0-flash": 0.15 * MilliTokensUsd, + "gemini-2.0-flash-exp": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-001": 0.15 * MilliTokensUsd, + "gemini-2.0-flash-lite": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-lite-001": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-lite-preview-02-05": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-thinking-exp-01-21": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-exp-image-generation": 0.075 * MilliTokensUsd, + "gemini-2.0-pro-exp-02-05": 1.25 * MilliTokensUsd, + "gemini-2.5-pro-exp-03-25": 1.25 * MilliTokensUsd, + "aqa": 1, // https://open.bigmodel.cn/pricing - "glm-zero-preview": 0.01 * RMB, - "glm-4-plus": 0.05 * RMB, - "glm-4-0520": 0.1 * RMB, - "glm-4-airx": 0.01 * RMB, - "glm-4-air": 0.0005 * RMB, - "glm-4-long": 0.001 * RMB, - "glm-4-flashx": 0.0001 * RMB, + "glm-zero-preview": 0.01 * KiloRmb, + "glm-4-plus": 0.05 * KiloRmb, + "glm-4-0520": 0.1 * KiloRmb, + "glm-4-airx": 0.01 * KiloRmb, + "glm-4-air": 0.0005 * KiloRmb, + "glm-4-long": 0.001 * KiloRmb, + "glm-4-flashx": 0.0001 * KiloRmb, "glm-4-flash": 0, - "glm-4": 0.1 * RMB, // deprecated model, available until 2025/06 - "glm-3-turbo": 0.001 * RMB, // deprecated model, available until 2025/06 - "glm-4v-plus": 0.004 * RMB, - "glm-4v": 0.05 * RMB, + "glm-4": 0.1 * KiloRmb, // deprecated model, available until 2025/06 + "glm-3-turbo": 0.001 * KiloRmb, // deprecated model, available until 2025/06 + "glm-4v-plus": 0.004 * KiloRmb, + "glm-4v": 0.05 * KiloRmb, "glm-4v-flash": 0, - "cogview-3-plus": 0.06 * RMB, - "cogview-3": 0.1 * RMB, + "cogview-3-plus": 0.06 * KiloRmb, + 
"cogview-3": 0.1 * KiloRmb, "cogview-3-flash": 0, - "cogviewx": 0.5 * RMB, + "cogviewx": 0.5 * KiloRmb, "cogviewx-flash": 0, - "charglm-4": 0.001 * RMB, - "emohaa": 0.015 * RMB, - "codegeex-4": 0.0001 * RMB, - "embedding-2": 0.0005 * RMB, - "embedding-3": 0.0005 * RMB, + "charglm-4": 0.001 * KiloRmb, + "emohaa": 0.015 * KiloRmb, + "codegeex-4": 0.0001 * KiloRmb, + "embedding-2": 0.0005 * KiloRmb, + "embedding-3": 0.0005 * KiloRmb, // https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing - "qwen-turbo": 0.0003 * RMB, - "qwen-turbo-latest": 0.0003 * RMB, - "qwen-plus": 0.0008 * RMB, - "qwen-plus-latest": 0.0008 * RMB, - "qwen-max": 0.0024 * RMB, - "qwen-max-latest": 0.0024 * RMB, - "qwen-max-longcontext": 0.0005 * RMB, - "qwen-vl-max": 0.003 * RMB, - "qwen-vl-max-latest": 0.003 * RMB, - "qwen-vl-plus": 0.0015 * RMB, - "qwen-vl-plus-latest": 0.0015 * RMB, - "qwen-vl-ocr": 0.005 * RMB, - "qwen-vl-ocr-latest": 0.005 * RMB, + "qwen-turbo": 0.0003 * KiloRmb, + "qwen-turbo-latest": 0.0003 * KiloRmb, + "qwen-plus": 0.0008 * KiloRmb, + "qwen-plus-latest": 0.0008 * KiloRmb, + "qwen-max": 0.0024 * KiloRmb, + "qwen-max-latest": 0.0024 * KiloRmb, + "qwen-max-longcontext": 0.0005 * KiloRmb, + "qwen-vl-max": 0.003 * KiloRmb, + "qwen-vl-max-latest": 0.003 * KiloRmb, + "qwen-vl-plus": 0.0015 * KiloRmb, + "qwen-vl-plus-latest": 0.0015 * KiloRmb, + "qwen-vl-ocr": 0.005 * KiloRmb, + "qwen-vl-ocr-latest": 0.005 * KiloRmb, "qwen-audio-turbo": 1.4286, - "qwen-math-plus": 0.004 * RMB, - "qwen-math-plus-latest": 0.004 * RMB, - "qwen-math-turbo": 0.002 * RMB, - "qwen-math-turbo-latest": 0.002 * RMB, - "qwen-coder-plus": 0.0035 * RMB, - "qwen-coder-plus-latest": 0.0035 * RMB, - "qwen-coder-turbo": 0.002 * RMB, - "qwen-coder-turbo-latest": 0.002 * RMB, - "qwen-mt-plus": 0.015 * RMB, - "qwen-mt-turbo": 0.001 * RMB, - "qwq-32b-preview": 0.002 * RMB, - "qwen2.5-72b-instruct": 0.004 * RMB, - "qwen2.5-32b-instruct": 0.03 * RMB, - "qwen2.5-14b-instruct": 0.001 * RMB, - "qwen2.5-7b-instruct": 0.0005 * RMB, - "qwen2.5-3b-instruct": 0.006 * RMB, - "qwen2.5-1.5b-instruct": 0.0003 * RMB, - "qwen2.5-0.5b-instruct": 0.0003 * RMB, - "qwen2-72b-instruct": 0.004 * RMB, - "qwen2-57b-a14b-instruct": 0.0035 * RMB, - "qwen2-7b-instruct": 0.001 * RMB, - "qwen2-1.5b-instruct": 0.001 * RMB, - "qwen2-0.5b-instruct": 0.001 * RMB, - "qwen1.5-110b-chat": 0.007 * RMB, - "qwen1.5-72b-chat": 0.005 * RMB, - "qwen1.5-32b-chat": 0.0035 * RMB, - "qwen1.5-14b-chat": 0.002 * RMB, - "qwen1.5-7b-chat": 0.001 * RMB, - "qwen1.5-1.8b-chat": 0.001 * RMB, - "qwen1.5-0.5b-chat": 0.001 * RMB, - "qwen-72b-chat": 0.02 * RMB, - "qwen-14b-chat": 0.008 * RMB, - "qwen-7b-chat": 0.006 * RMB, - "qwen-1.8b-chat": 0.006 * RMB, - "qwen-1.8b-longcontext-chat": 0.006 * RMB, - "qvq-72b-preview": 0.012 * RMB, - "qwen2.5-vl-72b-instruct": 0.016 * RMB, - "qwen2.5-vl-7b-instruct": 0.002 * RMB, - "qwen2.5-vl-3b-instruct": 0.0012 * RMB, - "qwen2-vl-7b-instruct": 0.016 * RMB, - "qwen2-vl-2b-instruct": 0.002 * RMB, - "qwen-vl-v1": 0.002 * RMB, - "qwen-vl-chat-v1": 0.002 * RMB, - "qwen2-audio-instruct": 0.002 * RMB, - "qwen-audio-chat": 0.002 * RMB, - "qwen2.5-math-72b-instruct": 0.004 * RMB, - "qwen2.5-math-7b-instruct": 0.001 * RMB, - "qwen2.5-math-1.5b-instruct": 0.001 * RMB, - "qwen2-math-72b-instruct": 0.004 * RMB, - "qwen2-math-7b-instruct": 0.001 * RMB, - "qwen2-math-1.5b-instruct": 0.001 * RMB, - "qwen2.5-coder-32b-instruct": 0.002 * RMB, - "qwen2.5-coder-14b-instruct": 0.002 * RMB, - "qwen2.5-coder-7b-instruct": 0.001 * RMB, - 
"qwen2.5-coder-3b-instruct": 0.001 * RMB, - "qwen2.5-coder-1.5b-instruct": 0.001 * RMB, - "qwen2.5-coder-0.5b-instruct": 0.001 * RMB, - "text-embedding-v1": 0.0007 * RMB, // ¥0.0007 / 1k tokens - "text-embedding-v3": 0.0007 * RMB, - "text-embedding-v2": 0.0007 * RMB, - "text-embedding-async-v2": 0.0007 * RMB, - "text-embedding-async-v1": 0.0007 * RMB, - "ali-stable-diffusion-xl": 8.00, - "ali-stable-diffusion-v1.5": 8.00, - "wanx-v1": 8.00, - "deepseek-r1": 0.002 * RMB, - "deepseek-v3": 0.001 * RMB, - "deepseek-r1-distill-qwen-1.5b": 0.001 * RMB, - "deepseek-r1-distill-qwen-7b": 0.0005 * RMB, - "deepseek-r1-distill-qwen-14b": 0.001 * RMB, - "deepseek-r1-distill-qwen-32b": 0.002 * RMB, - "deepseek-r1-distill-llama-8b": 0.0005 * RMB, - "deepseek-r1-distill-llama-70b": 0.004 * RMB, - "SparkDesk": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens - "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens - "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens - "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens - "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens + "qwen-math-plus": 0.004 * KiloRmb, + "qwen-math-plus-latest": 0.004 * KiloRmb, + "qwen-math-turbo": 0.002 * KiloRmb, + "qwen-math-turbo-latest": 0.002 * KiloRmb, + "qwen-coder-plus": 0.0035 * KiloRmb, + "qwen-coder-plus-latest": 0.0035 * KiloRmb, + "qwen-coder-turbo": 0.002 * KiloRmb, + "qwen-coder-turbo-latest": 0.002 * KiloRmb, + "qwen-mt-plus": 0.015 * KiloRmb, + "qwen-mt-turbo": 0.001 * KiloRmb, + "qwq-32b-preview": 0.002 * KiloRmb, + "qwen2.5-72b-instruct": 0.004 * KiloRmb, + "qwen2.5-32b-instruct": 0.03 * KiloRmb, + "qwen2.5-14b-instruct": 0.001 * KiloRmb, + "qwen2.5-7b-instruct": 0.0005 * KiloRmb, + "qwen2.5-3b-instruct": 0.006 * KiloRmb, + "qwen2.5-1.5b-instruct": 0.0003 * KiloRmb, + "qwen2.5-0.5b-instruct": 0.0003 * KiloRmb, + "qwen2-72b-instruct": 0.004 * KiloRmb, + "qwen2-57b-a14b-instruct": 0.0035 * KiloRmb, + "qwen2-7b-instruct": 0.001 * KiloRmb, + "qwen2-1.5b-instruct": 0.001 * KiloRmb, + "qwen2-0.5b-instruct": 0.001 * KiloRmb, + "qwen1.5-110b-chat": 0.007 * KiloRmb, + "qwen1.5-72b-chat": 0.005 * KiloRmb, + "qwen1.5-32b-chat": 0.0035 * KiloRmb, + "qwen1.5-14b-chat": 0.002 * KiloRmb, + "qwen1.5-7b-chat": 0.001 * KiloRmb, + "qwen1.5-1.8b-chat": 0.001 * KiloRmb, + "qwen1.5-0.5b-chat": 0.001 * KiloRmb, + "qwen-72b-chat": 0.02 * KiloRmb, + "qwen-14b-chat": 0.008 * KiloRmb, + "qwen-7b-chat": 0.006 * KiloRmb, + "qwen-1.8b-chat": 0.006 * KiloRmb, + "qwen-1.8b-longcontext-chat": 0.006 * KiloRmb, + "qvq-72b-preview": 0.012 * KiloRmb, + "qwen2.5-vl-72b-instruct": 0.016 * KiloRmb, + "qwen2.5-vl-7b-instruct": 0.002 * KiloRmb, + "qwen2.5-vl-3b-instruct": 0.0012 * KiloRmb, + "qwen2-vl-7b-instruct": 0.016 * KiloRmb, + "qwen2-vl-2b-instruct": 0.002 * KiloRmb, + "qwen-vl-v1": 0.002 * KiloRmb, + "qwen-vl-chat-v1": 0.002 * KiloRmb, + "qwen2-audio-instruct": 0.002 * KiloRmb, + "qwen-audio-chat": 0.002 * KiloRmb, + "qwen2.5-math-72b-instruct": 0.004 * KiloRmb, + "qwen2.5-math-7b-instruct": 0.001 * KiloRmb, + "qwen2.5-math-1.5b-instruct": 0.001 * KiloRmb, + "qwen2-math-72b-instruct": 0.004 * KiloRmb, + "qwen2-math-7b-instruct": 0.001 * KiloRmb, + "qwen2-math-1.5b-instruct": 0.001 * KiloRmb, + 
"qwen2.5-coder-32b-instruct": 0.002 * KiloRmb, + "qwen2.5-coder-14b-instruct": 0.002 * KiloRmb, + "qwen2.5-coder-7b-instruct": 0.001 * KiloRmb, + "qwen2.5-coder-3b-instruct": 0.001 * KiloRmb, + "qwen2.5-coder-1.5b-instruct": 0.001 * KiloRmb, + "qwen2.5-coder-0.5b-instruct": 0.001 * KiloRmb, + "text-embedding-v1": 0.0007 * KiloRmb, // ¥0.0007 / 1k tokens + "text-embedding-v3": 0.0007 * KiloRmb, + "text-embedding-v2": 0.0007 * KiloRmb, + "text-embedding-async-v2": 0.0007 * KiloRmb, + "text-embedding-async-v1": 0.0007 * KiloRmb, + "ali-stable-diffusion-xl": 0.016 * ImageUsdPerPic, + "ali-stable-diffusion-v1.5": 0.016 * ImageUsdPerPic, + "wanx-v1": 0.016 * ImageUsdPerPic, + "deepseek-r1": 0.002 * KiloRmb, + "deepseek-v3": 0.001 * KiloRmb, + "deepseek-r1-distill-qwen-1.5b": 0.001 * KiloRmb, + "deepseek-r1-distill-qwen-7b": 0.0005 * KiloRmb, + "deepseek-r1-distill-qwen-14b": 0.001 * KiloRmb, + // "deepseek-r1-distill-qwen-32b": 0.002 * KiloRmb, + "deepseek-r1-distill-llama-8b": 0.0005 * KiloRmb, + // "deepseek-r1-distill-llama-70b": 0.004 * KiloRmb, + "SparkDesk": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.1-128K": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.5-32K": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v4.0": 1.2858, // ¥0.018 / 1k tokens + "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens + "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens + "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens + "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 - "hunyuan-turbo": 0.015 * RMB, - "hunyuan-large": 0.004 * RMB, - "hunyuan-large-longcontext": 0.006 * RMB, - "hunyuan-standard": 0.0008 * RMB, - "hunyuan-standard-256K": 0.0005 * RMB, - "hunyuan-translation-lite": 0.005 * RMB, - "hunyuan-role": 0.004 * RMB, - "hunyuan-functioncall": 0.004 * RMB, - "hunyuan-code": 0.004 * RMB, - "hunyuan-turbo-vision": 0.08 * RMB, - "hunyuan-vision": 0.018 * RMB, - "hunyuan-embedding": 0.0007 * RMB, + "hunyuan-turbo": 0.015 * KiloRmb, + "hunyuan-large": 0.004 * KiloRmb, + "hunyuan-large-longcontext": 0.006 * KiloRmb, + "hunyuan-standard": 0.0008 * KiloRmb, + "hunyuan-standard-256K": 0.0005 * KiloRmb, + "hunyuan-translation-lite": 0.005 * KiloRmb, + "hunyuan-role": 0.004 * KiloRmb, + "hunyuan-functioncall": 0.004 * KiloRmb, + "hunyuan-code": 0.004 * KiloRmb, + "hunyuan-turbo-vision": 0.08 * KiloRmb, + "hunyuan-vision": 0.018 * KiloRmb, + "hunyuan-embedding": 0.0007 * KiloRmb, // https://platform.moonshot.cn/pricing - "moonshot-v1-8k": 0.012 * RMB, - "moonshot-v1-32k": 0.024 * RMB, - "moonshot-v1-128k": 0.06 * RMB, + "moonshot-v1-8k": 0.012 * KiloRmb, + "moonshot-v1-32k": 0.024 * KiloRmb, + "moonshot-v1-128k": 0.06 * KiloRmb, // https://platform.baichuan-ai.com/price - "Baichuan2-Turbo": 0.008 * RMB, - "Baichuan2-Turbo-192k": 0.016 * RMB, - "Baichuan2-53B": 0.02 * RMB, + "Baichuan2-Turbo": 0.008 * KiloRmb, + "Baichuan2-Turbo-192k": 0.016 * KiloRmb, + "Baichuan2-53B": 0.02 * KiloRmb, // https://api.minimax.chat/document/price - "abab6.5-chat": 0.03 * RMB, - "abab6.5s-chat": 0.01 * RMB, - "abab6-chat": 0.1 * RMB, - "abab5.5-chat": 0.015 * RMB, - "abab5.5s-chat": 0.005 * RMB, + "abab6.5-chat": 0.03 * KiloRmb, + "abab6.5s-chat": 0.01 * KiloRmb, + "abab6-chat": 0.1 * KiloRmb, + "abab5.5-chat": 0.015 * KiloRmb, + 
"abab5.5s-chat": 0.005 * KiloRmb, // https://docs.mistral.ai/platform/pricing/ - "open-mistral-7b": 0.25 / 1000 * USD, - "open-mixtral-8x7b": 0.7 / 1000 * USD, - "mistral-small-latest": 2.0 / 1000 * USD, - "mistral-medium-latest": 2.7 / 1000 * USD, - "mistral-large-latest": 8.0 / 1000 * USD, - "mistral-embed": 0.1 / 1000 * USD, - // https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed - "gemma-7b-it": 0.07 / 1000000 * USD, - "gemma2-9b-it": 0.20 / 1000000 * USD, - "llama-3.1-70b-versatile": 0.59 / 1000000 * USD, - "llama-3.1-8b-instant": 0.05 / 1000000 * USD, - "llama-3.2-11b-text-preview": 0.05 / 1000000 * USD, - "llama-3.2-11b-vision-preview": 0.05 / 1000000 * USD, - "llama-3.2-1b-preview": 0.05 / 1000000 * USD, - "llama-3.2-3b-preview": 0.05 / 1000000 * USD, - "llama-3.2-90b-text-preview": 0.59 / 1000000 * USD, - "llama-guard-3-8b": 0.05 / 1000000 * USD, - "llama3-70b-8192": 0.59 / 1000000 * USD, - "llama3-8b-8192": 0.05 / 1000000 * USD, - "llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD, - "llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000000 * USD, - "mixtral-8x7b-32768": 0.24 / 1000000 * USD, - + "open-mistral-7b": 0.25 * MilliTokensUsd, + "open-mixtral-8x7b": 0.7 * MilliTokensUsd, + "mistral-small-latest": 2.0 * MilliTokensUsd, + "mistral-medium-latest": 2.7 * MilliTokensUsd, + "mistral-large-latest": 8.0 * MilliTokensUsd, + "mistral-embed": 0.1 * MilliTokensUsd, + // ------------------------------------- + // https://groq.com/pricing/ + // ------------------------------------- + "gemma2-9b-it": 0.20 * MilliTokensUsd, + "llama-3.1-8b-instant": 0.05 * MilliTokensUsd, + "llama-3.2-11b-text-preview": 0.18 * MilliTokensUsd, + "llama-3.2-11b-vision-preview": 0.18 * MilliTokensUsd, + "llama-3.2-1b-preview": 0.04 * MilliTokensUsd, + "llama-3.2-3b-preview": 0.06 * MilliTokensUsd, + "llama-3.2-90b-text-preview": 0.90 * MilliTokensUsd, + "llama-3.2-90b-vision-preview": 0.90 * MilliTokensUsd, + "llama-3.3-70b-versatile": 0.59 * MilliTokensUsd, + "llama-guard-3-8b": 0.20 * MilliTokensUsd, + "llama3-70b-8192": 0.59 * MilliTokensUsd, + "llama3-8b-8192": 0.05 * MilliTokensUsd, + "llama3-groq-70b-8192-tool-use-preview": 0.59 * MilliTokensUsd, + "llama3-groq-8b-8192-tool-use-preview": 0.05 * MilliTokensUsd, + "llama-3.3-70b-specdec": 0.59 * MilliTokensUsd, + "mistral-saba-24b": 0.79 * MilliTokensUsd, + "qwen-qwq-32b": 0.29 * MilliTokensUsd, + "qwen-2.5-coder-32b": 0.79 * MilliTokensUsd, + "qwen-2.5-32b": 0.79 * MilliTokensUsd, + "mixtral-8x7b-32768": 0.24 * MilliTokensUsd, + "whisper-large-v3": 0.111 * MilliTokensUsd, + "whisper-large-v3-turbo": 0.04 * MilliTokensUsd, + "distil-whisper-large-v3-en": 0.02 * MilliTokensUsd, + "deepseek-r1-distill-qwen-32b": 0.69 * MilliTokensUsd, + "deepseek-r1-distill-llama-70b-specdec": 0.75 * MilliTokensUsd, + "deepseek-r1-distill-llama-70b": 0.75 * MilliTokensUsd, // https://platform.lingyiwanwu.com/docs#-计费单元 - "yi-34b-chat-0205": 2.5 / 1000 * RMB, - "yi-34b-chat-200k": 12.0 / 1000 * RMB, - "yi-vl-plus": 6.0 / 1000 * RMB, + "yi-34b-chat-0205": 2.5 * MilliRmb, + "yi-34b-chat-200k": 12.0 * MilliRmb, + "yi-vl-plus": 6.0 * MilliRmb, // https://platform.stepfun.com/docs/pricing/details - "step-1-8k": 0.005 / 1000 * RMB, - "step-1-32k": 0.015 / 1000 * RMB, - "step-1-128k": 0.040 / 1000 * RMB, - "step-1-256k": 0.095 / 1000 * RMB, - "step-1-flash": 0.001 / 1000 * RMB, - "step-2-16k": 0.038 / 1000 * RMB, - "step-1v-8k": 0.005 / 1000 * RMB, - "step-1v-32k": 0.015 / 1000 * RMB, + "step-1-8k": 0.005 * MilliRmb, + "step-1-32k": 0.015 * 
MilliRmb, + "step-1-128k": 0.040 * MilliRmb, + "step-1-256k": 0.095 * MilliRmb, + "step-1-flash": 0.001 * MilliRmb, + "step-2-16k": 0.038 * MilliRmb, + "step-1v-8k": 0.005 * MilliRmb, + "step-1v-32k": 0.015 * MilliRmb, // aws llama3 https://aws.amazon.com/cn/bedrock/pricing/ "llama3-8b-8192(33)": 0.0003 / 0.002, // $0.0003 / 1K tokens "llama3-70b-8192(33)": 0.00265 / 0.002, // $0.00265 / 1K tokens @@ -332,100 +378,116 @@ var ModelRatio = map[string]float64{ "command-nightly": 0.5, "command-light": 0.5, "command-light-nightly": 0.5, - "command-r": 0.5 / 1000 * USD, - "command-r-plus": 3.0 / 1000 * USD, + "command-r": 0.5 * MilliTokensUsd, + "command-r-plus": 3.0 * MilliTokensUsd, // https://platform.deepseek.com/api-docs/pricing/ - "deepseek-chat": 0.14 * MILLI_USD, - "deepseek-reasoner": 0.55 * MILLI_USD, + "deepseek-chat": 0.27 * MilliTokensUsd, + "deepseek-reasoner": 0.55 * MilliTokensUsd, // https://www.deepl.com/pro?cta=header-prices - "deepl-zh": 25.0 / 1000 * USD, - "deepl-en": 25.0 / 1000 * USD, - "deepl-ja": 25.0 / 1000 * USD, + "deepl-zh": 25.0 * MilliTokensUsd, + "deepl-en": 25.0 * MilliTokensUsd, + "deepl-ja": 25.0 * MilliTokensUsd, // https://console.x.ai/ - "grok-beta": 5.0 / 1000 * USD, + "grok-beta": 5.0 * MilliTokensUsd, + // vertex imagen3 + // https://cloud.google.com/vertex-ai/generative-ai/pricing#imagen-models + "imagen-3.0-generate-001": 0.04 * ImageUsdPerPic, + "imagen-3.0-generate-002": 0.04 * ImageUsdPerPic, + "imagen-3.0-fast-generate-001": 0.02 * ImageUsdPerPic, + "imagen-3.0-capability-001": 0.04 * ImageUsdPerPic, + // ------------------------------------- // replicate charges based on the number of generated images // https://replicate.com/pricing - "black-forest-labs/flux-1.1-pro": 0.04 * USD, - "black-forest-labs/flux-1.1-pro-ultra": 0.06 * USD, - "black-forest-labs/flux-canny-dev": 0.025 * USD, - "black-forest-labs/flux-canny-pro": 0.05 * USD, - "black-forest-labs/flux-depth-dev": 0.025 * USD, - "black-forest-labs/flux-depth-pro": 0.05 * USD, - "black-forest-labs/flux-dev": 0.025 * USD, - "black-forest-labs/flux-dev-lora": 0.032 * USD, - "black-forest-labs/flux-fill-dev": 0.04 * USD, - "black-forest-labs/flux-fill-pro": 0.05 * USD, - "black-forest-labs/flux-pro": 0.055 * USD, - "black-forest-labs/flux-redux-dev": 0.025 * USD, - "black-forest-labs/flux-redux-schnell": 0.003 * USD, - "black-forest-labs/flux-schnell": 0.003 * USD, - "black-forest-labs/flux-schnell-lora": 0.02 * USD, - "ideogram-ai/ideogram-v2": 0.08 * USD, - "ideogram-ai/ideogram-v2-turbo": 0.05 * USD, - "recraft-ai/recraft-v3": 0.04 * USD, - "recraft-ai/recraft-v3-svg": 0.08 * USD, - "stability-ai/stable-diffusion-3": 0.035 * USD, - "stability-ai/stable-diffusion-3.5-large": 0.065 * USD, - "stability-ai/stable-diffusion-3.5-large-turbo": 0.04 * USD, - "stability-ai/stable-diffusion-3.5-medium": 0.035 * USD, + // ------------------------------------- + "black-forest-labs/flux-1.1-pro": 0.04 * ImageUsdPerPic, + "black-forest-labs/flux-1.1-pro-ultra": 0.06 * ImageUsdPerPic, + "black-forest-labs/flux-canny-dev": 0.025 * ImageUsdPerPic, + "black-forest-labs/flux-canny-pro": 0.05 * ImageUsdPerPic, + "black-forest-labs/flux-depth-dev": 0.025 * ImageUsdPerPic, + "black-forest-labs/flux-depth-pro": 0.05 * ImageUsdPerPic, + "black-forest-labs/flux-dev": 0.025 * ImageUsdPerPic, + "black-forest-labs/flux-dev-lora": 0.032 * ImageUsdPerPic, + "black-forest-labs/flux-fill-dev": 0.04 * ImageUsdPerPic, + "black-forest-labs/flux-fill-pro": 0.05 * ImageUsdPerPic, + "black-forest-labs/flux-pro": 0.055 * 
ImageUsdPerPic, + "black-forest-labs/flux-redux-dev": 0.025 * ImageUsdPerPic, + "black-forest-labs/flux-redux-schnell": 0.003 * ImageUsdPerPic, + "black-forest-labs/flux-schnell": 0.003 * ImageUsdPerPic, + "black-forest-labs/flux-schnell-lora": 0.02 * ImageUsdPerPic, + "ideogram-ai/ideogram-v2": 0.08 * ImageUsdPerPic, + "ideogram-ai/ideogram-v2-turbo": 0.05 * ImageUsdPerPic, + "recraft-ai/recraft-v3": 0.04 * ImageUsdPerPic, + "recraft-ai/recraft-v3-svg": 0.08 * ImageUsdPerPic, + "stability-ai/stable-diffusion-3": 0.035 * ImageUsdPerPic, + "stability-ai/stable-diffusion-3.5-large": 0.065 * ImageUsdPerPic, + "stability-ai/stable-diffusion-3.5-large-turbo": 0.04 * ImageUsdPerPic, + "stability-ai/stable-diffusion-3.5-medium": 0.035 * ImageUsdPerPic, // replicate chat models - "ibm-granite/granite-20b-code-instruct-8k": 0.100 * USD, - "ibm-granite/granite-3.0-2b-instruct": 0.030 * USD, - "ibm-granite/granite-3.0-8b-instruct": 0.050 * USD, - "ibm-granite/granite-8b-code-instruct-128k": 0.050 * USD, - "meta/llama-2-13b": 0.100 * USD, - "meta/llama-2-13b-chat": 0.100 * USD, - "meta/llama-2-70b": 0.650 * USD, - "meta/llama-2-70b-chat": 0.650 * USD, - "meta/llama-2-7b": 0.050 * USD, - "meta/llama-2-7b-chat": 0.050 * USD, - "meta/meta-llama-3.1-405b-instruct": 9.500 * USD, - "meta/meta-llama-3-70b": 0.650 * USD, - "meta/meta-llama-3-70b-instruct": 0.650 * USD, - "meta/meta-llama-3-8b": 0.050 * USD, - "meta/meta-llama-3-8b-instruct": 0.050 * USD, - "mistralai/mistral-7b-instruct-v0.2": 0.050 * USD, - "mistralai/mistral-7b-v0.1": 0.050 * USD, - "mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD, + "anthropic/claude-3.5-haiku": 1.0 * MilliTokensUsd, + "anthropic/claude-3.5-sonnet": 3.75 * MilliTokensUsd, + "anthropic/claude-3.7-sonnet": 3.0 * MilliTokensUsd, + "deepseek-ai/deepseek-r1": 10.0 * MilliTokensUsd, + "ibm-granite/granite-20b-code-instruct-8k": 0.100 * MilliTokensUsd, + "ibm-granite/granite-3.0-2b-instruct": 0.030 * MilliTokensUsd, + "ibm-granite/granite-3.0-8b-instruct": 0.050 * MilliTokensUsd, + "ibm-granite/granite-3.1-2b-instruct": 0.030 * MilliTokensUsd, + "ibm-granite/granite-3.1-8b-instruct": 0.030 * MilliTokensUsd, + "ibm-granite/granite-3.2-8b-instruct": 0.030 * MilliTokensUsd, + "ibm-granite/granite-8b-code-instruct-128k": 0.050 * MilliTokensUsd, + "meta/llama-2-13b": 0.100 * MilliTokensUsd, + "meta/llama-2-13b-chat": 0.100 * MilliTokensUsd, + "meta/llama-2-70b": 0.650 * MilliTokensUsd, + "meta/llama-2-70b-chat": 0.650 * MilliTokensUsd, + "meta/llama-2-7b": 0.050 * MilliTokensUsd, + "meta/llama-2-7b-chat": 0.050 * MilliTokensUsd, + "meta/meta-llama-3.1-405b-instruct": 9.500 * MilliTokensUsd, + "meta/meta-llama-3-70b": 0.650 * MilliTokensUsd, + "meta/meta-llama-3-70b-instruct": 0.650 * MilliTokensUsd, + "meta/meta-llama-3-8b": 0.050 * MilliTokensUsd, + "meta/meta-llama-3-8b-instruct": 0.050 * MilliTokensUsd, + "mistralai/mistral-7b-instruct-v0.2": 0.050 * MilliTokensUsd, + "mistralai/mistral-7b-v0.1": 0.050 * MilliTokensUsd, + // ------------------------------------- //https://openrouter.ai/models - "01-ai/yi-large": 1.5, - "aetherwiing/mn-starcannon-12b": 0.6, - "ai21/jamba-1-5-large": 4.0, - "ai21/jamba-1-5-mini": 0.2, - "ai21/jamba-instruct": 0.35, - "aion-labs/aion-1.0": 6.0, - "aion-labs/aion-1.0-mini": 1.2, - "aion-labs/aion-rp-llama-3.1-8b": 0.1, - "allenai/llama-3.1-tulu-3-405b": 5.0, - "alpindale/goliath-120b": 4.6875, - "alpindale/magnum-72b": 1.125, - "amazon/nova-lite-v1": 0.12, - "amazon/nova-micro-v1": 0.07, - "amazon/nova-pro-v1": 1.6, - "anthracite-org/magnum-v2-72b": 
1.5, - "anthracite-org/magnum-v4-72b": 1.125, - "anthropic/claude-2": 12.0, - "anthropic/claude-2.0": 12.0, - "anthropic/claude-2.0:beta": 12.0, - "anthropic/claude-2.1": 12.0, - "anthropic/claude-2.1:beta": 12.0, - "anthropic/claude-2:beta": 12.0, - "anthropic/claude-3-haiku": 0.625, - "anthropic/claude-3-haiku:beta": 0.625, - "anthropic/claude-3-opus": 37.5, - "anthropic/claude-3-opus:beta": 37.5, - "anthropic/claude-3-sonnet": 7.5, - "anthropic/claude-3-sonnet:beta": 7.5, - "anthropic/claude-3.5-haiku": 2.0, - "anthropic/claude-3.5-haiku-20241022": 2.0, - "anthropic/claude-3.5-haiku-20241022:beta": 2.0, - "anthropic/claude-3.5-haiku:beta": 2.0, - "anthropic/claude-3.5-sonnet": 7.5, - "anthropic/claude-3.5-sonnet-20240620": 7.5, - "anthropic/claude-3.5-sonnet-20240620:beta": 7.5, - "anthropic/claude-3.5-sonnet:beta": 7.5, - "cognitivecomputations/dolphin-mixtral-8x22b": 0.45, - "cognitivecomputations/dolphin-mixtral-8x7b": 0.25, + // ------------------------------------- + "01-ai/yi-large": 1.5, + "aetherwiing/mn-starcannon-12b": 0.6, + "ai21/jamba-1-5-large": 4.0, + "ai21/jamba-1-5-mini": 0.2, + "ai21/jamba-instruct": 0.35, + "aion-labs/aion-1.0": 6.0, + "aion-labs/aion-1.0-mini": 1.2, + "aion-labs/aion-rp-llama-3.1-8b": 0.1, + "allenai/llama-3.1-tulu-3-405b": 5.0, + "alpindale/goliath-120b": 4.6875, + "alpindale/magnum-72b": 1.125, + "amazon/nova-lite-v1": 0.12, + "amazon/nova-micro-v1": 0.07, + "amazon/nova-pro-v1": 1.6, + "anthracite-org/magnum-v2-72b": 1.5, + "anthracite-org/magnum-v4-72b": 1.125, + "anthropic/claude-2": 12.0, + "anthropic/claude-2.0": 12.0, + "anthropic/claude-2.0:beta": 12.0, + "anthropic/claude-2.1": 12.0, + "anthropic/claude-2.1:beta": 12.0, + "anthropic/claude-2:beta": 12.0, + "anthropic/claude-3-haiku": 0.625, + "anthropic/claude-3-haiku:beta": 0.625, + "anthropic/claude-3-opus": 37.5, + "anthropic/claude-3-opus:beta": 37.5, + "anthropic/claude-3-sonnet": 7.5, + "anthropic/claude-3-sonnet:beta": 7.5, + // "anthropic/claude-3.5-haiku": 2.0, + "anthropic/claude-3.5-haiku-20241022": 2.0, + "anthropic/claude-3.5-haiku-20241022:beta": 2.0, + "anthropic/claude-3.5-haiku:beta": 2.0, + // "anthropic/claude-3.5-sonnet": 7.5, + "anthropic/claude-3.5-sonnet-20240620": 7.5, + "anthropic/claude-3.5-sonnet-20240620:beta": 7.5, + "anthropic/claude-3.5-sonnet:beta": 7.5, + "cognitivecomputations/dolphin-mixtral-8x22b": 0.45, + "cognitivecomputations/dolphin-mixtral-8x7b": 0.25, "cohere/command": 0.95, "cohere/command-r": 0.7125, "cohere/command-r-03-2024": 0.7125, @@ -435,10 +497,10 @@ var ModelRatio = map[string]float64{ "cohere/command-r-plus-08-2024": 4.75, "cohere/command-r7b-12-2024": 0.075, "databricks/dbrx-instruct": 0.6, - "deepseek/deepseek-chat": 0.445, + "deepseek/deepseek-chat": 1.25, "deepseek/deepseek-chat-v2.5": 1.0, "deepseek/deepseek-chat:free": 0.0, - "deepseek/deepseek-r1": 1.2, + "deepseek/deepseek-r1": 7, "deepseek/deepseek-r1-distill-llama-70b": 0.345, "deepseek/deepseek-r1-distill-llama-70b:free": 0.0, "deepseek/deepseek-r1-distill-llama-8b": 0.02, @@ -566,6 +628,7 @@ var ModelRatio = map[string]float64{ "openai/gpt-4o-mini": 0.3, "openai/gpt-4o-mini-2024-07-18": 0.3, "openai/gpt-4o:extended": 9.0, + "openai/gpt-4.5-preview": 75, "openai/o1": 30.0, "openai/o1-mini": 2.2, "openai/o1-mini-2024-09-12": 2.2, @@ -575,61 +638,199 @@ var ModelRatio = map[string]float64{ "openai/o3-mini-high": 2.2, "openchat/openchat-7b": 0.0275, "openchat/openchat-7b:free": 0.0, - "openrouter/auto": -500000.0, - "perplexity/llama-3.1-sonar-huge-128k-online": 2.5, - 
"perplexity/llama-3.1-sonar-large-128k-chat": 0.5, - "perplexity/llama-3.1-sonar-large-128k-online": 0.5, - "perplexity/llama-3.1-sonar-small-128k-chat": 0.1, - "perplexity/llama-3.1-sonar-small-128k-online": 0.1, - "perplexity/sonar": 0.5, - "perplexity/sonar-reasoning": 2.5, - "pygmalionai/mythalion-13b": 0.6, - "qwen/qvq-72b-preview": 0.25, - "qwen/qwen-2-72b-instruct": 0.45, - "qwen/qwen-2-7b-instruct": 0.027, - "qwen/qwen-2-7b-instruct:free": 0.0, - "qwen/qwen-2-vl-72b-instruct": 0.2, - "qwen/qwen-2-vl-7b-instruct": 0.05, - "qwen/qwen-2.5-72b-instruct": 0.2, - "qwen/qwen-2.5-7b-instruct": 0.025, - "qwen/qwen-2.5-coder-32b-instruct": 0.08, - "qwen/qwen-max": 3.2, - "qwen/qwen-plus": 0.6, - "qwen/qwen-turbo": 0.1, - "qwen/qwen-vl-plus:free": 0.0, - "qwen/qwen2.5-vl-72b-instruct:free": 0.0, - "qwen/qwq-32b-preview": 0.09, - "raifle/sorcererlm-8x22b": 2.25, - "sao10k/fimbulvetr-11b-v2": 0.6, - "sao10k/l3-euryale-70b": 0.4, - "sao10k/l3-lunaris-8b": 0.03, - "sao10k/l3.1-70b-hanami-x1": 1.5, - "sao10k/l3.1-euryale-70b": 0.4, - "sao10k/l3.3-euryale-70b": 0.4, - "sophosympatheia/midnight-rose-70b": 0.4, - "sophosympatheia/rogue-rose-103b-v0.2:free": 0.0, - "teknium/openhermes-2.5-mistral-7b": 0.085, - "thedrummer/rocinante-12b": 0.25, - "thedrummer/unslopnemo-12b": 0.25, - "undi95/remm-slerp-l2-13b": 0.6, - "undi95/toppy-m-7b": 0.035, - "undi95/toppy-m-7b:free": 0.0, - "x-ai/grok-2-1212": 5.0, - "x-ai/grok-2-vision-1212": 5.0, - "x-ai/grok-beta": 7.5, - "x-ai/grok-vision-beta": 7.5, - "xwin-lm/xwin-lm-70b": 1.875, + // "openrouter/auto": -500000.0, + "perplexity/llama-3.1-sonar-huge-128k-online": 2.5, + "perplexity/llama-3.1-sonar-large-128k-chat": 0.5, + "perplexity/llama-3.1-sonar-large-128k-online": 0.5, + "perplexity/llama-3.1-sonar-small-128k-chat": 0.1, + "perplexity/llama-3.1-sonar-small-128k-online": 0.1, + "perplexity/sonar": 0.5, + "perplexity/sonar-reasoning": 2.5, + "pygmalionai/mythalion-13b": 0.6, + "qwen/qvq-72b-preview": 0.25, + "qwen/qwen-2-72b-instruct": 0.45, + "qwen/qwen-2-7b-instruct": 0.027, + "qwen/qwen-2-7b-instruct:free": 0.0, + "qwen/qwen-2-vl-72b-instruct": 0.2, + "qwen/qwen-2-vl-7b-instruct": 0.05, + "qwen/qwen-2.5-72b-instruct": 0.2, + "qwen/qwen-2.5-7b-instruct": 0.025, + "qwen/qwen-2.5-coder-32b-instruct": 0.08, + "qwen/qwen-max": 3.2, + "qwen/qwen-plus": 0.6, + "qwen/qwen-turbo": 0.1, + "qwen/qwen-vl-plus:free": 0.0, + "qwen/qwen2.5-vl-72b-instruct:free": 0.0, + "qwen/qwq-32b-preview": 0.09, + "raifle/sorcererlm-8x22b": 2.25, + "sao10k/fimbulvetr-11b-v2": 0.6, + "sao10k/l3-euryale-70b": 0.4, + "sao10k/l3-lunaris-8b": 0.03, + "sao10k/l3.1-70b-hanami-x1": 1.5, + "sao10k/l3.1-euryale-70b": 0.4, + "sao10k/l3.3-euryale-70b": 0.4, + "sophosympatheia/midnight-rose-70b": 0.4, + "sophosympatheia/rogue-rose-103b-v0.2:free": 0.0, + "teknium/openhermes-2.5-mistral-7b": 0.085, + "thedrummer/rocinante-12b": 0.25, + "thedrummer/unslopnemo-12b": 0.25, + "undi95/remm-slerp-l2-13b": 0.6, + "undi95/toppy-m-7b": 0.035, + "undi95/toppy-m-7b:free": 0.0, + "x-ai/grok-2-1212": 5.0, + "x-ai/grok-2-vision-1212": 5.0, + "x-ai/grok-beta": 7.5, + "x-ai/grok-vision-beta": 7.5, + "xwin-lm/xwin-lm-70b": 1.875, } +// CompletionRatio is the price ratio between completion tokens and prompt tokens var CompletionRatio = map[string]float64{ + // ------------------------------------- // aws llama3 + // ------------------------------------- "llama3-8b-8192(33)": 0.0006 / 0.0003, "llama3-70b-8192(33)": 0.0035 / 0.00265, + // ------------------------------------- // whisper - "whisper-1": 0, // only 
count input tokens + // ------------------------------------- + "whisper-1": 0, // only count input tokens + "whisper-large-v3": 0, // only count input tokens + "whisper-large-v3-turbo": 0, // only count input tokens + "distil-whisper-large-v3-en": 0, // only count input tokens + // ------------------------------------- // deepseek - "deepseek-chat": 0.28 / 0.14, + // ------------------------------------- + "deepseek-chat": 1.1 / 0.27, "deepseek-reasoner": 2.19 / 0.55, + // ------------------------------------- + // openrouter + // ------------------------------------- + "deepseek/deepseek-chat": 1, + "deepseek/deepseek-r1": 1, + // ------------------------------------- + // groq + // ------------------------------------- + "llama-3.3-70b-versatile": 0.79 / 0.59, + "llama-3.1-8b-instant": 0.08 / 0.05, + "llama3-70b-8192": 0.79 / 0.59, + "llama3-8b-8192": 0.08 / 0.05, + "gemma2-9b-it": 1.0, + "llama-3.2-11b-text-preview": 1.0, + "llama-3.2-11b-vision-preview": 1.0, + "llama-3.2-1b-preview": 1.0, + "llama-3.2-3b-preview": 1.0, + "llama-3.2-90b-text-preview": 1.0, + "llama-3.2-90b-vision-preview": 1.0, + "llama-guard-3-8b": 1.0, + "llama3-groq-70b-8192-tool-use-preview": 0.79 / 0.59, + "llama3-groq-8b-8192-tool-use-preview": 0.08 / 0.05, + "mixtral-8x7b-32768": 1.0, + "deepseek-r1-distill-qwen-32b": 1.0, + "deepseek-r1-distill-llama-70b-specdec": 0.99 / 0.75, + "deepseek-r1-distill-llama-70b": 0.99 / 0.75, + "llama-3.3-70b-specdec": 0.99 / 0.59, + "mistral-saba-24b": 1.0, + "qwen-qwq-32b": 0.39 / 0.29, + "qwen-2.5-coder-32b": 1.0, + "qwen-2.5-32b": 1.0, + // ------------------------------------- + // Replicate + // ------------------------------------- + "anthropic/claude-3.5-haiku": 5.0 / 1.0, + "anthropic/claude-3.5-sonnet": 18.75 / 3.75, + "anthropic/claude-3.7-sonnet": 15.0 / 3.0, + "deepseek-ai/deepseek-r1": 10.0 / 10.0, + "ibm-granite/granite-20b-code-instruct-8k": 0.5 / 0.1, + "ibm-granite/granite-3.0-2b-instruct": 0.25 / 0.03, + "ibm-granite/granite-3.0-8b-instruct": 0.25 / 0.05, + "ibm-granite/granite-3.1-2b-instruct": 0.25 / 0.03, + "ibm-granite/granite-3.1-8b-instruct": 0.25 / 0.03, + "ibm-granite/granite-3.2-8b-instruct": 0.25 / 0.03, + "ibm-granite/granite-8b-code-instruct-128k": 0.25 / 0.05, + "meta/llama-2-13b": 0.5 / 0.1, + "meta/llama-2-13b-chat": 0.5 / 0.1, + "meta/llama-2-70b": 2.75 / 0.65, + "meta/llama-2-70b-chat": 2.75 / 0.65, + "meta/llama-2-7b": 0.25 / 0.05, + "meta/llama-2-7b-chat": 0.25 / 0.05, + "meta/meta-llama-3.1-405b-instruct": 9.5 / 9.5, + "meta/meta-llama-3-70b": 2.75 / 0.65, + "meta/meta-llama-3-70b-instruct": 2.75 / 0.65, + "meta/meta-llama-3-8b": 0.25 / 0.05, + "meta/meta-llama-3-8b-instruct": 0.25 / 0.05, + "mistralai/mistral-7b-instruct-v0.2": 0.25 / 0.05, + "mistralai/mistral-7b-v0.1": 0.25 / 0.05, +} + +// AudioRatio represents the price ratio between audio tokens and text tokens +var AudioRatio = map[string]float64{ + "gpt-4o-audio-preview": 16, + "gpt-4o-audio-preview-2024-12-17": 16, + "gpt-4o-audio-preview-2024-10-01": 40, + "gpt-4o-mini-audio-preview": 10 / 0.15, + "gpt-4o-mini-audio-preview-2024-12-17": 10 / 0.15, +} + +// GetAudioPromptRatio returns the audio prompt ratio for the given model. +func GetAudioPromptRatio(actualModelName string) float64 { + var v float64 + if ratio, ok := AudioRatio[actualModelName]; ok { + v = ratio + } else { + v = 16 + } + + return v +} + +// AudioCompletionRatio is the completion ratio for audio models. 
+var AudioCompletionRatio = map[string]float64{
+	"whisper-1":                            0,
+	"gpt-4o-audio-preview":                 2,
+	"gpt-4o-audio-preview-2024-12-17":      2,
+	"gpt-4o-audio-preview-2024-10-01":      2,
+	"gpt-4o-mini-audio-preview":            2,
+	"gpt-4o-mini-audio-preview-2024-12-17": 2,
+}
+
+// GetAudioCompletionRatio returns the completion ratio for audio models.
+func GetAudioCompletionRatio(actualModelName string) float64 {
+	var v float64
+	if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
+		v = ratio
+	} else {
+		v = 2
+	}
+
+	return v
+}
+
+// AudioPromptTokensPerSecond is the number of audio tokens per second for each model.
+var AudioPromptTokensPerSecond = map[string]float64{
+	// Whisper API price is $0.0001/sec. One-api's historical ratio is 15,
+	// corresponding to $0.03/kilo_tokens.
+	// After conversion, tokens per second should be 0.0001/0.03*1000 = 3.3333.
+	"whisper-1": 0.0001 / 0.03 * 1000,
+	// gpt-4o-audio series processes 10 tokens per second
+	"gpt-4o-audio-preview":                 10,
+	"gpt-4o-audio-preview-2024-12-17":      10,
+	"gpt-4o-audio-preview-2024-10-01":      10,
+	"gpt-4o-mini-audio-preview":            10,
+	"gpt-4o-mini-audio-preview-2024-12-17": 10,
+}
+
+// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
+// for the given model.
+func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
+	var v float64
+	if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
+		v = tokensPerSecond
+	} else {
+		v = 10
+	}
+
+	return v
+}
 
 var (
@@ -676,11 +877,18 @@ func ModelRatio2JSONString() string {
 	return string(jsonBytes)
 }
 
+// UpdateModelRatioByJSONString updates the ModelRatio map with the given JSON string.
 func UpdateModelRatioByJSONString(jsonStr string) error {
 	modelRatioLock.Lock()
 	defer modelRatioLock.Unlock()
 	ModelRatio = make(map[string]float64)
-	return json.Unmarshal([]byte(jsonStr), &ModelRatio)
+	err := json.Unmarshal([]byte(jsonStr), &ModelRatio)
+	if err != nil {
+		logger.SysError("error unmarshalling model ratio: " + err.Error())
+		return err
+	}
+
+	return nil
 }
 
 func GetModelRatio(name string, channelType int) float64 {
@@ -692,23 +900,26 @@ func GetModelRatio(name string, channelType int) float64 {
 	if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
+
 	model := fmt.Sprintf("%s(%d)", name, channelType)
-	if ratio, ok := ModelRatio[model]; ok {
-		return ratio
-	}
-	if ratio, ok := DefaultModelRatio[model]; ok {
-		return ratio
-	}
-	if ratio, ok := ModelRatio[name]; ok {
-		return ratio
-	}
-	if ratio, ok := DefaultModelRatio[name]; ok {
-		return ratio
+
+	for _, targetName := range []string{model, name} {
+		for _, ratioMap := range []map[string]float64{
+			ModelRatio,
+			DefaultModelRatio,
+			AudioRatio,
+		} {
+			if ratio, ok := ratioMap[targetName]; ok {
+				return ratio
+			}
+		}
 	}
+
 	logger.SysError("model ratio not found: " + name)
-	return 30
+	return 2.5 * MilliTokensUsd
 }
 
+// CompletionRatio2JSONString returns the CompletionRatio map as a JSON string.
 func CompletionRatio2JSONString() string {
 	jsonBytes, err := json.Marshal(CompletionRatio)
 	if err != nil {
@@ -717,59 +928,79 @@ func CompletionRatio2JSONString() string {
 	return string(jsonBytes)
 }
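
For reference, a minimal sketch of how the audio helpers above might combine when pricing an audio prompt; the helper name and numbers are illustrative, not part of the patch:

    // exampleAudioQuota is an illustrative helper: it converts an audio duration
    // into billable prompt tokens, then applies the audio prompt ratio on top of
    // the model's text-token price.
    func exampleAudioQuota(modelName string, durationSeconds float64) float64 {
        promptTokens := durationSeconds * GetAudioPromptTokensPerSecond(modelName) // e.g. 12.5s * 10 = 125 tokens
        return promptTokens * GetAudioPromptRatio(modelName)                       // audio tokens are priced above text tokens
    }

Note that GetModelRatio above now also consults AudioRatio, and unknown models fall back to 2.5 * MilliTokensUsd instead of the old hard-coded 30.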
+// completionRatioLock is a mutex for synchronizing access to the CompletionRatio map.
+var completionRatioLock sync.RWMutex
+
+// UpdateCompletionRatioByJSONString updates the CompletionRatio map with the given JSON string.
 func UpdateCompletionRatioByJSONString(jsonStr string) error {
+	completionRatioLock.Lock()
+	defer completionRatioLock.Unlock()
 	CompletionRatio = make(map[string]float64)
 	return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
 }
 
+// GetCompletionRatio returns the completion ratio for the given model name and channel type.
 func GetCompletionRatio(name string, channelType int) float64 {
+	completionRatioLock.RLock()
+	defer completionRatioLock.RUnlock()
 	if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
 		name = strings.TrimSuffix(name, "-internet")
 	}
 	model := fmt.Sprintf("%s(%d)", name, channelType)
-	if ratio, ok := CompletionRatio[model]; ok {
-		return ratio
+
+	name = strings.TrimPrefix(name, "openai/")
+	for _, targetName := range []string{model, name} {
+		for _, ratioMap := range []map[string]float64{
+			CompletionRatio,
+			DefaultCompletionRatio,
+			AudioCompletionRatio,
+		} {
+			// first try the model name
+			if ratio, ok := ratioMap[targetName]; ok {
+				return ratio
+			}
+
+			// then try the model name without some special prefix
+			normalizedTargetName := strings.TrimPrefix(targetName, "openai/")
+			if ratio, ok := ratioMap[normalizedTargetName]; ok {
+				return ratio
+			}
+		}
 	}
-	if ratio, ok := DefaultCompletionRatio[model]; ok {
-		return ratio
-	}
-	if ratio, ok := CompletionRatio[name]; ok {
-		return ratio
-	}
-	if ratio, ok := DefaultCompletionRatio[name]; ok {
-		return ratio
-	}
-	if strings.HasPrefix(name, "gpt-3.5") {
-		if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
+
+	// openai
+	switch {
+	case strings.HasPrefix(name, "gpt-3.5"):
+		switch {
+		case name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125"):
 			// https://openai.com/blog/new-embedding-models-and-api-updates
 			// Updated GPT-3.5 Turbo model and lower pricing
 			return 3
-		}
-		if strings.HasSuffix(name, "1106") {
+		case strings.HasSuffix(name, "1106"):
 			return 2
+		default:
+			return 4.0 / 3.0
 		}
-		return 4.0 / 3.0
-	}
-	if strings.HasPrefix(name, "gpt-4") {
-		if strings.HasPrefix(name, "gpt-4o") {
+	case name == "chatgpt-4o-latest":
+		return 3
+	case strings.HasPrefix(name, "gpt-4"):
+		switch {
+		case strings.HasPrefix(name, "gpt-4o"):
 			if name == "gpt-4o-2024-05-13" {
 				return 3
 			}
 			return 4
-		}
-		if strings.HasPrefix(name, "gpt-4-turbo") ||
-			strings.HasSuffix(name, "preview") {
+		case strings.HasPrefix(name, "gpt-4-"):
 			return 3
+		default:
+			return 2
 		}
-		return 2
-	}
-	// including o1, o1-preview, o1-mini
-	if strings.HasPrefix(name, "o1") {
+	// including o1/o1-preview/o1-mini
+	case strings.HasPrefix(name, "o1") ||
+		strings.HasPrefix(name, "o3"):
 		return 4
 	}
-	if name == "chatgpt-4o-latest" {
-		return 3
-	}
+
 	if strings.HasPrefix(name, "claude-3") {
 		return 5
 	}
@@ -780,10 +1011,7 @@ func GetCompletionRatio(name string, channelType int) float64 {
 		return 3
 	}
 	if strings.HasPrefix(name, "gemini-") {
-		return 3
-	}
-	if strings.HasPrefix(name, "deepseek-") {
-		return 2
+		return 4
 	}
 
 	switch name {
@@ -831,5 +1059,6 @@
 		return 1.000 / 0.300 // ≈3.333333
 	}
 
+	logger.SysWarn(fmt.Sprintf("completion ratio not found for model: %s (channel type: %d), using default value 1", name, channelType))
 	return 1
 }
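
A quick sketch of the lookup order GetCompletionRatio above now performs (illustrative; actual values depend on the maps' contents):

    // Resolution order for GetCompletionRatio("openai/gpt-4o-mini", 1):
    //   1. "openai/gpt-4o-mini(1)" in CompletionRatio, DefaultCompletionRatio, AudioCompletionRatio
    //   2. "gpt-4o-mini(1)" (each key is retried with the "openai/" prefix stripped)
    //   3. "gpt-4o-mini" via the bare, prefix-trimmed name
    //   4. prefix heuristics: gpt-3.5*, chatgpt-4o-latest, gpt-4*, o1*/o3*, claude-3*, gemini-*, ...
    //   5. otherwise a warning is logged and 1 is returned
    ratio := GetCompletionRatio("openai/gpt-4o-mini", 1) // absent a map entry, resolves to 4 via the gpt-4o branch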
"github.com/gin-gonic/gin" - "github.com/songquanpeng/one-api/common" "github.com/songquanpeng/one-api/common/config" + "github.com/songquanpeng/one-api/common/helper" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/model" "github.com/songquanpeng/one-api/relay/adaptor/openai" billingratio "github.com/songquanpeng/one-api/relay/billing/ratio" "github.com/songquanpeng/one-api/relay/channeltype" + "github.com/songquanpeng/one-api/relay/constant/role" "github.com/songquanpeng/one-api/relay/controller/validator" "github.com/songquanpeng/one-api/relay/meta" relaymodel "github.com/songquanpeng/one-api/relay/model" @@ -45,10 +43,10 @@ func getAndValidateTextRequest(c *gin.Context, relayMode int) (*relaymodel.Gener return textRequest, nil } -func getPromptTokens(textRequest *relaymodel.GeneralOpenAIRequest, relayMode int) int { +func getPromptTokens(ctx context.Context, textRequest *relaymodel.GeneralOpenAIRequest, relayMode int) int { switch relayMode { case relaymode.ChatCompletions: - return openai.CountTokenMessages(textRequest.Messages, textRequest.Model) + return openai.CountTokenMessages(ctx, textRequest.Messages, textRequest.Model) case relaymode.Completions: return openai.CountTokenInput(textRequest.Prompt, textRequest.Model) case relaymode.Moderations: @@ -94,19 +92,30 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR return preConsumedQuota, nil } -func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) { +func postConsumeQuota(ctx context.Context, + usage *relaymodel.Usage, + meta *meta.Meta, + textRequest *relaymodel.GeneralOpenAIRequest, + ratio float64, + preConsumedQuota int64, + modelRatio float64, + groupRatio float64, + systemPromptReset bool) (quota int64) { if usage == nil { logger.Error(ctx, "usage is nil, which is unexpected") return } - var quota int64 completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType) promptTokens := usage.PromptTokens + // It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens, + // but the behavior of third-party providers may differ, so for now we do not add them manually. 
+	// completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
 	completionTokens := usage.CompletionTokens
-	quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
+	quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
 	if ratio != 0 && quota <= 0 {
 		quota = 1
 	}
+
 	totalTokens := promptTokens + completionTokens
 	if totalTokens == 0 {
 		// in this case, must be some error happened
@@ -122,7 +131,13 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 	if err != nil {
 		logger.Error(ctx, "error update user quota cache: "+err.Error())
 	}
-	logContent := fmt.Sprintf("倍率:%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
+
+	var logContent string
+	if usage.ToolsCost == 0 {
+		logContent = fmt.Sprintf("倍率:%.2f × %.2f × %.2f", modelRatio, groupRatio, completionRatio)
+	} else {
+		logContent = fmt.Sprintf("倍率:%.2f × %.2f × %.2f, tools cost %d", modelRatio, groupRatio, completionRatio, usage.ToolsCost)
+	}
 	model.RecordConsumeLog(ctx, &model.Log{
 		UserId:    meta.UserId,
 		ChannelId: meta.ChannelId,
@@ -138,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 	})
 	model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
 	model.UpdateChannelUsedQuota(meta.ChannelId, quota)
+
+	return quota
 }
 
 func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {
diff --git a/relay/controller/image.go b/relay/controller/image.go
index 9a980a14..b1344116 100644
--- a/relay/controller/image.go
+++ b/relay/controller/image.go
@@ -157,7 +157,7 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
 		channeltype.Ali,
 		channeltype.Replicate,
 		channeltype.Baidu:
-		finalRequest, err := adaptor.ConvertImageRequest(imageRequest)
+		finalRequest, err := adaptor.ConvertImageRequest(c, imageRequest)
 		if err != nil {
 			return openai.ErrorWrapper(err, "convert_image_request_failed", http.StatusInternalServerError)
 		}
diff --git a/relay/controller/text.go b/relay/controller/text.go
index f912498a..dbb47115 100644
--- a/relay/controller/text.go
+++ b/relay/controller/text.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/ctxkey"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/relay"
 	"github.com/songquanpeng/one-api/relay/adaptor"
@@ -44,7 +45,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
 	groupRatio := billingratio.GetGroupRatio(meta.Group)
 	ratio := modelRatio * groupRatio
 	// pre-consume quota
-	promptTokens := getPromptTokens(textRequest, meta.Mode)
+	promptTokens := getPromptTokens(c.Request.Context(), textRequest, meta.Mode)
 	meta.PromptTokens = promptTokens
 	preConsumedQuota, bizErr := preConsumeQuota(ctx, textRequest, promptTokens, ratio, meta)
 	if bizErr != nil {
@@ -104,6 +105,8 @@ func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralO
 		logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error())
 		return nil, err
 	}
+	c.Set(ctxkey.ConvertedRequest, convertedRequest)
+
 	jsonData, err := json.Marshal(convertedRequest)
 	if err != nil {
 		logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error())
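
The postConsumeQuota change above now returns the computed quota and folds usage.ToolsCost into it. A worked example with assumed numbers (illustrative only):

    // promptTokens=1000, completionTokens=500, completionRatio=2.0, ratio=7.5, usage.ToolsCost=100:
    quota := int64(math.Ceil((1000+500*2.0)*7.5)) + 100 // ceil(2000*7.5) + 100 = 15100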
diff --git a/relay/model/general.go b/relay/model/general.go
index 5f5968c8..f264cf07 100644
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -1,5 +1,7 @@
 package model
 
+import "github.com/songquanpeng/one-api/relay/adaptor/openrouter"
+
 type ResponseFormat struct {
 	Type       string      `json:"type,omitempty"`
 	JsonSchema *JSONSchema `json:"json_schema,omitempty"`
@@ -23,49 +25,103 @@ type StreamOptions struct {
 
 type GeneralOpenAIRequest struct {
 	// https://platform.openai.com/docs/api-reference/chat/create
-	Messages            []Message       `json:"messages,omitempty"`
-	Model               string          `json:"model,omitempty"`
-	Store               *bool           `json:"store,omitempty"`
-	ReasoningEffort     *string         `json:"reasoning_effort,omitempty"`
-	Metadata            any             `json:"metadata,omitempty"`
-	FrequencyPenalty    *float64        `json:"frequency_penalty,omitempty"`
-	LogitBias           any             `json:"logit_bias,omitempty"`
-	Logprobs            *bool           `json:"logprobs,omitempty"`
-	TopLogprobs         *int            `json:"top_logprobs,omitempty"`
-	MaxTokens           int             `json:"max_tokens,omitempty"`
-	MaxCompletionTokens *int            `json:"max_completion_tokens,omitempty"`
-	N                   int             `json:"n,omitempty"`
-	Modalities          []string        `json:"modalities,omitempty"`
-	Prediction          any             `json:"prediction,omitempty"`
-	Audio               *Audio          `json:"audio,omitempty"`
-	PresencePenalty     *float64        `json:"presence_penalty,omitempty"`
-	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
-	Seed                float64         `json:"seed,omitempty"`
-	ServiceTier         *string         `json:"service_tier,omitempty"`
-	Stop                any             `json:"stop,omitempty"`
-	Stream              bool            `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
-	Temperature         *float64        `json:"temperature,omitempty"`
-	TopP                *float64        `json:"top_p,omitempty"`
-	TopK                int             `json:"top_k,omitempty"`
-	Tools               []Tool          `json:"tools,omitempty"`
-	ToolChoice          any             `json:"tool_choice,omitempty"`
-	ParallelTooCalls    *bool           `json:"parallel_tool_calls,omitempty"`
-	User                string          `json:"user,omitempty"`
-	FunctionCall        any             `json:"function_call,omitempty"`
-	Functions           any             `json:"functions,omitempty"`
+	Messages []Message `json:"messages,omitempty"`
+	Model    string    `json:"model,omitempty"`
+	Store    *bool     `json:"store,omitempty"`
+	Metadata any       `json:"metadata,omitempty"`
+	// FrequencyPenalty is a number between -2.0 and 2.0 that penalizes
+	// new tokens based on their existing frequency in the text so far,
+	// default is 0.
+	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
+	LogitBias        any      `json:"logit_bias,omitempty"`
+	Logprobs         *bool    `json:"logprobs,omitempty"`
+	TopLogprobs      *int     `json:"top_logprobs,omitempty"`
+	MaxTokens           int  `json:"max_tokens,omitempty"`
+	MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
+	// N is how many chat completion choices to generate for each input message,
+	// default to 1.
+	N *int `json:"n,omitempty" binding:"omitempty,min=0"`
+	// ReasoningEffort constrains effort on reasoning for reasoning models, reasoning models only.
+	ReasoningEffort *string `json:"reasoning_effort,omitempty" binding:"omitempty,oneof=low medium high"`
+	// Modalities currently the model only programmatically allows modalities = ["text", "audio"]
+	Modalities []string `json:"modalities,omitempty"`
+	Prediction any      `json:"prediction,omitempty"`
+	Audio      *Audio   `json:"audio,omitempty"`
+	// PresencePenalty is a number between -2.0 and 2.0 that penalizes
+	// new tokens based on whether they appear in the text so far, default is 0.
+	PresencePenalty *float64        `json:"presence_penalty,omitempty" binding:"omitempty,min=-2,max=2"`
+	ResponseFormat  *ResponseFormat `json:"response_format,omitempty"`
+	Seed            float64         `json:"seed,omitempty"`
+	ServiceTier     *string         `json:"service_tier,omitempty" binding:"omitempty,oneof=default auto"`
+	Stop            any             `json:"stop,omitempty"`
+	Stream          bool            `json:"stream,omitempty"`
+	StreamOptions   *StreamOptions  `json:"stream_options,omitempty"`
+	Temperature     *float64        `json:"temperature,omitempty"`
+	TopP            *float64        `json:"top_p,omitempty"`
+	TopK            int             `json:"top_k,omitempty"`
+	Tools           []Tool          `json:"tools,omitempty"`
+	ToolChoice      any             `json:"tool_choice,omitempty"`
+	ParallelTooCalls *bool          `json:"parallel_tool_calls,omitempty"`
+	User         string `json:"user,omitempty"`
+	FunctionCall any    `json:"function_call,omitempty"`
+	Functions    any    `json:"functions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/embeddings/create
 	Input          any    `json:"input,omitempty"`
 	EncodingFormat string `json:"encoding_format,omitempty"`
 	Dimensions     int    `json:"dimensions,omitempty"`
 	// https://platform.openai.com/docs/api-reference/images/create
-	Prompt  any     `json:"prompt,omitempty"`
-	Quality *string `json:"quality,omitempty"`
-	Size    string  `json:"size,omitempty"`
-	Style   *string `json:"style,omitempty"`
+	Prompt           string            `json:"prompt,omitempty"`
+	Quality          *string           `json:"quality,omitempty"`
+	Size             string            `json:"size,omitempty"`
+	Style            *string           `json:"style,omitempty"`
+	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
+
 	// Others
 	Instruction string `json:"instruction,omitempty"`
 	NumCtx      int    `json:"num_ctx,omitempty"`
+	// -------------------------------------
+	// Openrouter
+	// -------------------------------------
+	Provider         *openrouter.RequestProvider `json:"provider,omitempty"`
+	IncludeReasoning *bool                       `json:"include_reasoning,omitempty"`
+	// -------------------------------------
+	// Anthropic
+	// -------------------------------------
+	Thinking *Thinking `json:"thinking,omitempty"`
+}
+
+// WebSearchOptions configures the tool that searches the web for relevant results to use in a response.
+type WebSearchOptions struct {
+	// SearchContextSize is the high level guidance for the amount of context window space to use for the search,
+	// default is "medium".
+	SearchContextSize *string       `json:"search_context_size,omitempty" binding:"omitempty,oneof=low medium high"`
+	UserLocation      *UserLocation `json:"user_location,omitempty"`
+}
+
+// UserLocation is a struct that contains the location of the user.
+type UserLocation struct {
+	// Approximate is the approximate location parameters for the search.
+	Approximate UserLocationApproximate `json:"approximate" binding:"required"`
+	// Type is the type of location approximation.
+	Type string `json:"type" binding:"required,oneof=approximate"`
+}
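
A minimal sketch of populating the new web-search fields; the model name here is hypothetical and only fields defined in this file are used:

    searchSize := "medium"
    city := "San Francisco"
    req := GeneralOpenAIRequest{
        Model: "gpt-4o-search-preview", // hypothetical model name
        WebSearchOptions: &WebSearchOptions{
            SearchContextSize: &searchSize,
            UserLocation: &UserLocation{
                Type:        "approximate", // the only value the binding tag accepts
                Approximate: UserLocationApproximate{City: &city},
            },
        },
    }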
+
+// UserLocationApproximate is a struct that contains the approximate location of the user.
+type UserLocationApproximate struct {
+	// City is the city of the user, e.g. San Francisco.
+	City *string `json:"city,omitempty"`
+	// Country is the country of the user, e.g. US.
+	Country *string `json:"country,omitempty"`
+	// Region is the region of the user, e.g. California.
+	Region *string `json:"region,omitempty"`
+	// Timezone is the IANA timezone of the user, e.g. America/Los_Angeles.
+	Timezone *string `json:"timezone,omitempty"`
+}
+
+// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking
+type Thinking struct {
+	Type         string `json:"type"`
+	BudgetTokens int    `json:"budget_tokens" binding:"omitempty,min=1024"`
+}
 
 func (r GeneralOpenAIRequest) ParseInput() []string {
diff --git a/relay/model/message.go b/relay/model/message.go
index 5ff7b7ae..597ee693 100644
--- a/relay/model/message.go
+++ b/relay/model/message.go
@@ -1,12 +1,106 @@
 package model
 
+import (
+	"context"
+	"strings"
+
+	"github.com/songquanpeng/one-api/common/logger"
+)
+
+// ReasoningFormat is the format of reasoning content,
+// can be set by the reasoning_format parameter in the request url.
+type ReasoningFormat string
+
+const (
+	ReasoningFormatUnspecified ReasoningFormat = ""
+	// ReasoningFormatReasoningContent is the reasoning format used by deepseek official API
+	ReasoningFormatReasoningContent ReasoningFormat = "reasoning_content"
+	// ReasoningFormatReasoning is the reasoning format used by openrouter
+	ReasoningFormatReasoning ReasoningFormat = "reasoning"
+
+	// ReasoningFormatThinkTag is the reasoning format used by 3rd party deepseek-r1 providers.
+	//
+	// Deprecated: I believe this is a very poor format, especially in stream mode, it is difficult to extract and convert.
+	// Considering that only a few deepseek-r1 third-party providers use this format, it has been decided to no longer support it.
+	// ReasoningFormatThinkTag ReasoningFormat = "think-tag"
+
+	// ReasoningFormatThinking is the reasoning format used by anthropic
+	ReasoningFormatThinking ReasoningFormat = "thinking"
+)
+
 type Message struct {
-	Role             string  `json:"role,omitempty"`
-	Content          any     `json:"content,omitempty"`
-	ReasoningContent any     `json:"reasoning_content,omitempty"`
-	Name             *string `json:"name,omitempty"`
-	ToolCalls        []Tool  `json:"tool_calls,omitempty"`
-	ToolCallId       string  `json:"tool_call_id,omitempty"`
+	Role string `json:"role,omitempty"`
+	// Content is a string or a list of objects
+	Content    any              `json:"content,omitempty"`
+	Name       *string          `json:"name,omitempty"`
+	ToolCalls  []Tool           `json:"tool_calls,omitempty"`
+	ToolCallId string           `json:"tool_call_id,omitempty"`
+	Audio      *messageAudio    `json:"audio,omitempty"`
+	Annotation []AnnotationItem `json:"annotation,omitempty"`
+
+	// -------------------------------------
+	// Deepseek-specific fields
+	// https://api-docs.deepseek.com/api/create-chat-completion
+	// -------------------------------------
+	// Prefix forces the model to begin its answer with the supplied prefix in the assistant message.
+	// To enable this feature, set base_url to "https://api.deepseek.com/beta".
+	Prefix *bool `json:"prefix,omitempty"`
+	// ReasoningContent is used for the deepseek-reasoner model in the Chat
+	// Prefix Completion feature as the input for the CoT in the last assistant message.
+	// When using this feature, the prefix parameter must be set to true.
+	ReasoningContent *string `json:"reasoning_content,omitempty"`
+
+	// -------------------------------------
+	// Openrouter
+	// -------------------------------------
+	Reasoning *string `json:"reasoning,omitempty"`
+	Refusal   *bool   `json:"refusal,omitempty"`
+
+	// -------------------------------------
+	// Anthropic
+	// -------------------------------------
+	Thinking  *string `json:"thinking,omitempty"`
+	Signature *string `json:"signature,omitempty"`
+}
+
+type AnnotationItem struct {
+	Type        string      `json:"type" binding:"oneof=url_citation"`
+	UrlCitation UrlCitation `json:"url_citation"`
+}
+
+// UrlCitation is a URL citation when using web search.
+type UrlCitation struct {
+	// EndIndex is the index of the last character of the URL citation in the message.
+	EndIndex int `json:"end_index"`
+	// StartIndex is the index of the first character of the URL citation in the message.
+	StartIndex int `json:"start_index"`
+	// Title is the title of the web resource.
+	Title string `json:"title"`
+	// Url is the URL of the web resource.
+	Url string `json:"url"`
+}
+
+// SetReasoningContent sets the reasoning content based on the format
+func (m *Message) SetReasoningContent(format string, reasoningContent string) {
+	switch ReasoningFormat(strings.ToLower(strings.TrimSpace(format))) {
+	case ReasoningFormatReasoningContent:
+		m.ReasoningContent = &reasoningContent
+	// case ReasoningFormatThinkTag:
+	// 	m.Content = fmt.Sprintf("%s%s", reasoningContent, m.Content)
+	case ReasoningFormatThinking:
+		m.Thinking = &reasoningContent
+	case ReasoningFormatReasoning,
+		ReasoningFormatUnspecified:
+		m.Reasoning = &reasoningContent
+	default:
+		logger.Warnf(context.TODO(), "unknown reasoning format: %q", format)
+	}
+}
+
+type messageAudio struct {
+	Id         string `json:"id"`
+	Data       string `json:"data,omitempty"`
+	ExpiredAt  int    `json:"expired_at,omitempty"`
+	Transcript string `json:"transcript,omitempty"`
 }
 
 func (m Message) IsStringContent() bool {
@@ -27,6 +121,7 @@ func (m Message) StringContent() string {
 			if !ok {
 				continue
 			}
+
 			if contentMap["type"] == ContentTypeText {
 				if subStr, ok := contentMap["text"].(string); ok {
 					contentStr += subStr
@@ -35,6 +130,7 @@ func (m Message) StringContent() string {
 		}
 		return contentStr
 	}
+
 	return ""
 }
 
@@ -44,10 +140,11 @@ func (m Message) ParseContent() []MessageContent {
 	if ok {
 		contentList = append(contentList, MessageContent{
 			Type: ContentTypeText,
-			Text: content,
+			Text: &content,
 		})
 		return contentList
 	}
+
 	anyList, ok := m.Content.([]any)
 	if ok {
 		for _, contentItem := range anyList {
@@ -60,7 +157,7 @@ func (m Message) ParseContent() []MessageContent {
 				if subStr, ok := contentMap["text"].(string); ok {
 					contentList = append(contentList, MessageContent{
 						Type: ContentTypeText,
-						Text: subStr,
+						Text: &subStr,
 					})
 				}
 			case ContentTypeImageURL:
@@ -72,8 +169,21 @@ func (m Message) ParseContent() []MessageContent {
 					},
 				})
 			}
+		case ContentTypeInputAudio:
+			if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
+				contentList = append(contentList, MessageContent{
+					Type: ContentTypeInputAudio,
+					InputAudio: &InputAudio{
+						Data:   subObj["data"].(string),
+						Format: subObj["format"].(string),
+					},
+				})
+			}
+		default:
+			logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"])
 			}
 		}
+
 		return contentList
 	}
 	return nil
@@ -85,7 +195,23 @@ func (m Message) ParseContent() []MessageContent {
 
 type ImageURL struct {
 }
 
 type MessageContent struct {
-	Type     string    `json:"type,omitempty"`
-	Text     string    `json:"text"`
-	ImageURL *ImageURL `json:"image_url,omitempty"`
+	// Type should be one of the following: text/input_audio
+	Type       string      `json:"type,omitempty"`
+	Text       *string     `json:"text,omitempty"`
+	ImageURL   *ImageURL   `json:"image_url,omitempty"`
+	InputAudio *InputAudio `json:"input_audio,omitempty"`
+	// -------------------------------------
+	// Anthropic
+	// -------------------------------------
+	Thinking  *string `json:"thinking,omitempty"`
+	Signature *string `json:"signature,omitempty"`
+}
+
+type InputAudio struct {
+	// Data is the base64 encoded audio data
+	Data string `json:"data" binding:"required"`
+	// Format is the audio format, should be one of the
+	// following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16.
+	// When stream=true, format should be pcm16
+	Format string `json:"format"`
+}
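
Tying the message changes together, a sketch of a user message carrying an input_audio part and of attaching reasoning to a reply; base64Audio and cot are placeholders for real data:

    msg := Message{
        Role: "user",
        Content: []any{
            map[string]any{"type": "text", "text": "What is said in this clip?"},
            map[string]any{
                "type":        "input_audio",
                "input_audio": map[string]any{"data": base64Audio, "format": "wav"},
            },
        },
    }
    parts := msg.ParseContent() // the second part comes back with Type == ContentTypeInputAudio

    var reply Message
    reply.SetReasoningContent("reasoning_content", cot) // deepseek-style; "thinking" and "reasoning" map to their fields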
diff --git a/relay/model/misc.go b/relay/model/misc.go
index fdba01ea..0f58bdd3 100644
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,17 +1,22 @@
 package model
 
+// Usage is the token usage information returned by OpenAI API.
 type Usage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
+	PromptTokensDetails *usagePromptTokensDetails `json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
+	CompletionTokensDetails *usageCompletionTokensDetails `json:"completion_tokens_details,omitempty"`
+	ServiceTier       string `json:"service_tier,omitempty"`
+	SystemFingerprint string `json:"system_fingerprint,omitempty"`
 
-	CompletionTokensDetails *CompletionTokensDetails `json:"completion_tokens_details,omitempty"`
-}
-
-type CompletionTokensDetails struct {
-	ReasoningTokens          int `json:"reasoning_tokens"`
-	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
-	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
+	// -------------------------------------
+	// Custom fields
+	// -------------------------------------
+	// ToolsCost is the cost of using tools, in quota.
+	ToolsCost int64 `json:"tools_cost,omitempty"`
 }
 
 type Error struct {
@@ -25,3 +30,20 @@ type ErrorWithStatusCode struct {
 	Error
 	StatusCode int `json:"status_code"`
 }
+
+type usagePromptTokensDetails struct {
+	CachedTokens int `json:"cached_tokens"`
+	AudioTokens  int `json:"audio_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens  int `json:"text_tokens"`
+	ImageTokens int `json:"image_tokens"`
+}
+
+type usageCompletionTokensDetails struct {
+	ReasoningTokens          int `json:"reasoning_tokens"`
+	AudioTokens              int `json:"audio_tokens"`
+	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
+	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens int `json:"text_tokens"`
+}
diff --git a/relay/relaymode/define.go b/relay/relaymode/define.go
index aa771205..12acb940 100644
--- a/relay/relaymode/define.go
+++ b/relay/relaymode/define.go
@@ -13,4 +13,5 @@ const (
 	AudioTranslation
 	// Proxy is a special relay mode for proxying requests to custom upstream
 	Proxy
+	ImagesEdits
 )
diff --git a/web/air/src/components/ChannelsTable.js b/web/air/src/components/ChannelsTable.js
index c384d50c..1d312790 100644
--- a/web/air/src/components/ChannelsTable.js
+++ b/web/air/src/components/ChannelsTable.js
@@ -327,7 +327,7 @@ const ChannelsTable = () => {
     let res;
     switch (action) {
       case 'delete':
-        res = await API.delete(`/api/channel/${id}/`);
+        res = await API.delete(`/api/channel/${id}`);
        break;
       case 'enable':
         data.status = 1;
diff --git a/web/air/src/components/LogsTable.js b/web/air/src/components/LogsTable.js
index 7d372d49..7981eb5e 100644
--- a/web/air/src/components/LogsTable.js
+++ b/web/air/src/components/LogsTable.js
@@ -245,7 +245,7 @@ const LogsTable = () => {
     if (isAdminUser) {
       url = `/api/log/?p=${startIdx}&page_size=${pageSize}&type=${logType}&username=${username}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}&channel=${channel}`;
     } else {
-      url = `/api/log/self/?p=${startIdx}&page_size=${pageSize}&type=${logType}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}`;
+      url = `/api/log/self?p=${startIdx}&page_size=${pageSize}&type=${logType}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}`;
     }
     const res = await API.get(url);
     const { success, message, data } = res.data;
diff --git a/web/air/src/components/RedemptionsTable.js b/web/air/src/components/RedemptionsTable.js
index 89e4ce20..fdfd2f17 100644
--- a/web/air/src/components/RedemptionsTable.js
+++ b/web/air/src/components/RedemptionsTable.js
@@ -250,7 +250,7 @@ const RedemptionsTable = () => {
     let res;
     switch (action) {
       case 'delete':
-        res = await API.delete(`/api/redemption/${id}/`);
+        res = await API.delete(`/api/redemption/${id}`);
         break;
       case 'enable':
         data.status = 1;
diff --git a/web/default/src/components/ChannelsTable.js b/web/default/src/components/ChannelsTable.js
index ef2aec89..f54a223b 100644
--- a/web/default/src/components/ChannelsTable.js
+++ b/web/default/src/components/ChannelsTable.js
@@ -165,7 +165,7 @@ const ChannelsTable = () => {
     let res;
     switch (action) {
       case 'delete':
-        res = await API.delete(`/api/channel/${id}/`);
+        res = await API.delete(`/api/channel/${id}`);
         break;
       case 'enable':
         data.status = 1;
@@ -360,7 +360,7 @@ const ChannelsTable = () => {
   };
 
   const updateChannelBalance = async (id, name, idx) => {
-    const res = await API.get(`/api/channel/update_balance/${id}/`);
+    const res = await API.get(`/api/channel/update_balance/${id}`);
     const { success, message, balance } = res.data;
     if (success) {
       let newChannels = [...channels];
diff --git a/web/default/src/components/LogsTable.js b/web/default/src/components/LogsTable.js
index 8af285a1..8e106e42 100644
--- a/web/default/src/components/LogsTable.js
+++ b/web/default/src/components/LogsTable.js
@@ -225,7 +225,7 @@ const LogsTable = () => {
     if (isAdminUser) {
       url = `/api/log/?p=${startIdx}&type=${logType}&username=${username}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}&channel=${channel}`;
     } else {
-      url = `/api/log/self/?p=${startIdx}&type=${logType}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}`;
+      url = `/api/log/self?p=${startIdx}&type=${logType}&token_name=${token_name}&model_name=${model_name}&start_timestamp=${localStartTimestamp}&end_timestamp=${localEndTimestamp}`;
     }
     const res = await API.get(url);
     const { success, message, data } = res.data;
diff --git a/web/default/src/components/RedemptionsTable.js b/web/default/src/components/RedemptionsTable.js
index 3b24759d..a0185727 100644
--- a/web/default/src/components/RedemptionsTable.js
+++ b/web/default/src/components/RedemptionsTable.js
@@ -103,7 +103,7 @@ const RedemptionsTable = () => {
     let res;
     switch (action) {
      case 'delete':
-        res = await API.delete(`/api/redemption/${id}/`);
+        res = await API.delete(`/api/redemption/${id}`);
        break;
      case 'enable':
        data.status = 1;