commit 7e42bebffa
Qiying Wang, 2025-01-20 18:58:22 +08:00, committed by GitHub
75 changed files with 1155 additions and 562 deletions

View File

@ -1,5 +1,7 @@
package ctxkey
import "github.com/gin-gonic/gin"
const (
Config = "config"
Id = "id"
@ -19,6 +21,6 @@ const (
TokenName = "token_name"
BaseURL = "base_url"
AvailableModels = "available_models"
KeyRequestBody = "key_request_body"
KeyRequestBody = gin.BodyBytesKey
SystemPrompt = "system_prompt"
)
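
Context for the change above: gin's ShouldBindBodyWith caches the raw request body under the exported constant gin.BodyBytesKey, so pointing ctxkey.KeyRequestBody at the same key lets the project's own body cache and gin's bind cache share a single entry. A minimal sketch of the reuse pattern, assuming a hypothetical helper readBodyOnce (this is not code from the commit):

package example // hypothetical illustration, not part of this commit

import (
	"io"

	"github.com/gin-gonic/gin"
	"github.com/songquanpeng/one-api/common/ctxkey"
)

func readBodyOnce(c *gin.Context) ([]byte, error) {
	// Return the cached bytes if an earlier read (ours or gin's ShouldBindBodyWith) stored them.
	if v, ok := c.Get(ctxkey.KeyRequestBody); ok {
		return v.([]byte), nil
	}
	body, err := io.ReadAll(c.Request.Body)
	if err != nil {
		return nil, err
	}
	_ = c.Request.Body.Close()
	// Cache under gin.BodyBytesKey (== ctxkey.KeyRequestBody) so later binds reuse the same copy.
	c.Set(ctxkey.KeyRequestBody, body)
	return body, nil
}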

View File

@ -3,10 +3,11 @@ package common
import (
"bytes"
"encoding/json"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/ctxkey"
"io"
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/ctxkey"
)
func GetRequestBody(c *gin.Context) ([]byte, error) {
@ -31,7 +32,6 @@ func UnmarshalBodyReusable(c *gin.Context, v any) error {
contentType := c.Request.Header.Get("Content-Type")
if strings.HasPrefix(contentType, "application/json") {
err = json.Unmarshal(requestBody, &v)
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
} else {
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
err = c.ShouldBind(&v)
@ -40,6 +40,7 @@ func UnmarshalBodyReusable(c *gin.Context, v any) error {
return err
}
// Reset request body
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
return nil
}
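
The hunk above moves the body reset to a single place after both branches, so the request body can be read again downstream regardless of content type. A hedged usage sketch; the handler and request type are illustrative, only GetRequestBody and UnmarshalBodyReusable come from this file:

package example // hypothetical illustration

import (
	"net/http"

	"github.com/gin-gonic/gin"
	"github.com/songquanpeng/one-api/common"
)

type chatRequest struct {
	Model string `json:"model"`
}

func handler(c *gin.Context) {
	var req chatRequest
	// First parse: fills req and leaves c.Request.Body readable afterwards.
	if err := common.UnmarshalBodyReusable(c, &req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	// Because the body was reset, a later adaptor can still fetch the raw bytes.
	body, _ := common.GetRequestBody(c)
	_ = body
}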

View File

@ -6,6 +6,7 @@ import (
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/model"
relay "github.com/songquanpeng/one-api/relay"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/apitype"
"github.com/songquanpeng/one-api/relay/channeltype"
@ -86,7 +87,8 @@ func init() {
if channelType == channeltype.Azure {
continue
}
channelName, channelModelList := openai.GetCompatibleChannelMeta(channelType)
channelName, channelRatioMap := openai.GetCompatibleChannelMeta(channelType)
channelModelList := adaptor.GetModelListHelper(channelRatioMap)
for _, modelName := range channelModelList {
models = append(models, OpenAIModels{
Id: modelName,

View File

@ -1,12 +1,13 @@
package model
import (
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/logger"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"strconv"
"strings"
"time"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/logger"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
)
type Option struct {
@ -70,6 +71,7 @@ func InitOptionMap() {
config.OptionMap["ModelRatio"] = billingratio.ModelRatio2JSONString()
config.OptionMap["GroupRatio"] = billingratio.GroupRatio2JSONString()
config.OptionMap["CompletionRatio"] = billingratio.CompletionRatio2JSONString()
config.OptionMap["Ratio"] = billingratio.Ratio2JSONString()
config.OptionMap["TopUpLink"] = config.TopUpLink
config.OptionMap["ChatLink"] = config.ChatLink
config.OptionMap["QuotaPerUnit"] = strconv.FormatFloat(config.QuotaPerUnit, 'f', -1, 64)
@ -81,15 +83,35 @@ func InitOptionMap() {
func loadOptionsFromDatabase() {
options, _ := AllOption()
var oldModelRatio string
var oldCompletionRatio string
for _, option := range options {
if option.Key == "ModelRatio" {
oldModelRatio = option.Value
option.Value = billingratio.AddNewMissingRatio(option.Value)
}
if option.Key == "CompletionRatio" {
oldCompletionRatio = option.Value
}
err := updateOptionMap(option.Key, option.Value)
if err != nil {
logger.SysError("failed to update option map: " + err.Error())
}
}
for _, option := range options {
if option.Key == "Ratio" {
option.Value = billingratio.AddOldRatio(oldModelRatio, oldCompletionRatio)
err := updateOptionMap(option.Key, option.Value)
if err != nil {
logger.SysError("failed to update option map: " + err.Error())
}
err = UpdateOption(option.Key, option.Value)
if err != nil {
logger.SysError("failed to update option map: " + err.Error())
}
logger.SysLog("ratio merged")
}
}
}
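
For context on the migration above: the deprecated options store a per-model input multiplier (ModelRatio) and an output/input factor (CompletionRatio), and AddOldRatio folds them into the new unified Ratio option. A conceptual sketch of that merge, under the assumption that CompletionRatio is a multiplier applied to the input price; this is not the actual AddOldRatio implementation:

package example // hypothetical illustration

type Ratio struct {
	Input  float64
	Output float64
}

// mergeLegacyRatios combines the deprecated ModelRatio and CompletionRatio maps
// into a single input/output price table.
func mergeLegacyRatios(modelRatio, completionRatio map[string]float64) map[string]Ratio {
	merged := make(map[string]Ratio, len(modelRatio))
	for name, input := range modelRatio {
		factor, ok := completionRatio[name]
		if !ok {
			factor = 1 // assumption: fall back to symmetric pricing when no factor is set
		}
		merged[name] = Ratio{Input: input, Output: input * factor}
	}
	return merged
}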
func SyncOptions(frequency int) {
@ -223,12 +245,14 @@ func updateOptionMap(key string, value string) (err error) {
config.PreConsumedQuota, _ = strconv.ParseInt(value, 10, 64)
case "RetryTimes":
config.RetryTimes, _ = strconv.Atoi(value)
case "ModelRatio":
case "ModelRatio": // Deprecated
err = billingratio.UpdateModelRatioByJSONString(value)
case "GroupRatio":
err = billingratio.UpdateGroupRatioByJSONString(value)
case "CompletionRatio":
case "CompletionRatio": // Deprecated
err = billingratio.UpdateCompletionRatioByJSONString(value)
case "Ratio":
err = billingratio.UpdateRatioByJSONString(value)
case "TopUpLink":
config.TopUpLink = value
case "ChatLink":

View File

@ -1,8 +1,10 @@
package ai360
var ModelList = []string{
"360GPT_S2_V9",
"embedding-bert-512-v1",
"embedding_s1_v1",
"semantic_similarity_s1_v1",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"360GPT_S2_V9": {Input: 0.012 * ratio.RMB, Output: 0.012 * ratio.RMB},
"embedding-bert-512-v1": {Input: 0.0001 * ratio.RMB, Output: 0},
"embedding_s1_v1": {Input: 0.0001 * ratio.RMB, Output: 0},
"semantic_similarity_s1_v1": {Input: 0.0001 * ratio.RMB, Output: 0},
}

View File

@ -3,12 +3,14 @@ package aiproxy
import (
"errors"
"fmt"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
type Adaptor struct {
@ -58,8 +60,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -2,8 +2,4 @@ package aiproxy
import "github.com/songquanpeng/one-api/relay/adaptor/openai"
var ModelList = []string{""}
func init() {
ModelList = openai.ModelList
}
var RatioMap = openai.RatioMap

View File

@ -3,13 +3,15 @@ package ali
import (
"errors"
"fmt"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
"io"
"net/http"
)
// https://help.aliyun.com/zh/dashscope/developer-reference/api-details
@ -96,8 +98,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,23 +1,135 @@
package ali
var ModelList = []string{
"qwen-turbo", "qwen-turbo-latest",
"qwen-plus", "qwen-plus-latest",
"qwen-max", "qwen-max-latest",
"qwen-max-longcontext",
"qwen-vl-max", "qwen-vl-max-latest", "qwen-vl-plus", "qwen-vl-plus-latest",
"qwen-vl-ocr", "qwen-vl-ocr-latest",
"qwen-audio-turbo",
"qwen-math-plus", "qwen-math-plus-latest", "qwen-math-turbo", "qwen-math-turbo-latest",
"qwen-coder-plus", "qwen-coder-plus-latest", "qwen-coder-turbo", "qwen-coder-turbo-latest",
"qwq-32b-preview", "qwen2.5-72b-instruct", "qwen2.5-32b-instruct", "qwen2.5-14b-instruct", "qwen2.5-7b-instruct", "qwen2.5-3b-instruct", "qwen2.5-1.5b-instruct", "qwen2.5-0.5b-instruct",
"qwen2-72b-instruct", "qwen2-57b-a14b-instruct", "qwen2-7b-instruct", "qwen2-1.5b-instruct", "qwen2-0.5b-instruct",
"qwen1.5-110b-chat", "qwen1.5-72b-chat", "qwen1.5-32b-chat", "qwen1.5-14b-chat", "qwen1.5-7b-chat", "qwen1.5-1.8b-chat", "qwen1.5-0.5b-chat",
"qwen-72b-chat", "qwen-14b-chat", "qwen-7b-chat", "qwen-1.8b-chat", "qwen-1.8b-longcontext-chat",
"qwen2-vl-7b-instruct", "qwen2-vl-2b-instruct", "qwen-vl-v1", "qwen-vl-chat-v1",
"qwen2-audio-instruct", "qwen-audio-chat",
"qwen2.5-math-72b-instruct", "qwen2.5-math-7b-instruct", "qwen2.5-math-1.5b-instruct", "qwen2-math-72b-instruct", "qwen2-math-7b-instruct", "qwen2-math-1.5b-instruct",
"qwen2.5-coder-32b-instruct", "qwen2.5-coder-14b-instruct", "qwen2.5-coder-7b-instruct", "qwen2.5-coder-3b-instruct", "qwen2.5-coder-1.5b-instruct", "qwen2.5-coder-0.5b-instruct",
"text-embedding-v1", "text-embedding-v3", "text-embedding-v2", "text-embedding-async-v2", "text-embedding-async-v1",
"ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://help.aliyun.com/zh/model-studio/getting-started/models
// https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
var RatioMap = map[string]ratio.Ratio{
"qwen-long": {Input: 0.0005 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen-turbo": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-latest": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-2024-09-19": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-0919": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-2024-06-24": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-0624": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"qwen-turbo-2024-02-06": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-turbo-0206": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-plus": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen-plus-latest": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen-plus-2024-09-19": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen-plus-0919": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen-plus-2024-08-06": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-0806": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-2024-07-23": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-0723": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-2024-06-24": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-0624": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-2024-02-06": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-plus-0206": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-max": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"qwen-max-latest": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"qwen-max-2024-09-19": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"qwen-max-0919": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"qwen-max-2024-04-28": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-max-0428": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-max-2024-04-03": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-max-0403": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-max-2024-01-07": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-max-0107": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"qwen-vl-max": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-latest": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-2024-12-30": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-1230": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-2024-11-19": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-1119": {Input: 0.003 * ratio.RMB, Output: 0.009 * ratio.RMB},
"qwen-vl-max-2024-10-30": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-max-1030": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-max-2024-08-09": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-max-0809": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-max-2024-02-01": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-max-0201": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-vl-plus": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB},
"qwen-vl-plus-latest": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB},
"qwen-vl-plus-2024-08-09": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB},
"qwen-vl-plus-0809": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB},
"qwen-vl-plus-2023-12-01": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB},
"qwen-vl-ocr": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"qwen-vl-ocr-latest": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"qwen-vl-ocr-2024-10-28": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"qwen-audio-turbo": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-turbo-latest": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-turbo-2024-12-04": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-turbo-1204": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-turbo-2024-08-07": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-turbo-0807": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-math-plus": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-plus-latest": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-plus-2024-09-19": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-plus-0919": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-plus-2024-08-16": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-plus-0816": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-turbo": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-turbo-latest": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-turbo-2024-09-19": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-math-turbo-0919": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen-coder-plus": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen-coder-plus-latest": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen-coder-plus-2024-11-06": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen-coder-plus-1106": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen-coder-turbo": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-coder-turbo-latest": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-coder-turbo-2024-09-19": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-coder-turbo-0919": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwq-32b-preview": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen2.5-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen2.5-32b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen2.5-14b-instruct": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen2.5-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen2.5-3b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2.5-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2.5-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen2-57b-a14b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen2-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen2-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen1.5-110b-chat": {Input: 0.007 * ratio.RMB, Output: 0.014 * ratio.RMB},
"qwen1.5-72b-chat": {Input: 0.005 * ratio.RMB, Output: 0.01 * ratio.RMB},
"qwen1.5-32b-chat": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen1.5-14b-chat": {Input: 0.002 * ratio.RMB, Output: 0.004 * ratio.RMB},
"qwen1.5-7b-chat": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen1.5-1.8b-chat": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen1.5-0.5b-chat": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen-72b-chat": {Input: 0.02 * ratio.RMB, Output: 0.02 * ratio.RMB},
"qwen-14b-chat": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB},
"qwen-7b-chat": {Input: 0.006 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen-1.8b-chat": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen-1.8b-longcontext-chat": {Input: 0.1, Output: 0.1}, // 限时免费(需申请)
"qwen2-vl-72b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2-vl-7b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2-vl-2b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen-vl-v1": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-vl-chat-v1": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2-audio-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen-audio-chat": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2.5-math-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen2.5-math-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen2.5-math-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2-math-72b-instruct": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"qwen2-math-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen2-math-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2.5-coder-32b-instruct": {Input: 0.0035 * ratio.RMB, Output: 0.007 * ratio.RMB},
"qwen2.5-coder-14b-instruct": {Input: 0.002 * ratio.RMB, Output: 0.006 * ratio.RMB},
"qwen2.5-coder-7b-instruct": {Input: 0.001 * ratio.RMB, Output: 0.002 * ratio.RMB},
"qwen2.5-coder-3b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"qwen2.5-coder-1.5b-instruct": {Input: 0.1, Output: 0.1}, // 目前仅供免费体验。免费额度用完后不可调用,敬请关注后续动态。
"qwen2.5-coder-0.5b-instruct": {Input: 0.1, Output: 0.1}, // 限时免费
"text-embedding-v3": {Input: 0.0007 * ratio.RMB, Output: 0},
"text-embedding-v2": {Input: 0.0007 * ratio.RMB, Output: 0},
"text-embedding-v1": {Input: 0.0007 * ratio.RMB, Output: 0},
"text-embedding-async-v2": {Input: 0.0007 * ratio.RMB, Output: 0},
"text-embedding-async-v1": {Input: 0.0007 * ratio.RMB, Output: 0},
"ali-stable-diffusion-xl": {Input: 8.00 * ratio.RMB, Output: 0},
"ali-stable-diffusion-v1.5": {Input: 8.00 * ratio.RMB, Output: 0},
"wanx-v1": {Input: 8.00 * ratio.RMB, Output: 0},
}

View File

@ -3,12 +3,13 @@ package ali
import (
"bufio"
"encoding/json"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/common/render"
"io"
"net/http"
"strings"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/common/render"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common"
"github.com/songquanpeng/one-api/common/helper"
@ -119,7 +120,11 @@ func embeddingResponseAli2OpenAI(response *EmbeddingResponse) *openai.EmbeddingR
Object: "list",
Data: make([]openai.EmbeddingResponseItem, 0, len(response.Output.Embeddings)),
Model: "text-embedding-v1",
Usage: model.Usage{TotalTokens: response.Usage.TotalTokens},
Usage: model.Usage{
PromptTokens: response.Usage.InputTokens,
CompletionTokens: response.Usage.OutputTokens,
TotalTokens: response.Usage.TotalTokens,
},
}
for _, item := range response.Output.Embeddings {

View File

@ -9,6 +9,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -70,8 +71,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,12 +1,17 @@
package anthropic
var ModelList = []string{
"claude-instant-1.2", "claude-2.0", "claude-2.1",
"claude-3-haiku-20240307",
"claude-3-5-haiku-20241022",
"claude-3-sonnet-20240229",
"claude-3-opus-20240229",
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20241022",
"claude-3-5-sonnet-latest",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://www.anthropic.com/api#pricing
var RatioMap = map[string]ratio.Ratio{
"claude-instant-1.2": {Input: 0.8 * ratio.MILLI_USD, Output: 2.4 * ratio.MILLI_USD},
"claude-2.0": {Input: 8.0 * ratio.MILLI_USD, Output: 24 * ratio.MILLI_USD},
"claude-2.1": {Input: 8.0 * ratio.MILLI_USD, Output: 24 * ratio.MILLI_USD},
"claude-3-haiku-20240307": {Input: 0.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD},
"claude-3-5-haiku-20241022": {Input: 0.8 * ratio.MILLI_USD, Output: 4 * ratio.MILLI_USD},
"claude-3-sonnet-20240229": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-opus-20240229": {Input: 15 * ratio.MILLI_USD, Output: 75 * ratio.MILLI_USD},
"claude-3-5-sonnet-20240620": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-5-sonnet-20241022": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-5-sonnet-latest": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
}

View File

@ -10,7 +10,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/bedrockruntime"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/aws/utils"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -18,8 +18,6 @@ import (
var _ adaptor.Adaptor = new(Adaptor)
type Adaptor struct {
awsAdapter utils.AwsAdapter
Meta *meta.Meta
AwsClient *bedrockruntime.Client
}
@ -42,15 +40,27 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
return nil, errors.New("adaptor not found")
}
a.awsAdapter = adaptor
return adaptor.ConvertRequest(c, relayMode, request)
}
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
if a.awsAdapter == nil {
return nil, utils.WrapErr(errors.New("awsAdapter is nil"))
adaptor := GetAdaptor(meta.ActualModelName)
if adaptor == nil {
return nil, &model.ErrorWithStatusCode{
StatusCode: http.StatusInternalServerError,
Error: model.Error{Message: "adaptor not found"},
}
return a.awsAdapter.DoResponse(c, a.AwsClient, meta)
}
return adaptor.DoResponse(c, a.AwsClient, meta)
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
adaptor := GetAdaptor(meta.ActualModelName)
if adaptor == nil {
return nil
}
return adaptor.GetRatio(meta)
}
func (a *Adaptor) GetModelList() (models []string) {

View File

@ -5,8 +5,10 @@ import (
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/anthropic"
"github.com/songquanpeng/one-api/relay/adaptor/aws/utils"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -35,3 +37,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta
}
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, anthropic.RatioMap)
}

View File

@ -38,6 +38,8 @@ var AwsModelIDMap = map[string]string{
"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
}
var RatioMap = anthropic.RatioMap
func awsModelID(requestModel string) (string, error) {
if awsModelID, ok := AwsModelIDMap[requestModel]; ok {
return awsModelID, nil

View File

@ -6,7 +6,9 @@ import (
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/aws/utils"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -35,3 +37,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta
}
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}

View File

@ -22,6 +22,7 @@ import (
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay/adaptor/aws/utils"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
relaymodel "github.com/songquanpeng/one-api/relay/model"
)
@ -32,6 +33,11 @@ var AwsModelIDMap = map[string]string{
"llama3-70b-8192": "meta.llama3-70b-instruct-v1:0",
}
var RatioMap = map[string]ratio.Ratio{
"llama3-8b-8192": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"llama3-70b-8192": {Input: 2.65 * ratio.MILLI_USD, Output: 3.5 * ratio.MILLI_USD},
}
func awsModelID(requestModel string) (string, error) {
if awsModelID, ok := AwsModelIDMap[requestModel]; ok {
return awsModelID, nil

View File

@ -9,6 +9,7 @@ import (
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/bedrockruntime"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -16,6 +17,7 @@ import (
type AwsAdapter interface {
ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error)
DoResponse(c *gin.Context, awsCli *bedrockruntime.Client, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode)
GetRatio(meta *meta.Meta) *ratio.Ratio
}
type Adaptor struct {

View File

@ -1,7 +1,10 @@
package baichuan
var ModelList = []string{
"Baichuan2-Turbo",
"Baichuan2-Turbo-192k",
"Baichuan-Text-Embedding",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://platform.baichuan-ai.com/price
var RatioMap = map[string]ratio.Ratio{
"Baichuan2-Turbo": {Input: 0.008 * ratio.RMB, Output: 0.008 * ratio.RMB},
"Baichuan2-Turbo-192k": {Input: 0.016 * ratio.RMB, Output: 0.016 * ratio.RMB},
"Baichuan-Text-Embedding": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB},
}

View File

@ -3,15 +3,16 @@ package baidu
import (
"errors"
"fmt"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/relaymode"
"io"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
)
type Adaptor struct {
@ -120,6 +121,10 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Read
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
if meta.IsStream {
err, usage = StreamHandler(c, resp)
@ -135,7 +140,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,20 +1,38 @@
package baidu
var ModelList = []string{
"ERNIE-4.0-8K",
"ERNIE-3.5-8K",
"ERNIE-3.5-8K-0205",
"ERNIE-3.5-8K-1222",
"ERNIE-Bot-8K",
"ERNIE-3.5-4K-0205",
"ERNIE-Speed-8K",
"ERNIE-Speed-128K",
"ERNIE-Lite-8K-0922",
"ERNIE-Lite-8K-0308",
"ERNIE-Tiny-8K",
"BLOOMZ-7B",
"Embedding-V1",
"bge-large-zh",
"bge-large-en",
"tao-8k",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
var RatioMap = map[string]ratio.Ratio{
"ERNIE-4.0-Turbo-128K": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"ERNIE-4.0-Turbo-8K": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"ERNIE-4.0-Turbo-8K-Preview": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"ERNIE-4.0-Turbo-8K-0628": {Input: 0.02 * ratio.RMB, Output: 0.06 * ratio.RMB},
"ERNIE-4.0-8K": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB},
"ERNIE-4.0-8K-0613": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB},
"ERNIE-4.0-8K-Latest": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB},
"ERNIE-4.0-8K-Preview": {Input: 0.03 * ratio.RMB, Output: 0.09 * ratio.RMB},
"ERNIE-3.5-128K": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"ERNIE-3.5-8K": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"ERNIE-3.5-8K-0701": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"ERNIE-3.5-8K-Preview": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"ERNIE-3.5-8K-0613": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"ERNIE-Speed-Pro-128K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"ERNIE-Novel-8K": {Input: 0.04 * ratio.RMB, Output: 0.12 * ratio.RMB},
"ERNIE-Speed-128K": {Input: 0.1, Output: 0.1}, // 免费
"ERNIE-Speed-8K": {Input: 0.1, Output: 0.1}, // 免费
"ERNIE-Lite-8K": {Input: 0.1, Output: 0.1}, // 免费
"ERNIE-Tiny-8K": {Input: 0.1, Output: 0.1}, // 免费
"ERNIE-Functions-8K": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB},
"ERNIE-Character-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"ERNIE-Character-Fiction-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"ERNIE-Character-Fiction-8K-Preview ": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"ERNIE-Lite-Pro-128K": {Input: 0.0002 * ratio.RMB, Output: 0.0004 * ratio.RMB},
"Qianfan-Agent-Speed-8K": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"Qianfan-Agent-Lite-8K": {Input: 0.0002 * ratio.RMB, Output: 0.0004 * ratio.RMB},
"BLOOMZ-7B": {Input: 0.004 * ratio.RMB, Output: 0.004 * ratio.RMB},
"Embedding-V1": {Input: 0.0005 * ratio.RMB, Output: 0},
"bge-large-zh": {Input: 0.0005 * ratio.RMB, Output: 0},
"bge-large-en": {Input: 0.0005 * ratio.RMB, Output: 0},
"tao-8k": {Input: 0.0005 * ratio.RMB, Output: 0},
}

View File

@ -9,6 +9,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
@ -91,6 +92,10 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, nil)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
}

View File

@ -8,6 +8,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -55,8 +56,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,14 +1,19 @@
package cohere
var ModelList = []string{
"command", "command-nightly",
"command-light", "command-light-nightly",
"command-r", "command-r-plus",
}
import "github.com/songquanpeng/one-api/relay/billing/ratio"
func init() {
num := len(ModelList)
for i := 0; i < num; i++ {
ModelList = append(ModelList, ModelList[i]+"-internet")
}
// https://cohere.com/pricing
var RatioMap = map[string]ratio.Ratio{
"command": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD},
"command-internet": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD},
"command-nightly": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD},
"command-nightly-internet": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD},
"command-light": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-light-internet": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-light-nightly": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-light-nightly-internet": {Input: 0.3 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-r": {Input: 0.15 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-r-internet": {Input: 0.15 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"command-r-plus": {Input: 2.5 * ratio.MILLI_USD, Output: 10 * ratio.MILLI_USD},
"command-r-plus-internet": {Input: 2.5 * ratio.MILLI_USD, Output: 10 * ratio.MILLI_USD},
}

View File

@ -5,6 +5,7 @@ import (
"fmt"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/client"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"io"
"net/http"
@ -50,3 +51,24 @@ func DoRequest(c *gin.Context, req *http.Request) (*http.Response, error) {
_ = c.Request.Body.Close()
return resp, nil
}
func GetRatioHelper(meta *meta.Meta, ratioMap map[string]ratio.Ratio) *ratio.Ratio {
var result ratio.Ratio
if ratio, ok := ratioMap[meta.OriginModelName]; ok {
result = ratio
return &result
}
if ratio, ok := ratioMap[meta.ActualModelName]; ok {
result = ratio
return &result
}
return nil
}
func GetModelListHelper(ratioMap map[string]ratio.Ratio) []string {
var modelList []string
for model := range ratioMap {
modelList = append(modelList, model)
}
return modelList
}
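
The two helpers above are what the per-channel adaptors elsewhere in this commit delegate to: GetRatioHelper resolves a price first by the user-facing (origin) model name and then by the mapped (actual) name, and GetModelListHelper derives the advertised model list from the same map, so pricing and model availability can no longer drift apart. A small usage sketch; the RatioMap literal and lookup function are illustrative:

package example // hypothetical illustration

import (
	"github.com/songquanpeng/one-api/relay/adaptor"
	"github.com/songquanpeng/one-api/relay/billing/ratio"
	"github.com/songquanpeng/one-api/relay/meta"
)

var RatioMap = map[string]ratio.Ratio{
	"example-model": {Input: 1 * ratio.MILLI_USD, Output: 2 * ratio.MILLI_USD},
}

func lookup(m *meta.Meta) {
	r := adaptor.GetRatioHelper(m, RatioMap) // nil when the model is not priced
	models := adaptor.GetModelListHelper(RatioMap)
	_ = r
	_ = models
}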

View File

@ -3,13 +3,15 @@ package coze
import (
"errors"
"fmt"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
)
type Adaptor struct {
@ -66,8 +68,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,3 +1,5 @@
package coze
var ModelList = []string{}
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{}

View File

@ -3,12 +3,14 @@ package deepl
import (
"errors"
"fmt"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
type Adaptor struct {
@ -64,8 +66,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,9 +1,10 @@
package deepl
// https://developers.deepl.com/docs/api-reference/glossaries
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"deepl-zh",
"deepl-en",
"deepl-ja",
// https://developers.deepl.com/docs/api-reference/glossaries
var RatioMap = map[string]ratio.Ratio{
"deepl-zh": {Input: 25.0 * ratio.MILLI_USD, Output: 0},
"deepl-en": {Input: 25.0 * ratio.MILLI_USD, Output: 0},
"deepl-ja": {Input: 25.0 * ratio.MILLI_USD, Output: 0},
}

View File

@ -1,6 +1,8 @@
package deepseek
var ModelList = []string{
"deepseek-chat",
"deepseek-coder",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"deepseek-chat": {Input: 1 * ratio.MILLI_RMB, Output: 2 * ratio.MILLI_RMB},
"deepseek-coder": {Input: 1 * ratio.MILLI_RMB, Output: 2 * ratio.MILLI_RMB},
}

View File

@ -1,13 +1,15 @@
package doubao
// https://console.volcengine.com/ark/region:ark+cn-beijing/model
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"Doubao-pro-128k",
"Doubao-pro-32k",
"Doubao-pro-4k",
"Doubao-lite-128k",
"Doubao-lite-32k",
"Doubao-lite-4k",
"Doubao-embedding",
// https://www.volcengine.com/product/doubao
var RatioMap = map[string]ratio.Ratio{
"Doubao-vision-pro-32k": {Input: 0.0030 * ratio.RMB, Output: 0.0090 * ratio.RMB},
"Doubao-vision-lite-32k": {Input: 0.0015 * ratio.RMB, Output: 0.0045 * ratio.RMB},
"Doubao-pro-256k": {Input: 0.0050 * ratio.RMB, Output: 0.0090 * ratio.RMB},
"Doubao-pro-128k": {Input: 0.0050 * ratio.RMB, Output: 0.0090 * ratio.RMB},
"Doubao-pro-32k": {Input: 0.0008 * ratio.RMB, Output: 0.0020 * ratio.RMB},
"Doubao-lite-128k": {Input: 0.0008 * ratio.RMB, Output: 0.0010 * ratio.RMB},
"Doubao-lite-32k": {Input: 0.0003 * ratio.RMB, Output: 0.0006 * ratio.RMB},
"Doubao-embedding": {Input: 0.0005 * ratio.RMB, Output: 0},
}

View File

@ -11,6 +11,7 @@ import (
"github.com/songquanpeng/one-api/common/helper"
channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
@ -92,8 +93,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return channelhelper.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return channelhelper.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,11 +1,53 @@
package gemini
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://ai.google.dev/models/gemini
var ModelList = []string{
"gemini-pro", "gemini-1.0-pro",
"gemini-1.5-flash", "gemini-1.5-pro",
"text-embedding-004", "aqa",
"gemini-2.0-flash-exp",
"gemini-2.0-flash-thinking-exp",
var gemini15FlashRatio = ratio.Ratio{
Input: 0.075 * ratio.MILLI_USD,
Output: 0.30 * ratio.MILLI_USD,
LongThreshold: 128000,
LongInput: 0.15 * ratio.MILLI_USD,
LongOutput: 0.60 * ratio.MILLI_USD,
}
var gemini15ProRatio = ratio.Ratio{
Input: 1.25 * ratio.MILLI_USD,
Output: 5.00 * ratio.MILLI_USD,
LongThreshold: 128000,
LongInput: 2.50 * ratio.MILLI_USD,
LongOutput: 10.00 * ratio.MILLI_USD,
}
var gemini10ProRatio = ratio.Ratio{
Input: 0.50 * ratio.MILLI_USD,
Output: 1.50 * ratio.MILLI_USD,
}
var gemini15Flash8bRatio = ratio.Ratio{
Input: 0.0375 * ratio.MILLI_USD,
Output: 0.15 * ratio.MILLI_USD,
LongThreshold: 128000,
LongInput: 0.075 * ratio.MILLI_USD,
LongOutput: 0.30 * ratio.MILLI_USD,
}
// https://ai.google.dev/pricing
// https://ai.google.dev/gemini-api/docs/models/gemini
// https://cloud.google.com/vertex-ai/generative-ai/pricing?hl=zh-cn#google_models
var RatioMap = map[string]ratio.Ratio{
"gemini-2.0-flash-exp": {Input: 0.1, Output: 0.1}, // currently free of charge
"gemini-2.0-flash-thinking-exp": {Input: 0.1, Output: 0.1}, // currently free of charge
"gemini-1.5-flash": gemini15FlashRatio,
"gemini-1.5-flash-001": gemini15FlashRatio,
"gemini-1.5-flash-002": gemini15FlashRatio,
"gemini-1.5-pro": gemini15ProRatio,
"gemini-1.5-pro-001": gemini15ProRatio,
"gemini-1.5-pro-002": gemini15ProRatio,
"gemini-1.0-pro": gemini10ProRatio,
"gemini-1.0-pro-001": gemini10ProRatio,
"gemini-1.5-flash-8b": gemini15Flash8bRatio,
"gemini-1.5-flash-8b-001": gemini15Flash8bRatio,
"text-embedding-004": {Input: 0.1, Output: 0.1}, // free of charge
}
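
The Gemini entries above are the first in this commit to use the long-context fields on ratio.Ratio (LongThreshold, LongInput, LongOutput). The billing code that applies them is not part of this hunk; the following is a hedged sketch of the selection logic one would expect, with a local struct standing in for ratio.Ratio and its field types assumed:

package example // hypothetical illustration

// ratioSpec mirrors the fields used above on ratio.Ratio; the field types are assumptions.
type ratioSpec struct {
	Input, Output         float64
	LongThreshold         int
	LongInput, LongOutput float64
}

// effectiveRatio picks the long-context prices once the prompt exceeds the
// threshold, and falls back to the standard prices otherwise.
func effectiveRatio(r ratioSpec, promptTokens int) (input, output float64) {
	if r.LongThreshold > 0 && promptTokens > r.LongThreshold {
		return r.LongInput, r.LongOutput
	}
	return r.Input, r.Output
}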

View File

@ -1,27 +1,18 @@
package groq
// https://console.groq.com/docs/models
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"gemma-7b-it",
"gemma2-9b-it",
"llama-3.1-70b-versatile",
"llama-3.1-8b-instant",
"llama-3.2-11b-text-preview",
"llama-3.2-11b-vision-preview",
"llama-3.2-1b-preview",
"llama-3.2-3b-preview",
"llama-3.2-11b-vision-preview",
"llama-3.2-90b-text-preview",
"llama-3.2-90b-vision-preview",
"llama-guard-3-8b",
"llama3-70b-8192",
"llama3-8b-8192",
"llama3-groq-70b-8192-tool-use-preview",
"llama3-groq-8b-8192-tool-use-preview",
"llava-v1.5-7b-4096-preview",
"mixtral-8x7b-32768",
"distil-whisper-large-v3-en",
"whisper-large-v3",
"whisper-large-v3-turbo",
// https://groq.com/pricing/
// https://console.groq.com/docs/models
var RatioMap = map[string]ratio.Ratio{
"distil-whisper-large-v3-en": {Input: 0.02 / 3600 * 20 * ratio.USD, Output: 0},
"gemma2-9b-it": {Input: 0.20 * ratio.MILLI_USD, Output: 0.20 * ratio.MILLI_USD},
"llama-3.3-70b-versatile": {Input: 0.59 * ratio.MILLI_USD, Output: 0.79 * ratio.MILLI_USD},
"llama-3.1-8b-instant": {Input: 0.05 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD},
"llama-guard-3-8b": {Input: 0.20 * ratio.MILLI_USD, Output: 0.20 * ratio.MILLI_USD},
"llama3-70b-8192": {Input: 0.59 * ratio.MILLI_USD, Output: 0.79 * ratio.MILLI_USD},
"llama3-8b-8192": {Input: 0.05 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD},
"mixtral-8x7b-32768": {Input: 0.24 * ratio.MILLI_USD, Output: 0.24 * ratio.MILLI_USD},
"whisper-large-v3": {Input: 0.111 / 3600 * 20 * ratio.USD, Output: 0},
"whisper-large-v3-turbo": {Input: 0.04 / 3600 * 20 * ratio.USD, Output: 0},
}

View File

@ -2,6 +2,7 @@ package adaptor
import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
@ -16,6 +17,7 @@ type Adaptor interface {
ConvertImageRequest(request *model.ImageRequest) (any, error)
DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error)
DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode)
GetRatio(meta *meta.Meta) *ratio.Ratio
GetModelList() []string
GetChannelName() string
}

View File

@ -1,9 +1,9 @@
package lingyiwanwu
// https://platform.lingyiwanwu.com/docs
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"yi-34b-chat-0205",
"yi-34b-chat-200k",
"yi-vl-plus",
// https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9
var RatioMap = map[string]ratio.Ratio{
"yi-lightning": {Input: 0.99 * ratio.MILLI_RMB, Output: 0.99 * ratio.MILLI_RMB},
"yi-vision-v2": {Input: 6 * ratio.MILLI_RMB, Output: 6 * ratio.MILLI_RMB},
}

View File

@ -1,11 +1,15 @@
package minimax
// https://www.minimaxi.com/document/guides/chat-model/V2?id=65e0736ab2845de20908e2dd
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"abab6.5-chat",
"abab6.5s-chat",
"abab6-chat",
"abab5.5-chat",
"abab5.5s-chat",
// https://platform.minimaxi.com/document/Price
// https://platform.minimaxi.com/document/ChatCompletion%20v2
var RatioMap = map[string]ratio.Ratio{
"abab7-chat-preview": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB},
"abab6.5s-chat": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB},
"abab6.5g-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"abab6.5t-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"abab5.5s-chat": {Input: 0.005 * ratio.RMB, Output: 0.005 * ratio.RMB},
"abab5.5-chat": {Input: 0.015 * ratio.RMB, Output: 0.015 * ratio.RMB},
}

View File

@ -1,10 +1,20 @@
package mistral
var ModelList = []string{
"open-mistral-7b",
"open-mixtral-8x7b",
"mistral-small-latest",
"mistral-medium-latest",
"mistral-large-latest",
"mistral-embed",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://mistral.ai/technology/#pricing
var RatioMap = map[string]ratio.Ratio{
"mistral-large-latest": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD},
"pixtral-large-latest": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD},
"mistral-small-latest": {Input: 0.2 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"codestral-latest": {Input: 0.2 * ratio.MILLI_USD, Output: 0.6 * ratio.MILLI_USD},
"ministral-8b-latest": {Input: 0.1 * ratio.MILLI_USD, Output: 0.1 * ratio.MILLI_USD},
"ministral-3b-latest": {Input: 0.04 * ratio.MILLI_USD, Output: 0.04 * ratio.MILLI_USD},
"mistral-embed": {Input: 0.1 * ratio.MILLI_USD, Output: 0},
"mistral-moderation-latest": {Input: 0.1 * ratio.MILLI_USD, Output: 0},
"pixtral-12b": {Input: 0.15 * ratio.MILLI_USD, Output: 0.15 * ratio.MILLI_USD},
"mistral-nemo": {Input: 0.15 * ratio.MILLI_USD, Output: 0.15 * ratio.MILLI_USD},
"open-mistral-7b": {Input: 0.25 * ratio.MILLI_USD, Output: 0.25 * ratio.MILLI_USD},
"open-mixtral-8x7b": {Input: 0.7 * ratio.MILLI_USD, Output: 0.7 * ratio.MILLI_USD},
"open-mixtral-8x22b": {Input: 2 * ratio.MILLI_USD, Output: 6 * ratio.MILLI_USD},
}

View File

@ -1,7 +1,10 @@
package moonshot
var ModelList = []string{
"moonshot-v1-8k",
"moonshot-v1-32k",
"moonshot-v1-128k",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://platform.moonshot.cn/docs/pricing/chat#%E4%BA%A7%E5%93%81%E5%AE%9A%E4%BB%B7
var RatioMap = map[string]ratio.Ratio{
"moonshot-v1-8k": {Input: 12 * ratio.MILLI_RMB, Output: 12 * ratio.MILLI_RMB},
"moonshot-v1-32k": {Input: 24 * ratio.MILLI_RMB, Output: 24 * ratio.MILLI_RMB},
"moonshot-v1-128k": {Input: 60 * ratio.MILLI_RMB, Output: 60 * ratio.MILLI_RMB},
}

View File

@ -1,19 +1,38 @@
package novita
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://novita.ai/llm-api
var ModelList = []string{
"meta-llama/llama-3-8b-instruct",
"meta-llama/llama-3-70b-instruct",
"nousresearch/hermes-2-pro-llama-3-8b",
"nousresearch/nous-hermes-llama2-13b",
"mistralai/mistral-7b-instruct",
"cognitivecomputations/dolphin-mixtral-8x22b",
"sao10k/l3-70b-euryale-v2.1",
"sophosympatheia/midnight-rose-70b",
"gryphe/mythomax-l2-13b",
"Nous-Hermes-2-Mixtral-8x7B-DPO",
"lzlv_70b",
"teknium/openhermes-2.5-mistral-7b",
"microsoft/wizardlm-2-8x22b",
var RatioMap = map[string]ratio.Ratio{
"meta-llama/llama-3.3-70b-instruct": {Input: 0.39 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD},
"meta-llama/llama-3.1-8b-instruct": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD},
"meta-llama/llama-3.1-8b-instruct-max": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD},
"meta-llama/llama-3.1-70b-instruct": {Input: 0.34 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD},
"meta-llama/llama-3-8b-instruct": {Input: 0.04 * ratio.MILLI_USD, Output: 0.04 * ratio.MILLI_USD},
"meta-llama/llama-3-70b-instruct": {Input: 0.51 * ratio.MILLI_USD, Output: 0.74 * ratio.MILLI_USD},
"gryphe/mythomax-l2-13b": {Input: 0.09 * ratio.MILLI_USD, Output: 0.09 * ratio.MILLI_USD},
"google/gemma-2-9b-it": {Input: 0.08 * ratio.MILLI_USD, Output: 0.08 * ratio.MILLI_USD},
"mistralai/mistral-nemo": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD},
"microsoft/wizardlm-2-8x22b": {Input: 0.62 * ratio.MILLI_USD, Output: 0.62 * ratio.MILLI_USD},
"mistralai/mistral-7b-instruct": {Input: 0.059 * ratio.MILLI_USD, Output: 0.059 * ratio.MILLI_USD},
"openchat/openchat-7b": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD},
"nousresearch/hermes-2-pro-llama-3-8b": {Input: 0.14 * ratio.MILLI_USD, Output: 0.14 * ratio.MILLI_USD},
"sao10k/l3-70b-euryale-v2.1": {Input: 1.48 * ratio.MILLI_USD, Output: 1.48 * ratio.MILLI_USD},
"cognitivecomputations/dolphin-mixtral-8x22b": {Input: 0.9 * ratio.MILLI_USD, Output: 0.9 * ratio.MILLI_USD},
"jondurbin/airoboros-l2-70b": {Input: 0.5 * ratio.MILLI_USD, Output: 0.5 * ratio.MILLI_USD},
"nousresearch/nous-hermes-llama2-13b": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD},
"teknium/openhermes-2.5-mistral-7b": {Input: 0.17 * ratio.MILLI_USD, Output: 0.17 * ratio.MILLI_USD},
"sophosympatheia/midnight-rose-70b": {Input: 0.8 * ratio.MILLI_USD, Output: 0.8 * ratio.MILLI_USD},
"Sao10K/L3-8B-Stheno-v3.2": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD},
"sao10k/l3-8b-lunaris": {Input: 0.05 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD},
"qwen/qwen-2-vl-72b-instruct": {Input: 0.45 * ratio.MILLI_USD, Output: 0.45 * ratio.MILLI_USD},
"meta-llama/llama-3.2-1b-instruct": {Input: 0.02 * ratio.MILLI_USD, Output: 0.02 * ratio.MILLI_USD},
"meta-llama/llama-3.2-11b-vision-instruct": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD},
"meta-llama/llama-3.2-3b-instruct": {Input: 0.03 * ratio.MILLI_USD, Output: 0.05 * ratio.MILLI_USD},
"meta-llama/llama-3.1-8b-instruct-bf16": {Input: 0.06 * ratio.MILLI_USD, Output: 0.06 * ratio.MILLI_USD},
"qwen/qwen-2.5-72b-instruct": {Input: 0.38 * ratio.MILLI_USD, Output: 0.4 * ratio.MILLI_USD},
"sao10k/l31-70b-euryale-v2.2": {Input: 1.48 * ratio.MILLI_USD, Output: 1.48 * ratio.MILLI_USD},
"qwen/qwen-2-7b-instruct": {Input: 0.054 * ratio.MILLI_USD, Output: 0.054 * ratio.MILLI_USD},
"qwen/qwen-2-72b-instruct": {Input: 0.34 * ratio.MILLI_USD, Output: 0.39 * ratio.MILLI_USD},
}

View File

@ -3,11 +3,13 @@ package ollama
import (
"errors"
"fmt"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/relaymode"
"io"
"net/http"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/relaymode"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/model"
@ -73,8 +75,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,11 +1,13 @@
package ollama
var ModelList = []string{
"codellama:7b-instruct",
"llama2:7b",
"llama2:latest",
"llama3:latest",
"phi3:latest",
"qwen:0.5b-chat",
"qwen:7b",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"codellama:7b-instruct": {Input: 0, Output: 0},
"llama2:7b": {Input: 0, Output: 0},
"llama2:latest": {Input: 0, Output: 0},
"llama3:latest": {Input: 0, Output: 0},
"phi3:latest": {Input: 0, Output: 0},
"qwen:0.5b-chat": {Input: 0, Output: 0},
"qwen:7b": {Input: 0, Output: 0},
}

View File

@ -12,6 +12,7 @@ import (
"github.com/songquanpeng/one-api/relay/adaptor/doubao"
"github.com/songquanpeng/one-api/relay/adaptor/minimax"
"github.com/songquanpeng/one-api/relay/adaptor/novita"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
@ -118,9 +119,14 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
_, ratioMap := GetCompatibleChannelMeta(a.ChannelType)
return adaptor.GetRatioHelper(meta, ratioMap)
}
func (a *Adaptor) GetModelList() []string {
_, modelList := GetCompatibleChannelMeta(a.ChannelType)
return modelList
_, ratioMap := GetCompatibleChannelMeta(a.ChannelType)
return adaptor.GetModelListHelper(ratioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -15,6 +15,7 @@ import (
"github.com/songquanpeng/one-api/relay/adaptor/stepfun"
"github.com/songquanpeng/one-api/relay/adaptor/togetherai"
"github.com/songquanpeng/one-api/relay/adaptor/xai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
)
@ -36,39 +37,39 @@ var CompatibleChannels = []int{
channeltype.XAI,
}
func GetCompatibleChannelMeta(channelType int) (string, []string) {
func GetCompatibleChannelMeta(channelType int) (string, map[string]ratio.Ratio) {
switch channelType {
case channeltype.Azure:
return "azure", ModelList
return "azure", RatioMap
case channeltype.AI360:
return "360", ai360.ModelList
return "360", ai360.RatioMap
case channeltype.Moonshot:
return "moonshot", moonshot.ModelList
return "moonshot", moonshot.RatioMap
case channeltype.Baichuan:
return "baichuan", baichuan.ModelList
return "baichuan", baichuan.RatioMap
case channeltype.Minimax:
return "minimax", minimax.ModelList
return "minimax", minimax.RatioMap
case channeltype.Mistral:
return "mistralai", mistral.ModelList
return "mistralai", mistral.RatioMap
case channeltype.Groq:
return "groq", groq.ModelList
return "groq", groq.RatioMap
case channeltype.LingYiWanWu:
return "lingyiwanwu", lingyiwanwu.ModelList
return "lingyiwanwu", lingyiwanwu.RatioMap
case channeltype.StepFun:
return "stepfun", stepfun.ModelList
return "stepfun", stepfun.RatioMap
case channeltype.DeepSeek:
return "deepseek", deepseek.ModelList
return "deepseek", deepseek.RatioMap
case channeltype.TogetherAI:
return "together.ai", togetherai.ModelList
return "together.ai", togetherai.RatioMap
case channeltype.Doubao:
return "doubao", doubao.ModelList
return "doubao", doubao.RatioMap
case channeltype.Novita:
return "novita", novita.ModelList
return "novita", novita.RatioMap
case channeltype.SiliconFlow:
return "siliconflow", siliconflow.ModelList
return "siliconflow", siliconflow.RatioMap
case channeltype.XAI:
return "xai", xai.ModelList
return "xai", xai.RatioMap
default:
return "openai", ModelList
return "openai", RatioMap
}
}

View File

@ -1,27 +1,61 @@
package openai
var ModelList = []string{
"gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-0125",
"gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct",
"gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-1106-preview", "gpt-4-0125-preview",
"gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
"gpt-4o", "gpt-4o-2024-05-13",
"gpt-4o-2024-08-06",
"gpt-4o-2024-11-20",
"chatgpt-4o-latest",
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
"gpt-4-vision-preview",
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
"text-moderation-latest", "text-moderation-stable",
"text-davinci-edit-001",
"davinci-002", "babbage-002",
"dall-e-2", "dall-e-3",
"whisper-1",
"tts-1", "tts-1-1106", "tts-1-hd", "tts-1-hd-1106",
"o1", "o1-2024-12-17",
"o1-preview", "o1-preview-2024-09-12",
"o1-mini", "o1-mini-2024-09-12",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"gpt-4": {Input: 15, Output: 30},
"gpt-4-0314": {Input: 15, Output: 30},
"gpt-4-0613": {Input: 15, Output: 30},
"gpt-4-32k": {Input: 30, Output: 60},
"gpt-4-32k-0314": {Input: 30, Output: 60},
"gpt-4-32k-0613": {Input: 30, Output: 60},
"gpt-4-1106-preview": {Input: 5, Output: 15},
"gpt-4-0125-preview": {Input: 5, Output: 15},
"gpt-4-turbo-preview": {Input: 5, Output: 15}, // $0.01 / 1K tokens
"gpt-4-turbo": {Input: 5, Output: 15}, // $0.01 / 1K tokens
"gpt-4-turbo-2024-04-09": {Input: 5, Output: 15}, // $0.01 / 1K tokens
"gpt-4o": {Input: 1.25, Output: 5}, // $0.005 / 1K tokens
"chatgpt-4o-latest": {Input: 2.5, Output: 7.5}, // $0.005 / 1K tokens
"gpt-4o-2024-05-13": {Input: 2.5, Output: 7.5}, // $0.005 / 1K tokens
"gpt-4o-2024-08-06": {Input: 1.25, Output: 5}, // $0.0025 / 1K tokens
"gpt-4o-2024-11-20": {Input: 1.25, Output: 5}, // $0.0025 / 1K tokens
"gpt-4o-mini": {Input: 0.075, Output: 0.3}, // $0.00015 / 1K tokens
"gpt-4o-mini-2024-07-18": {Input: 0.075, Output: 0.3}, // $0.00015 / 1K tokens
"gpt-4-vision-preview": {Input: 5, Output: 15}, // $0.01 / 1K tokens
"gpt-3.5-turbo": {Input: 0.25, Output: 0.75}, // $0.0005 / 1K tokens
"gpt-3.5-turbo-0301": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens
"gpt-3.5-turbo-0613": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens
"gpt-3.5-turbo-16k": {Input: 1.5, Output: 2}, // $0.003 / 1K tokens
"gpt-3.5-turbo-16k-0613": {Input: 1.5, Output: 2}, // $0.003 / 1K tokens
"gpt-3.5-turbo-instruct": {Input: 0.75, Output: 1}, // $0.0015 / 1K tokens
"gpt-3.5-turbo-1106": {Input: 0.5, Output: 1}, // $0.001 / 1K tokens
"gpt-3.5-turbo-0125": {Input: 0.25, Output: 0.75}, // $0.0005 / 1K tokens
"davinci-002": {Input: 1, Output: 1}, // $0.002 / 1K tokens
"babbage-002": {Input: 0.2, Output: 0.2}, // $0.0004 / 1K tokens
"text-ada-001": {Input: 0.2, Output: 0.2}, // $0.0004 / 1K tokens
"text-babbage-001": {Input: 0.25, Output: 0.25}, // $0.0005 / 1K tokens
"text-curie-001": {Input: 1, Output: 1}, // $0.002 / 1K tokens
"text-davinci-002": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"text-davinci-003": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"text-davinci-edit-001": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"code-davinci-edit-001": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"whisper-1": {Input: 1, Output: 1}, // $0.006 / minute -> $0.002 / 20 seconds -> $0.002 / 1K tokens -> 20 seconds / 1K tokens
"tts-1": {Input: 7.5, Output: 7.5}, // $0.015 / 1K characters
"tts-1-1106": {Input: 7.5, Output: 7.5}, // $0.015 / 1K characters
"tts-1-hd": {Input: 15, Output: 15}, // $0.030 / 1K characters
"tts-1-hd-1106": {Input: 15, Output: 15}, // $0.030 / 1K characters
"davinci": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"curie": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"babbage": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"ada": {Input: 10, Output: 10}, // $0.02 / 1K tokens
"text-embedding-ada-002": {Input: 0.05, Output: 0}, // $0.001 / 1K tokens
"text-embedding-3-small": {Input: 0.01, Output: 0}, // $0.0002 / 1K tokens
"text-embedding-3-large": {Input: 0.065, Output: 0}, // $0.0013 / 1K tokens
"text-search-ada-doc-001": {Input: 10, Output: 0}, // $0.02 / 1K tokens
"text-moderation-stable": {Input: 0.1, Output: 0}, // currently free to use
"text-moderation-latest": {Input: 0.1, Output: 0}, // currently free to use
"omni-moderation-latest": {Input: 0.1, Output: 0}, // currently free to use
"omni-moderation-2024-09-26": {Input: 0.1, Output: 0}, // currently free to use
"dall-e-2": {Input: 0.02 * ratio.USD, Output: 0}, // $0.016 - $0.020 / image
"dall-e-3": {Input: 0.04 * ratio.USD, Output: 0}, // $0.040 - $0.120 / image
}

View File

@ -3,14 +3,14 @@ package openai
import (
"errors"
"fmt"
"math"
"strings"
"github.com/pkoukk/tiktoken-go"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/image"
"github.com/songquanpeng/one-api/common/logger"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
"math"
"strings"
)
// tokenEncoderMap won't grow after initialization
@ -32,7 +32,7 @@ func InitTokenEncoders() {
if err != nil {
logger.FatalLog(fmt.Sprintf("failed to get gpt-4 token encoder: %s", err.Error()))
}
for model := range billingratio.ModelRatio {
for model := range RatioMap {
if strings.HasPrefix(model, "gpt-3.5") {
tokenEncoderMap[model] = gpt35TokenEncoder
} else if strings.HasPrefix(model, "gpt-4o") {

View File

@ -3,13 +3,15 @@ package palm
import (
"errors"
"fmt"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
)
type Adaptor struct {
@ -58,8 +60,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {
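This adaptor, like most channels touched by this commit, now delegates pricing and model discovery to the shared adaptor.GetRatioHelper and adaptor.GetModelListHelper. Their definitions are not shown in this excerpt, so the sketch below is only a plausible shape inferred from the call sites; the function bodies and the placement in package adaptor are assumptions, not the commit's actual code.

// Illustrative sketch only; the real helpers are defined elsewhere in this commit.
package adaptor

import (
	"github.com/songquanpeng/one-api/relay/billing/ratio"
	"github.com/songquanpeng/one-api/relay/meta"
)

// GetRatioHelper resolves a channel-level ratio by looking up the original
// (unmapped) model name in the channel's RatioMap; nil means "not priced here".
func GetRatioHelper(meta *meta.Meta, ratioMap map[string]ratio.Ratio) *ratio.Ratio {
	if ratioMap == nil {
		return nil
	}
	if r, ok := ratioMap[meta.OriginModelName]; ok {
		return &r
	}
	return nil
}

// GetModelListHelper derives the advertised model list from the RatioMap keys,
// so a channel no longer needs a separate ModelList slice.
func GetModelListHelper(ratioMap map[string]ratio.Ratio) []string {
	models := make([]string, 0, len(ratioMap))
	for name := range ratioMap {
		models = append(models, name)
	}
	return models
}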

View File

@ -1,5 +1,7 @@
package palm
var ModelList = []string{
"PaLM-2",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"PaLM-2": {Input: 0, Output: 0},
}

View File

@ -10,6 +10,7 @@ import (
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/relay/adaptor"
channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
relaymodel "github.com/songquanpeng/one-api/relay/model"
@ -48,6 +49,10 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return nil, nil
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, nil)
}
func (a *Adaptor) GetModelList() (models []string) {
return nil
}

View File

@ -4,7 +4,6 @@ import (
"fmt"
"io"
"net/http"
"slices"
"strings"
"time"
@ -13,6 +12,7 @@ import (
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
@ -96,7 +96,7 @@ func (a *Adaptor) Init(meta *meta.Meta) {
}
func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
if !slices.Contains(ModelList, meta.OriginModelName) {
if _, ok := RatioMap[meta.OriginModelName]; !ok {
return "", errors.Errorf("model %s not supported", meta.OriginModelName)
}
@ -127,8 +127,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,58 +1,49 @@
package replicate
// ModelList is a list of models that can be used with Replicate.
//
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://replicate.com/pricing
var ModelList = []string{
// -------------------------------------
// image model
// -------------------------------------
"black-forest-labs/flux-1.1-pro",
"black-forest-labs/flux-1.1-pro-ultra",
"black-forest-labs/flux-canny-dev",
"black-forest-labs/flux-canny-pro",
"black-forest-labs/flux-depth-dev",
"black-forest-labs/flux-depth-pro",
"black-forest-labs/flux-dev",
"black-forest-labs/flux-dev-lora",
"black-forest-labs/flux-fill-dev",
"black-forest-labs/flux-fill-pro",
"black-forest-labs/flux-pro",
"black-forest-labs/flux-redux-dev",
"black-forest-labs/flux-redux-schnell",
"black-forest-labs/flux-schnell",
"black-forest-labs/flux-schnell-lora",
"ideogram-ai/ideogram-v2",
"ideogram-ai/ideogram-v2-turbo",
"recraft-ai/recraft-v3",
"recraft-ai/recraft-v3-svg",
"stability-ai/stable-diffusion-3",
"stability-ai/stable-diffusion-3.5-large",
"stability-ai/stable-diffusion-3.5-large-turbo",
"stability-ai/stable-diffusion-3.5-medium",
// -------------------------------------
// language model
// -------------------------------------
"ibm-granite/granite-20b-code-instruct-8k",
"ibm-granite/granite-3.0-2b-instruct",
"ibm-granite/granite-3.0-8b-instruct",
"ibm-granite/granite-8b-code-instruct-128k",
"meta/llama-2-13b",
"meta/llama-2-13b-chat",
"meta/llama-2-70b",
"meta/llama-2-70b-chat",
"meta/llama-2-7b",
"meta/llama-2-7b-chat",
"meta/meta-llama-3.1-405b-instruct",
"meta/meta-llama-3-70b",
"meta/meta-llama-3-70b-instruct",
"meta/meta-llama-3-8b",
"meta/meta-llama-3-8b-instruct",
"mistralai/mistral-7b-instruct-v0.2",
"mistralai/mistral-7b-v0.1",
"mistralai/mixtral-8x7b-instruct-v0.1",
// -------------------------------------
// video model
// -------------------------------------
// "minimax/video-01", // TODO: implement the adaptor
var RatioMap = map[string]ratio.Ratio{
"black-forest-labs/flux-1.1-pro": {Input: 0.04 * ratio.USD, Output: 0},
"black-forest-labs/flux-1.1-pro-ultra": {Input: 0.06 * ratio.USD, Output: 0},
"black-forest-labs/flux-canny-dev": {Input: 0.025 * ratio.USD, Output: 0},
"black-forest-labs/flux-canny-pro": {Input: 0.05 * ratio.USD, Output: 0},
"black-forest-labs/flux-depth-dev": {Input: 0.025 * ratio.USD, Output: 0},
"black-forest-labs/flux-depth-pro": {Input: 0.05 * ratio.USD, Output: 0},
"black-forest-labs/flux-dev": {Input: 0.025 * ratio.USD, Output: 0},
"black-forest-labs/flux-dev-lora": {Input: 0.032 * ratio.USD, Output: 0},
"black-forest-labs/flux-fill-dev": {Input: 0.04 * ratio.USD, Output: 0},
"black-forest-labs/flux-fill-pro": {Input: 0.05 * ratio.USD, Output: 0},
"black-forest-labs/flux-pro": {Input: 0.055 * ratio.USD, Output: 0},
"black-forest-labs/flux-redux-dev": {Input: 0.025 * ratio.USD, Output: 0},
"black-forest-labs/flux-redux-schnell": {Input: 0.003 * ratio.USD, Output: 0},
"black-forest-labs/flux-schnell": {Input: 0.003 * ratio.USD, Output: 0},
"black-forest-labs/flux-schnell-lora": {Input: 0.02 * ratio.USD, Output: 0},
"ideogram-ai/ideogram-v2": {Input: 0.08 * ratio.USD, Output: 0},
"ideogram-ai/ideogram-v2-turbo": {Input: 0.05 * ratio.USD, Output: 0},
"recraft-ai/recraft-v3": {Input: 0.04 * ratio.USD, Output: 0},
"recraft-ai/recraft-v3-svg": {Input: 0.08 * ratio.USD, Output: 0},
"stability-ai/stable-diffusion-3": {Input: 0.035 * ratio.USD, Output: 0},
"stability-ai/stable-diffusion-3.5-large": {Input: 0.065 * ratio.USD, Output: 0},
"stability-ai/stable-diffusion-3.5-large-turbo": {Input: 0.04 * ratio.USD, Output: 0},
"stability-ai/stable-diffusion-3.5-medium": {Input: 0.035 * ratio.USD, Output: 0},
// replicate chat models
"ibm-granite/granite-20b-code-instruct-8k": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD},
"ibm-granite/granite-3.0-2b-instruct": {Input: 0.030 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"ibm-granite/granite-3.0-8b-instruct": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"ibm-granite/granite-8b-code-instruct-128k": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"meta/llama-2-13b": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD},
"meta/llama-2-13b-chat": {Input: 0.100 * ratio.MILLI_USD, Output: 0.500 * ratio.MILLI_USD},
"meta/llama-2-70b": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD},
"meta/llama-2-70b-chat": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD},
"meta/llama-2-7b": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"meta/llama-2-7b-chat": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"meta/meta-llama-3.1-405b-instruct": {Input: 9.500 * ratio.MILLI_USD, Output: 9.500 * ratio.MILLI_USD},
"meta/meta-llama-3-70b": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD},
"meta/meta-llama-3-70b-instruct": {Input: 0.650 * ratio.MILLI_USD, Output: 2.750 * ratio.MILLI_USD},
"meta/meta-llama-3-8b": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"meta/meta-llama-3-8b-instruct": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"mistralai/mistral-7b-instruct-v0.2": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"mistralai/mistral-7b-v0.1": {Input: 0.050 * ratio.MILLI_USD, Output: 0.250 * ratio.MILLI_USD},
"mistralai/mixtral-8x7b-instruct-v0.1": {Input: 0.300 * ratio.MILLI_USD, Output: 1.000 * ratio.MILLI_USD},
}

View File

@ -1,36 +1,40 @@
package siliconflow
// https://docs.siliconflow.cn/docs/getting-started
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"deepseek-ai/deepseek-llm-67b-chat",
"Qwen/Qwen1.5-14B-Chat",
"Qwen/Qwen1.5-7B-Chat",
"Qwen/Qwen1.5-110B-Chat",
"Qwen/Qwen1.5-32B-Chat",
"01-ai/Yi-1.5-6B-Chat",
"01-ai/Yi-1.5-9B-Chat-16K",
"01-ai/Yi-1.5-34B-Chat-16K",
"THUDM/chatglm3-6b",
"deepseek-ai/DeepSeek-V2-Chat",
"THUDM/glm-4-9b-chat",
"Qwen/Qwen2-72B-Instruct",
"Qwen/Qwen2-7B-Instruct",
"Qwen/Qwen2-57B-A14B-Instruct",
"deepseek-ai/DeepSeek-Coder-V2-Instruct",
"Qwen/Qwen2-1.5B-Instruct",
"internlm/internlm2_5-7b-chat",
"BAAI/bge-large-en-v1.5",
"BAAI/bge-large-zh-v1.5",
"Pro/Qwen/Qwen2-7B-Instruct",
"Pro/Qwen/Qwen2-1.5B-Instruct",
"Pro/Qwen/Qwen1.5-7B-Chat",
"Pro/THUDM/glm-4-9b-chat",
"Pro/THUDM/chatglm3-6b",
"Pro/01-ai/Yi-1.5-9B-Chat-16K",
"Pro/01-ai/Yi-1.5-6B-Chat",
"Pro/google/gemma-2-9b-it",
"Pro/internlm/internlm2_5-7b-chat",
"Pro/meta-llama/Meta-Llama-3-8B-Instruct",
"Pro/mistralai/Mistral-7B-Instruct-v0.2",
// https://siliconflow.cn/zh-cn/models
// https://siliconflow.cn/zh-cn/pricing
var RatioMap = map[string]ratio.Ratio{
"Qwen/Qwen2.5-72B-Instruct": {Input: 41.3 * ratio.MILLI_RMB, Output: 41.3 * ratio.MILLI_RMB},
"Qwen/Qwen2.5-7B-Instruct": {Input: 3.5 * ratio.MILLI_RMB, Output: 3.5 * ratio.MILLI_RMB},
"deepseek-ai/deepseek-llm-67b-chat": {Input: 0, Output: 0},
"Qwen/Qwen1.5-14B-Chat": {Input: 0, Output: 0},
"Qwen/Qwen1.5-7B-Chat": {Input: 0, Output: 0},
"Qwen/Qwen1.5-110B-Chat": {Input: 0, Output: 0},
"Qwen/Qwen1.5-32B-Chat": {Input: 0, Output: 0},
"01-ai/Yi-1.5-6B-Chat": {Input: 0, Output: 0},
"01-ai/Yi-1.5-9B-Chat-16K": {Input: 0, Output: 0},
"01-ai/Yi-1.5-34B-Chat-16K": {Input: 0, Output: 0},
"THUDM/chatglm3-6b": {Input: 0, Output: 0},
"deepseek-ai/DeepSeek-V2-Chat": {Input: 0, Output: 0},
"THUDM/glm-4-9b-chat": {Input: 0, Output: 0},
"Qwen/Qwen2-72B-Instruct": {Input: 0, Output: 0},
"Qwen/Qwen2-7B-Instruct": {Input: 0, Output: 0},
"Qwen/Qwen2-57B-A14B-Instruct": {Input: 0, Output: 0},
"deepseek-ai/DeepSeek-Coder-V2-Instruct": {Input: 0, Output: 0},
"Qwen/Qwen2-1.5B-Instruct": {Input: 0, Output: 0},
"internlm/internlm2_5-7b-chat": {Input: 0, Output: 0},
"BAAI/bge-large-en-v1.5": {Input: 0, Output: 0},
"BAAI/bge-large-zh-v1.5": {Input: 0, Output: 0},
"Pro/Qwen/Qwen2-7B-Instruct": {Input: 0, Output: 0},
"Pro/Qwen/Qwen2-1.5B-Instruct": {Input: 0, Output: 0},
"Pro/Qwen/Qwen1.5-7B-Chat": {Input: 0, Output: 0},
"Pro/THUDM/glm-4-9b-chat": {Input: 0, Output: 0},
"Pro/THUDM/chatglm3-6b": {Input: 0, Output: 0},
"Pro/01-ai/Yi-1.5-9B-Chat-16K": {Input: 0, Output: 0},
"Pro/01-ai/Yi-1.5-6B-Chat": {Input: 0, Output: 0},
"Pro/google/gemma-2-9b-it": {Input: 0, Output: 0},
"Pro/internlm/internlm2_5-7b-chat": {Input: 0, Output: 0},
"Pro/meta-llama/Meta-Llama-3-8B-Instruct": {Input: 0, Output: 0},
"Pro/mistralai/Mistral-7B-Instruct-v0.2": {Input: 0, Output: 0},
}

View File

@ -1,13 +1,17 @@
package stepfun
var ModelList = []string{
"step-1-8k",
"step-1-32k",
"step-1-128k",
"step-1-256k",
"step-1-flash",
"step-2-16k",
"step-1v-8k",
"step-1v-32k",
"step-1x-medium",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://platform.stepfun.com/docs/pricing/details
var RatioMap = map[string]ratio.Ratio{
"step-1-8k": {Input: 5 * ratio.MILLI_RMB, Output: 20 * ratio.MILLI_RMB},
"step-1-32k": {Input: 15 * ratio.MILLI_RMB, Output: 70 * ratio.MILLI_RMB},
"step-1-128k": {Input: 40 * ratio.MILLI_RMB, Output: 200 * ratio.MILLI_RMB},
"step-1-256k": {Input: 95 * ratio.MILLI_RMB, Output: 300 * ratio.MILLI_RMB},
"step-1-flash": {Input: 1 * ratio.MILLI_RMB, Output: 4 * ratio.MILLI_RMB},
"step-2-16k": {Input: 38 * ratio.MILLI_RMB, Output: 120 * ratio.MILLI_RMB},
"step-1v-8k": {Input: 5 * ratio.MILLI_RMB, Output: 20 * ratio.MILLI_RMB},
"step-1v-32k": {Input: 15 * ratio.MILLI_RMB, Output: 70 * ratio.MILLI_RMB},
"step-1.5v-mini": {Input: 8 * ratio.MILLI_RMB, Output: 35 * ratio.MILLI_RMB},
"step-1x-medium": {Input: 0.1 * ratio.RMB, Output: 0},
}

View File

@ -2,16 +2,18 @@ package tencent
import (
"errors"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
"strconv"
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
// https://cloud.tencent.com/document/api/1729/101837
@ -80,8 +82,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,9 +1,19 @@
package tencent
var ModelList = []string{
"hunyuan-lite",
"hunyuan-standard",
"hunyuan-standard-256K",
"hunyuan-pro",
"hunyuan-vision",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
// https://cloud.tencent.com/document/product/1729/97731
var RatioMap = map[string]ratio.Ratio{
"hunyuan-turbo": {Input: 0.015 * ratio.RMB, Output: 0.05 * ratio.RMB},
"hunyuan-large": {Input: 0.004 * ratio.RMB, Output: 0.012 * ratio.RMB},
"hunyuan-large-longcontext": {Input: 0.006 * ratio.RMB, Output: 0.018 * ratio.RMB},
"hunyuan-standard": {Input: 0.0008 * ratio.RMB, Output: 0.002 * ratio.RMB},
"hunyuan-standard-256K": {Input: 0.0005 * ratio.RMB, Output: 0.002 * ratio.RMB},
"hunyuan-translation-lite": {Input: 0.005 * ratio.RMB, Output: 0.015 * ratio.RMB},
"hunyuan-role": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB},
"hunyuan-functioncall": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB},
"hunyuan-code": {Input: 0.004 * ratio.RMB, Output: 0.008 * ratio.RMB},
"hunyuan-turbo-vision": {Input: 0.08 * ratio.RMB, Output: 0.08 * ratio.RMB},
"hunyuan-vision": {Input: 0.018 * ratio.RMB, Output: 0.018 * ratio.RMB},
"hunyuan-embedding": {Input: 0.0007 * ratio.RMB, Output: 0.0007 * ratio.RMB},
}

View File

@ -1,10 +1,12 @@
package togetherai
// https://docs.together.ai/docs/inference-models
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var ModelList = []string{
"meta-llama/Llama-3-70b-chat-hf",
"deepseek-ai/deepseek-coder-33b-instruct",
"mistralai/Mixtral-8x22B-Instruct-v0.1",
"Qwen/Qwen1.5-72B-Chat",
// https://www.together.ai/pricing
// https://docs.together.ai/docs/inference-models
var RatioMap = map[string]ratio.Ratio{
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {Input: 0.88 * ratio.MILLI_USD, Output: 0.88 * ratio.MILLI_USD},
"deepseek-ai/deepseek-coder-33b-instruct": {Input: 1.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {Input: 1.20 * ratio.MILLI_USD, Output: 1.20 * ratio.MILLI_USD},
"Qwen/Qwen2-72B-Instruct": {Input: 0.90 * ratio.MILLI_USD, Output: 0.90 * ratio.MILLI_USD},
}

View File

@ -10,9 +10,9 @@ import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
relaymodel "github.com/songquanpeng/one-api/relay/model"
)
var _ adaptor.Adaptor = new(Adaptor)
@ -40,19 +40,29 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
adaptor := GetAdaptor(meta.ActualModelName)
if adaptor == nil {
return nil, &relaymodel.ErrorWithStatusCode{
return nil, &model.ErrorWithStatusCode{
StatusCode: http.StatusInternalServerError,
Error: relaymodel.Error{
Message: "adaptor not found",
},
Error: model.Error{Message: "adaptor not found"},
}
}
return adaptor.DoResponse(c, resp, meta)
}
func (a *Adaptor) GetModelList() (models []string) {
models = modelList
return
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
adaptor := GetAdaptor(meta.ActualModelName)
if adaptor == nil {
return nil
}
return adaptor.GetRatio(meta)
}
func (a *Adaptor) GetModelList() []string {
var resp []string
for model := range modelMapping {
resp = append(resp, model)
}
return resp
}
func (a *Adaptor) GetChannelName() string {

View File

@ -6,19 +6,21 @@ import (
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/anthropic"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
var ModelList = []string{
"claude-3-haiku@20240307",
"claude-3-sonnet@20240229",
"claude-3-opus@20240229",
"claude-3-5-sonnet@20240620",
"claude-3-5-sonnet-v2@20241022",
"claude-3-5-haiku@20241022",
// https://cloud.google.com/vertex-ai/generative-ai/pricing?hl=zh-cn#claude-models
var RatioMap = map[string]ratio.Ratio{
"claude-3-haiku@20240307": {Input: 0.25 * ratio.MILLI_USD, Output: 1.25 * ratio.MILLI_USD},
"claude-3-sonnet@20240229": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-opus@20240229": {Input: 15 * ratio.MILLI_USD, Output: 75 * ratio.MILLI_USD},
"claude-3-5-sonnet@20240620": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-5-sonnet-v2@20241022": {Input: 3 * ratio.MILLI_USD, Output: 15 * ratio.MILLI_USD},
"claude-3-5-haiku@20241022": {Input: 0.80 * ratio.MILLI_USD, Output: 4 * ratio.MILLI_USD},
}
const anthropicVersion = "vertex-2023-10-16"
@ -58,3 +60,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
}
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}

View File

@ -6,20 +6,16 @@ import (
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/gemini"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/relaymode"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
)
var ModelList = []string{
"gemini-pro", "gemini-pro-vision",
"gemini-1.5-pro-001", "gemini-1.5-flash-001",
"gemini-1.5-pro-002", "gemini-1.5-flash-002",
"gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp",
}
var RatioMap = gemini.RatioMap
type Adaptor struct {
}
@ -50,3 +46,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
}
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}

View File

@ -6,6 +6,7 @@ import (
"github.com/gin-gonic/gin"
claude "github.com/songquanpeng/one-api/relay/adaptor/vertexai/claude"
gemini "github.com/songquanpeng/one-api/relay/adaptor/vertexai/gemini"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
@ -18,16 +19,13 @@ const (
)
var modelMapping = map[string]VertexAIModelType{}
var modelList = []string{}
func init() {
modelList = append(modelList, claude.ModelList...)
for _, model := range claude.ModelList {
for model := range claude.RatioMap {
modelMapping[model] = VerterAIClaude
}
modelList = append(modelList, gemini.ModelList...)
for _, model := range gemini.ModelList {
for model := range gemini.RatioMap {
modelMapping[model] = VerterAIGemini
}
}
@ -35,6 +33,7 @@ func init() {
type innerAIAdapter interface {
ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error)
DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode)
GetRatio(meta *meta.Meta) *ratio.Ratio
}
func GetAdaptor(model string) innerAIAdapter {

View File

@ -1,5 +1,7 @@
package xai
var ModelList = []string{
"grok-beta",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"grok-beta": {Input: 5.0 * ratio.MILLI_USD, Output: 15.0 * ratio.MILLI_USD},
}

View File

@ -2,14 +2,16 @@ package xunfei
import (
"errors"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
)
type Adaptor struct {
@ -77,8 +79,12 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
return
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,12 +1,14 @@
package xunfei
var ModelList = []string{
"SparkDesk",
"SparkDesk-v1.1",
"SparkDesk-v2.1",
"SparkDesk-v3.1",
"SparkDesk-v3.1-128K",
"SparkDesk-v3.5",
"SparkDesk-v3.5-32K",
"SparkDesk-v4.0",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"SparkDesk": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v1.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v2.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v3.1": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v3.1-128K": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v3.5": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v3.5-32K": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
"SparkDesk-v4.0": {Input: 1.2858, Output: 1.2858}, // ¥0.018 / 1k tokens
}

View File

@ -3,16 +3,18 @@ package zhipu
import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
"io"
"net/http"
"strings"
)
type Adaptor struct {
@ -140,8 +142,12 @@ func ConvertEmbeddingRequest(request model.GeneralOpenAIRequest) (*EmbeddingRequ
}, nil
}
func (a *Adaptor) GetRatio(meta *meta.Meta) *ratio.Ratio {
return adaptor.GetRatioHelper(meta, RatioMap)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
return adaptor.GetModelListHelper(RatioMap)
}
func (a *Adaptor) GetChannelName() string {

View File

@ -1,7 +1,24 @@
package zhipu
var ModelList = []string{
"chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite",
"glm-4", "glm-4v", "glm-3-turbo", "embedding-2",
"cogview-3",
import "github.com/songquanpeng/one-api/relay/billing/ratio"
var RatioMap = map[string]ratio.Ratio{
"glm-zero-preview": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB},
"glm-4-plus": {Input: 0.05 * ratio.RMB, Output: 0.05 * ratio.RMB},
"glm-4-0520": {Input: 0.1 * ratio.RMB, Output: 0.1 * ratio.RMB},
"glm-4-airx": {Input: 0.01 * ratio.RMB, Output: 0.01 * ratio.RMB},
"glm-4-air": {Input: 0.0005 * ratio.RMB, Output: 0.0005 * ratio.RMB},
"glm-4-long": {Input: 0.001 * ratio.RMB, Output: 0.001 * ratio.RMB},
"glm-4-flashx": {Input: 0.0001 * ratio.RMB, Output: 0.0001 * ratio.RMB},
"glm-4v-plus": {Input: 0.004 * ratio.RMB, Output: 0.004 * ratio.RMB},
"glm-4v": {Input: 0.05 * ratio.RMB, Output: 0},
"cogview-3-plus": {Input: 0.06 * ratio.RMB, Output: 0},
"cogview-3": {Input: 0.1 * ratio.RMB, Output: 0},
"cogvideox": {Input: 0.5 * ratio.RMB, Output: 0},
"embedding-3": {Input: 0.0005 * ratio.RMB, Output: 0},
"embedding-2": {Input: 0.0005 * ratio.RMB, Output: 0},
"glm-4-flash": {Input: 0, Output: 0}, // 免费
"glm-4v-flash": {Input: 0, Output: 0}, // 免费
"cogview-3-flash": {Input: 0, Output: 0}, // 免费
"cogvideox-flash": {Input: 0, Output: 0}, // 免费
}

View File

@ -3,13 +3,14 @@ package zhipu
import (
"bufio"
"encoding/json"
"github.com/songquanpeng/one-api/common/render"
"io"
"net/http"
"strings"
"sync"
"time"
"github.com/songquanpeng/one-api/common/render"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt"
"github.com/songquanpeng/one-api/common"
@ -268,6 +269,9 @@ func EmbeddingsHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithSta
c.Writer.Header().Set("Content-Type", "application/json")
c.Writer.WriteHeader(resp.StatusCode)
_, err = c.Writer.Write(jsonResponse)
if err != nil {
return openai.ErrorWrapper(err, "write_response_body_failed", http.StatusInternalServerError), nil
}
return nil, &fullTextResponse.Usage
}
@ -276,11 +280,7 @@ func embeddingResponseZhipu2OpenAI(response *EmbeddingResponse) *openai.Embeddin
Object: "list",
Data: make([]openai.EmbeddingResponseItem, 0, len(response.Embeddings)),
Model: response.Model,
Usage: model.Usage{
PromptTokens: response.PromptTokens,
CompletionTokens: response.CompletionTokens,
TotalTokens: response.Usage.TotalTokens,
},
Usage: response.Usage,
}
for _, item := range response.Embeddings {

View File

@ -3,6 +3,7 @@ package billing
import (
"context"
"fmt"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/model"
)
@ -19,20 +20,22 @@ func ReturnPreConsumedQuota(ctx context.Context, preConsumedQuota int64, tokenId
}
}
func PostConsumeQuota(ctx context.Context, tokenId int, quotaDelta int64, totalQuota int64, userId int, channelId int, modelRatio float64, groupRatio float64, modelName string, tokenName string) {
func PostConsumeQuota(ctx context.Context, tokenId int, quotaDelta int64, totalQuota int64, userId int, channelId int, modelRatio float64, groupRatio float64, modelName string, tokenName string, promptTokens int, completionTokens int) {
// quotaDelta is remaining quota to be consumed
if quotaDelta != 0 {
err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
if err != nil {
logger.SysError("error consuming token remain quota: " + err.Error())
}
err = model.CacheUpdateUserQuota(ctx, userId)
}
err := model.CacheUpdateUserQuota(ctx, userId)
if err != nil {
logger.SysError("error update user quota cache: " + err.Error())
}
// totalQuota is total quota consumed
if totalQuota != 0 {
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
model.RecordConsumeLog(ctx, userId, channelId, int(totalQuota), 0, modelName, tokenName, totalQuota, logContent)
model.RecordConsumeLog(ctx, userId, channelId, promptTokens, completionTokens, modelName, tokenName, totalQuota, logContent)
model.UpdateUserUsedQuotaAndRequestCount(userId, totalQuota)
model.UpdateChannelUsedQuota(channelId, totalQuota)
}

View File

@ -3,6 +3,7 @@ package ratio
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"github.com/songquanpeng/one-api/common/logger"
@ -11,15 +12,33 @@ import (
const (
USD2RMB = 7
USD = 500 // $0.002 = 1 -> $1 = 500
RMB = USD / USD2RMB
RMB = USD / USD2RMB // 1RMB = 1/7USD
MILLI_USD = 1.0 / 1000 * USD
MILLI_RMB = 1.0 / 1000 * RMB
TokensPerSecond = 1000 / 20 // $0.006 / minute -> $0.002 / 20 seconds -> $0.002 / 1K tokens
)
type Ratio struct {
Input float64 `json:"input,omitempty"` // input ratio
Output float64 `json:"output,omitempty"` // output ratio
LongThreshold int `json:"long_threshold,omitempty"` // for Gemini-like models, prompts longer than this threshold are charged as long input
LongInput float64 `json:"long_input,omitempty"` // long input ratio
LongOutput float64 `json:"long_output,omitempty"` // long output ratio
}
var (
FallbackRatio = Ratio{Input: 30, Output: 30}
)
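A quick worked example of the unit system above: one ratio unit equals $0.002 per 1K tokens ($2 per 1M tokens), so MILLI_USD converts a "dollars per 1M tokens" price directly into a ratio, and MILLI_RMB does the same for RMB prices. The variable name below is illustrative only.

// Minimal sketch, assuming the constants above: a model priced at $3 / 1M input
// tokens and $15 / 1M output tokens (Claude-3.5-Sonnet-style pricing) becomes
//   Input:  3 * MILLI_USD  = 3 * 0.5  = 1.5  // 1.5 * $0.002 = $0.003 per 1K tokens
//   Output: 15 * MILLI_USD = 15 * 0.5 = 7.5  // 7.5 * $0.002 = $0.015 per 1K tokens
var exampleSonnetLikeRatio = Ratio{Input: 3 * MILLI_USD, Output: 15 * MILLI_USD}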
// Deprecated
// TODO: remove this
// ModelRatio
// https://platform.openai.com/docs/models/model-endpoint-compatibility
// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf
// https://openai.com/pricing
// 1 === $0.002 / 1K tokens
// 1 === ¥0.014 / 1k tokens
// 1 === $0.002 / 20 seconds (50 tokens per second)
var ModelRatio = map[string]float64{
// https://openai.com/pricing
"gpt-4": 15,
@ -342,6 +361,7 @@ var CompletionRatio = map[string]float64{
var (
DefaultModelRatio map[string]float64
DefaultCompletionRatio map[string]float64
DefaultRatio = make(map[string]Ratio)
)
func init() {
@ -375,6 +395,58 @@ func AddNewMissingRatio(oldRatio string) string {
return string(jsonBytes)
}
func AddOldRatio(oldRatio string, oldCompletionRatio string) string {
modelRatio := make(map[string]float64)
if oldRatio != "" {
err := json.Unmarshal([]byte(oldRatio), &modelRatio)
if err != nil {
logger.SysError("error unmarshalling old ratio: " + err.Error())
return oldRatio
}
}
completionRatio := make(map[string]float64)
if oldCompletionRatio != "" {
err := json.Unmarshal([]byte(oldCompletionRatio), &completionRatio)
if err != nil {
logger.SysError("error unmarshalling old completion ratio: " + err.Error())
return oldCompletionRatio
}
}
newRatio := make(map[string]Ratio)
for k, v := range DefaultRatio {
if _, ok := newRatio[k]; !ok {
newRatio[k] = v
}
}
for k, v := range modelRatio {
if _, ok := DefaultRatio[k]; ok {
continue
}
modelName, channelType := SplitModelName(k)
ratio := Ratio{}
ratio.Input = v
if val, ok := completionRatio[k]; ok {
ratio.Output = v * val
} else {
ratio.Output = v * GetCompletionRatio(modelName, channelType)
}
newRatio[k] = ratio
}
jsonBytes, err := json.Marshal(newRatio)
if err != nil {
logger.SysError("error marshalling new ratio: " + err.Error())
return oldRatio
}
return string(jsonBytes)
}
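To make the migration above concrete, here is a minimal usage sketch, assuming it lives in this same package; the model name is hypothetical and deliberately absent from DefaultRatio so it takes the legacy-conversion path.

// Sketch only: legacy ModelRatio/CompletionRatio entries fold into one Ratio entry.
func exampleMigrateLegacyRatios() string {
	oldModelRatio := `{"my-custom-model": 15}`     // legacy ModelRatio: prompt ratio 15
	oldCompletionRatio := `{"my-custom-model": 2}` // legacy CompletionRatio: output = 2x input
	// The result is a JSON Ratio map containing every DefaultRatio entry plus
	// "my-custom-model": {"input": 15, "output": 30}  (30 = 15 * 2).
	return AddOldRatio(oldModelRatio, oldCompletionRatio)
}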
func ModelRatio2JSONString() string {
jsonBytes, err := json.Marshal(ModelRatio)
if err != nil {
@ -425,6 +497,18 @@ func UpdateCompletionRatioByJSONString(jsonStr string) error {
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
}
func SplitModelName(name string) (string, int) {
model := strings.Split(name, "(")
modelName := model[0]
channelType := 0
if len(model) > 1 {
if v, err := strconv.Atoi(model[1]); err == nil {
channelType = v
}
}
return modelName, channelType
}
func GetCompletionRatio(name string, channelType int) float64 {
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
name = strings.TrimSuffix(name, "-internet")
@ -536,3 +620,30 @@ func GetCompletionRatio(name string, channelType int) float64 {
return 1
}
func Ratio2JSONString() string {
jsonBytes, err := json.Marshal(DefaultRatio)
if err != nil {
logger.SysError("error marshalling ratio: " + err.Error())
}
return string(jsonBytes)
}
func UpdateRatioByJSONString(jsonStr string) error {
DefaultRatio = make(map[string]Ratio)
return json.Unmarshal([]byte(jsonStr), &DefaultRatio)
}
func GetRatio(name string, channelType int) *Ratio {
var result Ratio
model := fmt.Sprintf("%s(%d)", name, channelType)
if ratio, ok := DefaultRatio[model]; ok {
result = ratio
return &result
}
if ratio, ok := DefaultRatio[name]; ok {
result = ratio
return &result
}
return nil
}
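The lookup order in GetRatio matters: a key of the form "model(channelType)" overrides the plain model key, and a nil result tells the caller to fall back to the channel adaptor's own RatioMap. A minimal sketch, assuming it sits in this package; channel type 33 is simply the value used in the web UI example later in this commit, and the function name is illustrative.

// Sketch only: channel-scoped overrides win over plain model entries.
func exampleRatioLookup() *Ratio {
	_ = UpdateRatioByJSONString(`{
		"llama3-8b-8192":     {"input": 0.2,  "output": 0.4},
		"llama3-8b-8192(33)": {"input": 0.15, "output": 0.3}
	}`)
	// Matches the "llama3-8b-8192(33)" entry first, so Input is 0.15, not 0.2.
	return GetRatio("llama3-8b-8192", 33)
}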

View File

@ -18,6 +18,7 @@ import (
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/model"
"github.com/songquanpeng/one-api/relay"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
@ -54,9 +55,16 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
}
}
modelRatio := billingratio.GetModelRatio(audioModel, channelType)
adaptor := relay.GetAdaptor(meta.APIType)
if adaptor == nil {
return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest)
}
adaptor.Init(meta)
groupRatio := billingratio.GetGroupRatio(group)
ratio := modelRatio * groupRatio
adaptorRatio := GetRatio(meta, adaptor)
ratio := adaptorRatio.Input * groupRatio
var quota int64
var preConsumedQuota int64
switch relayMode {
@ -216,7 +224,7 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
succeed = true
quotaDelta := quota - preConsumedQuota
defer func(ctx context.Context) {
go billing.PostConsumeQuota(ctx, tokenId, quotaDelta, quota, userId, channelId, modelRatio, groupRatio, audioModel, tokenName)
go billing.PostConsumeQuota(ctx, tokenId, quotaDelta, quota, userId, channelId, adaptorRatio.Input, groupRatio, audioModel, tokenName, 0, 0)
}(c.Request.Context())
for k, v := range resp.Header {

View File

@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"github.com/songquanpeng/one-api/relay/constant/role"
"math"
"net/http"
"strings"
@ -14,9 +13,12 @@ import (
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/model"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing/ratio"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/constant/role"
"github.com/songquanpeng/one-api/relay/controller/validator"
"github.com/songquanpeng/one-api/relay/meta"
relaymodel "github.com/songquanpeng/one-api/relay/model"
@ -91,17 +93,26 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
return preConsumedQuota, nil
}
func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio billingratio.Ratio, preConsumedQuota int64, groupRatio float64, systemPromptReset bool) {
if usage == nil {
logger.Error(ctx, "usage is nil, which is unexpected")
return
}
var quota int64
completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
// use meta.OriginModelName instead of the mapped model name, which may be randomly named in Azure
promptTokens := usage.PromptTokens
completionTokens := usage.CompletionTokens
quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
if ratio != 0 && quota <= 0 {
promptRatio := ratio.Input
completionRatio := ratio.Output
// for Gemini, prompts longer than 128K tokens are charged at the long-input rate
if ratio.LongInput > 0 && promptTokens > ratio.LongThreshold {
promptRatio = ratio.LongInput
completionRatio = ratio.LongOutput
}
quota = int64(math.Ceil(groupRatio * (float64(promptTokens)*promptRatio + float64(completionTokens)*completionRatio)))
if quota <= 0 && (ratio.Input > 0 || ratio.Output > 0) {
quota = 1
}
totalTokens := promptTokens + completionTokens
@ -123,8 +134,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
if systemPromptReset {
extraLog = " (注意系统提示词已被重置)"
}
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f%s", modelRatio, groupRatio, completionRatio, extraLog)
model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, textRequest.Model, meta.TokenName, quota, logContent)
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f%s", promptRatio, groupRatio, completionRatio/promptRatio, extraLog)
model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, promptTokens, completionTokens, meta.OriginModelName, meta.TokenName, quota, logContent)
model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
model.UpdateChannelUsedQuota(meta.ChannelId, quota)
}
@ -185,3 +196,16 @@ func setSystemPrompt(ctx context.Context, request *relaymodel.GeneralOpenAIReque
logger.Infof(ctx, "add system prompt")
return true
}
func GetRatio(meta *meta.Meta, adaptor adaptor.Adaptor) ratio.Ratio {
result := billingratio.GetRatio(meta.OriginModelName, meta.ChannelType)
if result != nil {
return *result
}
ratio := adaptor.GetRatio(meta)
if ratio != nil {
return *ratio
}
logger.SysError("model ratio not found: " + meta.OriginModelName)
return billingratio.FallbackRatio
}
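Putting the new billing path together: postConsumeQuota above now charges prompt and completion tokens at their own ratios and applies groupRatio to the whole sum, replacing the old modelRatio-times-completionRatio scheme; the LongInput/LongOutput branch only applies to Gemini-like entries whose prompt exceeds LongThreshold. A small worked sketch follows, assuming the gpt-4o-mini entry from the openai RatioMap, a group ratio of 1, and placement next to postConsumeQuota in relay/controller (where math and billingratio are already imported); the function name is illustrative.

// Sketch only: 1000 prompt + 500 completion tokens on gpt-4o-mini.
func exampleQuotaForGPT4oMini() int64 {
	r := billingratio.Ratio{Input: 0.075, Output: 0.3} // $0.15 / $0.60 per 1M tokens
	groupRatio := 1.0
	promptTokens, completionTokens := 1000, 500
	// ceil(1 * (1000*0.075 + 500*0.3)) = ceil(75 + 150) = 225 quota,
	// i.e. 225 * $0.002 / 1000 = $0.00045 for this request.
	return int64(math.Ceil(groupRatio * (float64(promptTokens)*r.Input + float64(completionTokens)*r.Output)))
}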

View File

@ -128,7 +128,6 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
return openai.ErrorWrapper(err, "get_image_cost_ratio_failed", http.StatusInternalServerError)
}
imageModel := imageRequest.Model
// Convert the original image model
imageRequest.Model, _ = getMappedModelName(imageRequest.Model, billingratio.ImageOriginModelName)
c.Set("response_format", imageRequest.ResponseFormat)
@ -167,9 +166,9 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
requestBody = bytes.NewBuffer(jsonStr)
}
modelRatio := billingratio.GetModelRatio(imageModel, meta.ChannelType)
groupRatio := billingratio.GetGroupRatio(meta.Group)
ratio := modelRatio * groupRatio
adaptorRatio := GetRatio(meta, adaptor)
ratio := adaptorRatio.Input * groupRatio
userQuota, err := model.CacheGetUserQuota(ctx, meta.UserId)
var quota int64
@ -209,7 +208,7 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
}
if quota != 0 {
tokenName := c.GetString(ctxkey.TokenName)
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", adaptorRatio.Input, groupRatio)
model.RecordConsumeLog(ctx, meta.UserId, meta.ChannelId, 0, 0, imageRequest.Model, tokenName, quota, logContent)
model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
channelId := c.GetInt(ctxkey.ChannelId)

View File

@ -4,10 +4,11 @@ import (
"bytes"
"encoding/json"
"fmt"
"github.com/songquanpeng/one-api/common/config"
"io"
"net/http"
"github.com/songquanpeng/one-api/common/config"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay"
@ -32,6 +33,12 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
}
meta.IsStream = textRequest.Stream
adaptor := relay.GetAdaptor(meta.APIType)
if adaptor == nil {
return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest)
}
adaptor.Init(meta)
// map model name
meta.OriginModelName = textRequest.Model
textRequest.Model, _ = getMappedModelName(textRequest.Model, meta.ModelMapping)
@ -39,9 +46,10 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
// set system prompt if not empty
systemPromptReset := setSystemPrompt(ctx, textRequest, meta.SystemPrompt)
// get model ratio & group ratio
modelRatio := billingratio.GetModelRatio(textRequest.Model, meta.ChannelType)
groupRatio := billingratio.GetGroupRatio(meta.Group)
ratio := modelRatio * groupRatio
adaptorRatio := GetRatio(meta, adaptor)
ratio := adaptorRatio.Input * groupRatio
// pre-consume quota
promptTokens := getPromptTokens(textRequest, meta.Mode)
meta.PromptTokens = promptTokens
@ -51,12 +59,6 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
return bizErr
}
adaptor := relay.GetAdaptor(meta.APIType)
if adaptor == nil {
return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest)
}
adaptor.Init(meta)
// get request body
requestBody, err := getRequestBody(c, meta, textRequest, adaptor)
if err != nil {
@ -82,7 +84,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
return respErr
}
// post-consume quota
go postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset)
go postConsumeQuota(ctx, usage, meta, textRequest, adaptorRatio, preConsumedQuota, groupRatio, systemPromptReset)
return nil
}

View File

@ -2,6 +2,13 @@ import React, { useEffect, useState } from 'react';
import { Divider, Form, Grid, Header } from 'semantic-ui-react';
import { API, showError, showSuccess, timestamp2string, verifyJSON } from '../helpers';
const RATIO_MAPPING_EXAMPLE = {
'gpt-4o-mini': {'input': 0.075, 'output': 0.3},
'llama3-8b-8192(33)': {'input': 0.15, 'output': 0.3},
'llama3-70b-8192(33)': {'input': 1.325, 'output': 1.749},
};
const OperationSetting = () => {
let now = new Date();
let [inputs, setInputs] = useState({
@ -10,8 +17,7 @@ const OperationSetting = () => {
QuotaForInvitee: 0,
QuotaRemindThreshold: 0,
PreConsumedQuota: 0,
ModelRatio: '',
CompletionRatio: '',
Ratio: '',
GroupRatio: '',
TopUpLink: '',
ChatLink: '',
@ -35,7 +41,7 @@ const OperationSetting = () => {
if (success) {
let newInputs = {};
data.forEach((item) => {
if (item.key === 'ModelRatio' || item.key === 'GroupRatio' || item.key === 'CompletionRatio') {
if (item.key === 'GroupRatio' || item.key === 'Ratio') {
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
}
if (item.value === '{}') {
@ -91,12 +97,12 @@ const OperationSetting = () => {
}
break;
case 'ratio':
if (originInputs['ModelRatio'] !== inputs.ModelRatio) {
if (!verifyJSON(inputs.ModelRatio)) {
showError('模型倍率不是合法的 JSON 字符串');
if (originInputs['Ratio'] !== inputs.Ratio) {
if (!verifyJSON(inputs.Ratio)) {
showError('自定义倍率不是合法的 JSON 字符串');
return;
}
await updateOption('ModelRatio', inputs.ModelRatio);
await updateOption('Ratio', inputs.Ratio);
}
if (originInputs['GroupRatio'] !== inputs.GroupRatio) {
if (!verifyJSON(inputs.GroupRatio)) {
@ -105,13 +111,6 @@ const OperationSetting = () => {
}
await updateOption('GroupRatio', inputs.GroupRatio);
}
if (originInputs['CompletionRatio'] !== inputs.CompletionRatio) {
if (!verifyJSON(inputs.CompletionRatio)) {
showError('补全倍率不是合法的 JSON 字符串');
return;
}
await updateOption('CompletionRatio', inputs.CompletionRatio);
}
break;
case 'quota':
if (originInputs['QuotaForNewUser'] !== inputs.QuotaForNewUser) {
@ -346,24 +345,13 @@ const OperationSetting = () => {
</Header>
<Form.Group widths='equal'>
<Form.TextArea
label='模型倍率'
name='ModelRatio'
label='自定义倍率'
name='Ratio'
onChange={handleInputChange}
style={{ minHeight: 250, fontFamily: 'JetBrains Mono, Consolas' }}
autoComplete='new-password'
value={inputs.ModelRatio}
placeholder='为一个 JSON 文本,键为模型名称,值为倍率'
/>
</Form.Group>
<Form.Group widths='equal'>
<Form.TextArea
label='补全倍率'
name='CompletionRatio'
onChange={handleInputChange}
style={{ minHeight: 250, fontFamily: 'JetBrains Mono, Consolas' }}
autoComplete='new-password'
value={inputs.CompletionRatio}
placeholder='为一个 JSON 文本,键为模型名称,值为倍率,此处的倍率设置是模型补全倍率相较于提示倍率的比例,使用该设置可强制覆盖 One API 的内部比例'
value={inputs.Ratio}
placeholder={`为一个 JSON 文本,键为模型名称,值为倍率结构,例如:\n${JSON.stringify(RATIO_MAPPING_EXAMPLE, null, 2)}`}
/>
</Form.Group>
<Form.Group widths='equal'>

View File

@ -18,6 +18,12 @@ import { DateTimePicker } from "@mui/x-date-pickers/DateTimePicker";
import dayjs from "dayjs";
require("dayjs/locale/zh-cn");
const RATIO_MAPPING_EXAMPLE = {
'gpt-4o-mini': {'input': 0.075, 'output': 0.3},
'llama3-8b-8192(33)': {'input': 0.15, 'output': 0.3},
'llama3-70b-8192(33)': {'input': 1.325, 'output': 1.749},
};
const OperationSetting = () => {
let now = new Date();
let [inputs, setInputs] = useState({
@ -26,8 +32,7 @@ const OperationSetting = () => {
QuotaForInvitee: 0,
QuotaRemindThreshold: 0,
PreConsumedQuota: 0,
ModelRatio: "",
CompletionRatio: "",
Ratio: "",
GroupRatio: "",
TopUpLink: "",
ChatLink: "",
@ -53,7 +58,7 @@ const OperationSetting = () => {
if (success) {
let newInputs = {};
data.forEach((item) => {
if (item.key === "ModelRatio" || item.key === "GroupRatio" || item.key === "CompletionRatio") {
if (item.key === "GroupRatio" || item.key === "Ratio") {
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
}
if (item.value === '{}') {
@ -123,12 +128,12 @@ const OperationSetting = () => {
}
break;
case "ratio":
if (originInputs["ModelRatio"] !== inputs.ModelRatio) {
if (!verifyJSON(inputs.ModelRatio)) {
showError("模型倍率不是合法的 JSON 字符串");
if (originInputs["Ratio"] !== inputs.Ratio) {
if (!verifyJSON(inputs.Ratio)) {
showError("自定义倍率不是合法的 JSON 字符串");
return;
}
await updateOption("ModelRatio", inputs.ModelRatio);
await updateOption("Ratio", inputs.Ratio);
}
if (originInputs["GroupRatio"] !== inputs.GroupRatio) {
if (!verifyJSON(inputs.GroupRatio)) {
@ -137,13 +142,6 @@ const OperationSetting = () => {
}
await updateOption("GroupRatio", inputs.GroupRatio);
}
if (originInputs['CompletionRatio'] !== inputs.CompletionRatio) {
if (!verifyJSON(inputs.CompletionRatio)) {
showError('补全倍率不是合法的 JSON 字符串');
return;
}
await updateOption('CompletionRatio', inputs.CompletionRatio);
}
break;
case "quota":
if (originInputs["QuotaForNewUser"] !== inputs.QuotaForNewUser) {
@ -501,28 +499,14 @@ const OperationSetting = () => {
<TextField
multiline
maxRows={15}
id="channel-ModelRatio-label"
label="模型倍率"
value={inputs.ModelRatio}
name="ModelRatio"
id="channel-Ratio-label"
label="自定义倍率"
value={inputs.Ratio}
name="Ratio"
onChange={handleInputChange}
aria-describedby="helper-text-channel-ModelRatio-label"
aria-describedby="helper-text-channel-Ratio-label"
minRows={5}
placeholder="为一个 JSON 文本,键为模型名称,值为倍率"
/>
</FormControl>
<FormControl fullWidth>
<TextField
multiline
maxRows={15}
id="channel-CompletionRatio-label"
label="补全倍率"
value={inputs.CompletionRatio}
name="CompletionRatio"
onChange={handleInputChange}
aria-describedby="helper-text-channel-CompletionRatio-label"
minRows={5}
placeholder="为一个 JSON 文本,键为模型名称,值为倍率,此处的倍率设置是模型补全倍率相较于提示倍率的比例,使用该设置可强制覆盖 One API 的内部比例"
placeholder={`为一个 JSON 文本,键为模型名称,值为倍率结构,例如:\n${JSON.stringify(RATIO_MAPPING_EXAMPLE, null, 2)}`}
/>
</FormControl>
<FormControl fullWidth>

View File

@ -2,6 +2,12 @@ import React, { useEffect, useState } from 'react';
import { Divider, Form, Grid, Header } from 'semantic-ui-react';
import { API, showError, showSuccess, timestamp2string, verifyJSON } from '../helpers';
const RATIO_MAPPING_EXAMPLE = {
'gpt-4o-mini': {'input': 0.075, 'output': 0.3},
'llama3-8b-8192(33)': {'input': 0.15, 'output': 0.3},
'llama3-70b-8192(33)': {'input': 1.325, 'output': 1.749},
};
const OperationSetting = () => {
let now = new Date();
let [inputs, setInputs] = useState({
@ -10,9 +16,8 @@ const OperationSetting = () => {
QuotaForInvitee: 0,
QuotaRemindThreshold: 0,
PreConsumedQuota: 0,
ModelRatio: '',
CompletionRatio: '',
GroupRatio: '',
Ratio: '',
TopUpLink: '',
ChatLink: '',
QuotaPerUnit: 0,
@ -35,7 +40,7 @@ const OperationSetting = () => {
if (success) {
let newInputs = {};
data.forEach((item) => {
if (item.key === 'ModelRatio' || item.key === 'GroupRatio' || item.key === 'CompletionRatio') {
if (item.key === 'GroupRatio' || item.key === 'Ratio') {
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
}
if (item.value === '{}') {
@ -91,13 +96,6 @@ const OperationSetting = () => {
}
break;
case 'ratio':
if (originInputs['ModelRatio'] !== inputs.ModelRatio) {
if (!verifyJSON(inputs.ModelRatio)) {
showError('模型倍率不是合法的 JSON 字符串');
return;
}
await updateOption('ModelRatio', inputs.ModelRatio);
}
if (originInputs['GroupRatio'] !== inputs.GroupRatio) {
if (!verifyJSON(inputs.GroupRatio)) {
showError('分组倍率不是合法的 JSON 字符串');
@ -105,12 +103,12 @@ const OperationSetting = () => {
}
await updateOption('GroupRatio', inputs.GroupRatio);
}
if (originInputs['CompletionRatio'] !== inputs.CompletionRatio) {
if (!verifyJSON(inputs.CompletionRatio)) {
showError('补全倍率不是合法的 JSON 字符串');
if (originInputs['Ratio'] !== inputs.Ratio) {
if (!verifyJSON(inputs.Ratio)) {
showError('倍率不是合法的 JSON 字符串');
return;
}
await updateOption('CompletionRatio', inputs.CompletionRatio);
await updateOption('Ratio', inputs.Ratio);
}
break;
case 'quota':
@ -346,24 +344,13 @@ const OperationSetting = () => {
</Header>
<Form.Group widths='equal'>
<Form.TextArea
label='模型倍率'
name='ModelRatio'
label='自定义倍率'
name='Ratio'
onChange={handleInputChange}
style={{ minHeight: 250, fontFamily: 'JetBrains Mono, Consolas' }}
autoComplete='new-password'
value={inputs.ModelRatio}
placeholder='为一个 JSON 文本,键为模型名称,值为倍率'
/>
</Form.Group>
<Form.Group widths='equal'>
<Form.TextArea
label='补全倍率'
name='CompletionRatio'
onChange={handleInputChange}
style={{ minHeight: 250, fontFamily: 'JetBrains Mono, Consolas' }}
autoComplete='new-password'
value={inputs.CompletionRatio}
placeholder='为一个 JSON 文本,键为模型名称,值为倍率,此处的倍率设置是模型补全倍率相较于提示倍率的比例,使用该设置可强制覆盖 One API 的内部比例'
value={inputs.Ratio}
placeholder={`为一个 JSON 文本,键为模型名称,值为倍率结构,例如:\n${JSON.stringify(RATIO_MAPPING_EXAMPLE, null, 2)}`}
/>
</Form.Group>
<Form.Group widths='equal'>