Compare commits

...

6 Commits

Author SHA1 Message Date
wzxjohn
bc7c9105f4 chore: update quota calc logic (close #599) (#627)
* fix: change quota calc code (close #599)

Use float64 during the calculation and apply math.Ceil afterwards. The quota charged may be slightly higher than the official standard, but it is guaranteed never to be lower.

* chore: remove blank line

---------

Co-authored-by: JustSong <songquanpeng@foxmail.com>
2023-11-05 19:15:06 +08:00
wood chen
3fe76c8af7 fix: fix Cloudflare AI Gateway channel test support (#639)
* Support openai channel testing when using Cloudflare AI Gateway

* refactor: change logic

---------

Co-authored-by: JustSong <songquanpeng@foxmail.com>
2023-11-05 19:08:25 +08:00
papersnake
c70c614018 feat: support chatglm_turbo (#648)
* feat: support chatglm_turbo

* fix: remove characterglm
2023-11-05 17:59:38 +08:00
Baksi
0d87de697c fix: fix typo (#651) 2023-11-02 22:24:22 +08:00
MaricoHan
aec343dc38 feat: support xunfei v3 (#637) 2023-10-29 22:03:01 +08:00
JustSong
89d458b9cf feat: able to set RELAY_TIMEOUT 2023-10-22 20:39:49 +08:00
8 changed files with 32 additions and 11 deletions

View File

@@ -355,6 +355,7 @@ graph LR
14. Tokenizer cache settings:
    + `TIKTOKEN_CACHE_DIR`: by default the program downloads the token encodings for common models (e.g. `gpt-3.5-turbo`) over the network at startup; on an unstable network or offline, this can prevent startup. Set this directory to cache the data, and it can be migrated to an offline environment.
    + `DATA_GYM_CACHE_DIR`: currently serves the same purpose as `TIKTOKEN_CACHE_DIR`, but with lower priority.
15. `RELAY_TIMEOUT`: relay timeout in seconds; no timeout is set by default.
### Command-line arguments
1. `--port <port_number>`: the port number the server listens on, defaults to `3000`.

View File

@@ -95,6 +95,8 @@ var SyncFrequency = GetOrDefault("SYNC_FREQUENCY", 10*60) // unit is second
var BatchUpdateEnabled = false
var BatchUpdateInterval = GetOrDefault("BATCH_UPDATE_INTERVAL", 5)
var RelayTimeout = GetOrDefault("RELAY_TIMEOUT", 0) // unit is second
const (
    RequestIdKey = "X-Oneapi-Request-Id"
)
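`RELAY_TIMEOUT` defaults to `0`, which the relay client setup later in this compare treats as "no timeout". The `GetOrDefault` helper itself is not part of this diff; below is a minimal sketch of what such an env-with-fallback reader typically looks like — the helper name and behavior here are assumptions for illustration, not code from the repository.

```go
package main

import (
    "fmt"
    "os"
    "strconv"
)

// getOrDefault is a hypothetical stand-in for the repository's GetOrDefault:
// read an integer environment variable, falling back to defaultValue when the
// variable is unset or not a valid integer.
func getOrDefault(env string, defaultValue int) int {
    value := os.Getenv(env)
    if value == "" {
        return defaultValue
    }
    num, err := strconv.Atoi(value)
    if err != nil {
        return defaultValue
    }
    return num
}

func main() {
    relayTimeout := getOrDefault("RELAY_TIMEOUT", 0) // 0 is interpreted as "no client timeout"
    fmt.Println("relay timeout (seconds):", relayTimeout)
}
```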

View File

@@ -49,6 +49,7 @@ var ModelRatio = map[string]float64{
"ERNIE-Bot-4": 8.572, // ¥0.12 / 1k tokens
"Embedding-V1": 0.1429, // ¥0.002 / 1k tokens
"PaLM-2": 1,
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens

View File

@@ -5,14 +5,14 @@ import (
"encoding/json"
"errors"
"fmt"
"github.com/gin-gonic/gin"
"net/http"
"one-api/common"
"one-api/model"
"strconv"
"strings"
"sync"
"time"
"github.com/gin-gonic/gin"
)
func testChannel(channel *model.Channel, request ChatRequest) (err error, openaiErr *OpenAIError) {
@@ -50,6 +50,8 @@ func testChannel(channel *model.Channel, request ChatRequest) (err error, openai
        }
        requestURL += "/v1/chat/completions"
    }
    // for Cloudflare AI gateway: https://github.com/songquanpeng/one-api/pull/639
    requestURL = strings.Replace(requestURL, "/v1/v1", "/v1", 1)
    jsonData, err := json.Marshal(request)
    if err != nil {
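The Cloudflare AI Gateway fix works by collapsing a doubled `/v1` segment: when a channel's base URL already ends in `/v1`, appending `/v1/chat/completions` would otherwise produce `/v1/v1/...`. A minimal sketch of the effect — the base URL below is illustrative, not taken from the diff:

```go
package main

import (
    "fmt"
    "strings"
)

func main() {
    // Hypothetical base URL that already ends in /v1, e.g. a gateway-style endpoint.
    requestURL := "https://example-gateway.invalid/openai/v1"
    requestURL += "/v1/chat/completions"
    // Same rewrite as in the diff: collapse the first "/v1/v1" into "/v1".
    requestURL = strings.Replace(requestURL, "/v1/v1", "/v1", 1)
    fmt.Println(requestURL) // https://example-gateway.invalid/openai/v1/chat/completions
}
```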

View File

@@ -274,7 +274,7 @@ func init() {
Id: "claude-instant-1",
Object: "model",
Created: 1677649963,
OwnedBy: "anturopic",
OwnedBy: "anthropic",
Permission: permission,
Root: "claude-instant-1",
Parent: nil,
@@ -283,7 +283,7 @@ func init() {
Id: "claude-2",
Object: "model",
Created: 1677649963,
OwnedBy: "anturopic",
OwnedBy: "anthropic",
Permission: permission,
Root: "claude-2",
Parent: nil,
@@ -333,6 +333,15 @@ func init() {
Root: "PaLM-2",
Parent: nil,
},
{
Id: "chatglm_turbo",
Object: "model",
Created: 1677649963,
OwnedBy: "zhipu",
Permission: permission,
Root: "chatglm_turbo",
Parent: nil,
},
{
Id: "chatglm_pro",
Object: "model",

View File

@@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"io"
"math"
"net/http"
"one-api/common"
"one-api/model"
@@ -32,7 +33,14 @@ var httpClient *http.Client
var impatientHTTPClient *http.Client
func init() {
httpClient = &http.Client{}
if common.RelayTimeout == 0 {
httpClient = &http.Client{}
} else {
httpClient = &http.Client{
Timeout: time.Duration(common.RelayTimeout) * time.Second,
}
}
impatientHTTPClient = &http.Client{
Timeout: 5 * time.Second,
}
@@ -407,9 +415,7 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
completionRatio := common.GetCompletionRatio(textRequest.Model)
promptTokens = textResponse.Usage.PromptTokens
completionTokens = textResponse.Usage.CompletionTokens
quota = promptTokens + int(float64(completionTokens)*completionRatio)
quota = int(float64(quota) * ratio)
quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
if ratio != 0 && quota <= 0 {
    quota = 1
}
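The quota change replaces two truncating `int` conversions with a single `math.Ceil` over the `float64` result, which is why the charged quota can only round up, never down. A small worked comparison — the token counts and ratios below are made-up numbers for illustration:

```go
package main

import (
    "fmt"
    "math"
)

func main() {
    // Illustrative values only.
    promptTokens, completionTokens := 13, 7
    completionRatio := 1.5
    ratio := 0.75

    // Old approach: truncate twice, which can undercount.
    old := promptTokens + int(float64(completionTokens)*completionRatio)
    oldQuota := int(float64(old) * ratio)

    // New approach: compute in float64 and round up once at the end.
    newQuota := int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))

    fmt.Println(oldQuota, newQuota) // 17 18 — the ceil version is never smaller
}
```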

View File

@@ -298,8 +298,8 @@ func getXunfeiAuthUrl(c *gin.Context, apiKey string, apiSecret string) (string,
common.SysLog("api_version not found, use default: " + apiVersion)
}
domain := "general"
if apiVersion == "v2.1" {
domain = "generalv2"
if apiVersion != "v1.1" {
domain += strings.Split(apiVersion, ".")[0]
}
authUrl := buildXunfeiAuthUrl(fmt.Sprintf("wss://spark-api.xf-yun.com/%s/chat", apiVersion), apiKey, apiSecret)
return domain, authUrl
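The xunfei change derives the Spark domain from the API version instead of hard-coding v2.1: for anything other than v1.1, the major version is appended to "general", so v3 is supported without another special case. A small sketch of the resulting mapping — the version strings in the loop are examples:

```go
package main

import (
    "fmt"
    "strings"
)

func main() {
    for _, apiVersion := range []string{"v1.1", "v2.1", "v3.1"} {
        domain := "general"
        if apiVersion != "v1.1" {
            domain += strings.Split(apiVersion, ".")[0]
        }
        fmt.Println(apiVersion, "->", domain) // v1.1 -> general, v2.1 -> generalv2, v3.1 -> generalv3
    }
}
```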

View File

@@ -72,7 +72,7 @@ const EditChannel = () => {
  localModels = ['qwen-turbo', 'qwen-plus', 'text-embedding-v1'];
  break;
case 16:
  localModels = ['chatglm_pro', 'chatglm_std', 'chatglm_lite'];
  localModels = ['chatglm_turbo', 'chatglm_pro', 'chatglm_std', 'chatglm_lite'];
  break;
case 18:
  localModels = ['SparkDesk'];