This commit is contained in:
Mo
2024-06-30 16:26:00 +08:00
committed by GitHub
26 changed files with 528 additions and 1 deletions

View File

@@ -45,6 +45,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -1,9 +1,11 @@
package ali
import (
"encoding/json"
"errors"
"fmt"
"github.com/gin-gonic/gin"
"github.com/gorilla/websocket"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
@@ -76,7 +78,19 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return aliRequest, nil
}
// ConvertTextToSpeechRequest converts a text-to-speech request with the
// package-level ConvertTextToSpeechRequest function and returns the resulting
// message. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
	if request == nil {
		return nil, errors.New("request is nil")
	}
	return ConvertTextToSpeechRequest(*request), nil
}
// DoRequest dispatches on meta.Mode: audio-speech requests
// (relaymode.AudioSpeech) go through DoWSSRequest, and every other mode is
// delegated to adaptor.DoRequestHelper.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
	switch meta.Mode {
	case relaymode.AudioSpeech:
		return a.DoWSSRequest(c, meta, requestBody)
	default:
		return adaptor.DoRequestHelper(a, c, meta, requestBody)
	}
}
@@ -89,6 +103,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
err, usage = EmbeddingHandler(c, resp)
case relaymode.ImagesGenerations:
err, usage = ImageHandler(c, resp)
case relaymode.AudioSpeech:
err, usage = AudioSpeechHandler(c, resp)
default:
err, usage = Handler(c, resp)
}
@@ -103,3 +119,74 @@ func (a *Adaptor) GetModelList() []string {
// GetChannelName returns the channel name of this adaptor, "ali".
func (a *Adaptor) GetChannelName() string {
return "ali"
}
// DoWSSRequest performs a text-to-speech request over the DashScope WebSocket
// inference endpoint and streams the returned audio to the client.
//
// It dials the endpoint with meta.APIKey as a bearer token, sends the JSON
// document read from requestBody as the first message, and then reads
// messages until the task ends:
//   - binary messages are written to c.Writer as they arrive;
//   - a text message whose header event is "task-finished" ends the call and
//     sets the response status to http.StatusOK;
//   - a text message whose header event is "task-failed" ends the call with
//     an error;
//   - any other text message is ignored.
//
// The returned *http.Response is a placeholder with an empty body; it only
// carries a status code, which stays http.StatusInternalServerError on every
// path except "task-finished". A non-nil response is returned even when the
// error is non-nil.
//
// NOTE(review): the character usage reported in the "task-finished" message
// is not propagated to the caller, because the signature has no way to carry
// it. Billing for this mode must be handled elsewhere.
func (a *Adaptor) DoWSSRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
	baseURL := "wss://dashscope.aliyuncs.com/api-ws/v1/inference"

	// Placeholder response. http.NoBody is safe to both read and close,
	// unlike io.NopCloser(nil), which panics on Read.
	response := &http.Response{
		StatusCode: http.StatusInternalServerError,
		Body:       http.NoBody,
	}

	conn, _, err := websocket.DefaultDialer.Dial(baseURL, http.Header{"Authorization": {"Bearer " + meta.APIKey}})
	if err != nil {
		return response, errors.New("ali_wss_conn_failed")
	}
	defer conn.Close()

	requestBodyBytes, err := io.ReadAll(requestBody)
	if err != nil {
		return response, errors.New("ali_failed_to_read_request_body")
	}

	// Round-trip through a map so that only well-formed JSON is forwarded.
	var requestBodyMap map[string]any
	if err := json.Unmarshal(requestBodyBytes, &requestBodyMap); err != nil {
		return response, errors.New("ali_failed_to_parse_request_body")
	}
	if err := conn.WriteJSON(requestBodyMap); err != nil {
		return response, errors.New("ali_wss_write_msg_failed")
	}

	for {
		messageType, data, err := conn.ReadMessage()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return response, nil
			}
			return response, errors.New("ali_wss_read_msg_failed")
		}

		switch messageType {
		case websocket.TextMessage:
			var msg WSSMessage
			if err := json.Unmarshal(data, &msg); err != nil {
				return response, errors.New("ali_wss_parse_msg_failed")
			}
			switch msg.Header.Event {
			case "task-finished":
				response.StatusCode = http.StatusOK
				return response, nil
			case "task-failed":
				// Fail fast instead of waiting for the server to drop
				// the connection and reporting a generic read error.
				return response, errors.New("ali_wss_task_failed")
			}
		case websocket.BinaryMessage:
			// Audio payload: forward it to the client as-is.
			if _, err := c.Writer.Write(data); err != nil {
				return response, errors.New("wss_write_chunk_failed")
			}
		}
	}
}

View File

@@ -0,0 +1,21 @@
package ali
import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/model"
"net/http"
)
// AudioSpeechHandler finishes an audio-speech response: it copies the first
// value of every header in resp onto the client response, writes
// resp.StatusCode, and closes resp.Body. It does not copy the body itself.
//
// It returns a wrapped "close_response_body_failed" error when closing the
// body fails. The usage result is always nil.
func AudioSpeechHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
	for name, values := range resp.Header {
		c.Writer.Header().Set(name, values[0])
	}
	c.Writer.WriteHeader(resp.StatusCode)

	if closeErr := resp.Body.Close(); closeErr != nil {
		return openai.ErrorWrapper(closeErr, "close_response_body_failed", http.StatusInternalServerError), nil
	}
	return nil, nil
}

View File

@@ -4,4 +4,48 @@ var ModelList = []string{
"qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext",
"text-embedding-v1",
"ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1",
"sambert-zhichu-v1",
"sambert-zhiwei-v1",
"sambert-zhixiang-v1",
"sambert-zhide-v1",
"sambert-zhijia-v1",
"sambert-zhinan-v1",
"sambert-zhiqi-v1",
"sambert-zhiqian-v1",
"sambert-zhiru-v1",
"sambert-zhimiao-emo-v1",
"sambert-zhida-v1",
"sambert-zhifei-v1",
"sambert-zhigui-v1",
"sambert-zhihao-v1",
"sambert-zhijing-v1",
"sambert-zhilun-v1",
"sambert-zhimao-v1",
"sambert-zhiming-v1",
"sambert-zhimo-v1",
"sambert-zhina-v1",
"sambert-zhishu-v1",
"sambert-zhishuo-v1",
"sambert-zhistella-v1",
"sambert-zhiting-v1",
"sambert-zhixiao-v1",
"sambert-zhiya-v1",
"sambert-zhiye-v1",
"sambert-zhiying-v1",
"sambert-zhiyuan-v1",
"sambert-zhiyue-v1",
"sambert-camila-v1",
"sambert-perla-v1",
"sambert-indah-v1",
"sambert-clara-v1",
"sambert-hanna-v1",
"sambert-beth-v1",
"sambert-betty-v1",
"sambert-cally-v1",
"sambert-cindy-v1",
"sambert-eva-v1",
"sambert-donna-v1",
"sambert-brian-v1",
"sambert-waan-v1",
}

View File

@@ -4,6 +4,7 @@ import (
"bufio"
"encoding/json"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/songquanpeng/one-api/common"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/common/logger"
@@ -77,6 +78,37 @@ func ConvertImageRequest(request model.ImageRequest) *ImageRequest {
return &imageRequest
}
// ConvertTextToSpeechRequest builds the "run-task" WebSocket message for a
// text-to-speech request.
//
// The input text and model are taken from request. The audio format defaults
// to "wav" and is replaced by request.ResponseFormat only when that is one of
// "pcm", "wav" or "mp3". The speech rate defaults to 1.0 and is replaced by
// request.Speed only when it lies in [0.5, 2]. Every call gets a fresh UUID
// as its task ID. The sample rate is not set here.
func ConvertTextToSpeechRequest(request model.TextToSpeechRequest) *WSSMessage {
	msg := new(WSSMessage)

	// Header: start a new task with streamed output.
	msg.Header.Action = "run-task"
	msg.Header.Streaming = "out"
	msg.Header.TaskID = uuid.New().String()

	// Payload: task routing and input.
	msg.Payload.TaskGroup = "audio"
	msg.Payload.Task = "tts"
	msg.Payload.Function = "SpeechSynthesizer"
	msg.Payload.Model = request.Model
	msg.Payload.Input.Text = request.Input

	// Audio format: keep the default unless a supported one was requested.
	msg.Payload.Parameters.Format = "wav"
	switch request.ResponseFormat {
	case "pcm", "wav", "mp3":
		msg.Payload.Parameters.Format = request.ResponseFormat
	}

	// Speech rate: keep the default unless the requested speed is in range.
	msg.Payload.Parameters.Rate = 1.0
	if request.Speed >= 0.5 && request.Speed <= 2 {
		msg.Payload.Parameters.Rate = request.Speed
	}

	return msg
}
func EmbeddingHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
var aliResponse EmbeddingResponse
err := json.NewDecoder(resp.Body).Decode(&aliResponse)

View File

@@ -48,6 +48,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -57,6 +57,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest does nothing in this adaptor: it ignores its arguments and always
// returns a nil response together with a nil error.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return nil, nil
}

View File

@@ -116,6 +116,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -44,6 +44,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
return ConvertRequest(*request), nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -42,6 +42,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
return ConvertRequest(*request), nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -45,6 +45,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -46,6 +46,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -66,6 +66,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to channelhelper.DoRequestHelper, handing it this
// adaptor, the gin context, the request meta and the request body, and
// returns its result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return channelhelper.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -14,6 +14,7 @@ type Adaptor interface {
SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error
ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error)
ConvertImageRequest(request *model.ImageRequest) (any, error)
ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error)
DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error)
DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode)
GetModelList() []string

View File

@@ -55,6 +55,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -32,6 +32,14 @@ func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
// https://{resource_name}.openai.azure.com/openai/deployments/dall-e-3/images/generations?api-version=2024-03-01-preview
fullRequestURL := fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", meta.BaseURL, meta.ActualModelName, meta.Config.APIVersion)
return fullRequestURL, nil
} else if meta.Mode == relaymode.AudioTranscription {
// https://learn.microsoft.com/en-us/azure/ai-services/openai/whisper-quickstart?tabs=command-line#rest-api
fullRequestURL := fmt.Sprintf("%s/openai/deployments/%s/audio/transcriptions?api-version=%s", meta.BaseURL, meta.ActualModelName, meta.Config.APIVersion)
return fullRequestURL, nil
} else if meta.Mode == relaymode.AudioSpeech {
// https://learn.microsoft.com/en-us/azure/ai-services/openai/text-to-speech-quickstart?tabs=command-line#rest-api
fullRequestURL := fmt.Sprintf("%s/openai/deployments/%s/audio/speech?api-version=%s", meta.BaseURL, meta.ActualModelName, meta.Config.APIVersion)
return fullRequestURL, nil
}
// https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?pivots=rest-api&tabs=command-line#rest-api
@@ -57,6 +65,9 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me
adaptor.SetupCommonRequestHeader(c, req, meta)
if meta.ChannelType == channeltype.Azure {
req.Header.Set("api-key", meta.APIKey)
if meta.Mode == relaymode.AudioTranscription || meta.Mode == relaymode.AudioSpeech {
req.ContentLength = c.Request.ContentLength
}
return nil
}
req.Header.Set("Authorization", "Bearer "+meta.APIKey)
@@ -81,6 +92,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}
@@ -100,6 +118,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
switch meta.Mode {
case relaymode.ImagesGenerations:
err, _ = ImageHandler(c, resp)
case relaymode.AudioSpeech:
err, _ = TextToSpeechHandler(c, resp)
default:
err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
}

View File

@@ -0,0 +1,26 @@
package openai
import (
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/relay/model"
"io"
"net/http"
)
// TextToSpeechHandler relays an upstream text-to-speech response to the
// client: it copies the first value of every upstream header, writes the
// upstream status code, and streams the body to c.Writer.
//
// resp.Body is always closed before returning, including when the copy
// fails, so the upstream connection is never leaked. A copy failure is
// reported as "copy_response_body_failed" and takes precedence over a close
// failure, which is reported as "close_response_body_failed". The usage
// result is always nil.
func TextToSpeechHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
	for k, v := range resp.Header {
		// Skip keys with no values so v[0] cannot panic.
		if len(v) == 0 {
			continue
		}
		c.Writer.Header().Set(k, v[0])
	}
	c.Writer.WriteHeader(resp.StatusCode)

	_, copyErr := io.Copy(c.Writer, resp.Body)
	// Close unconditionally; the original skipped this when the copy failed.
	closeErr := resp.Body.Close()

	if copyErr != nil {
		return ErrorWrapper(copyErr, "copy_response_body_failed", http.StatusInternalServerError), nil
	}
	if closeErr != nil {
		return ErrorWrapper(closeErr, "close_response_body_failed", http.StatusInternalServerError), nil
	}
	return nil, nil
}

View File

@@ -43,6 +43,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -65,6 +65,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}

View File

@@ -46,6 +46,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return request, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
// xunfei's request is not http request, so we don't need to do anything here
dummyResp := &http.Response{}

View File

@@ -92,6 +92,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
return newRequest, nil
}
// ConvertTextToSpeechRequest passes the text-to-speech request through
// unchanged. It returns an error when request is nil.
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// DoRequest delegates to adaptor.DoRequestHelper, handing it this adaptor,
// the gin context, the request meta and the request body, and returns its
// result unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
return adaptor.DoRequestHelper(a, c, meta, requestBody)
}