From 0103e980fcadab91258b274a4edce26533bc6764 Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Thu, 6 Feb 2025 13:12:35 +0000 Subject: [PATCH] feat: add support for audio content in Message model and update README closes #2054 --- README.md | 2 +- relay/model/message.go | 68 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 37781eb7..8ff4b841 100644 --- a/README.md +++ b/README.md @@ -381,7 +381,7 @@ graph LR + 例子:`NODE_TYPE=slave` 9. `CHANNEL_UPDATE_FREQUENCY`:设置之后将定期更新渠道余额,单位为分钟,未设置则不进行更新。 + 例子:`CHANNEL_UPDATE_FREQUENCY=1440` -10. `CHANNEL_TEST_FREQUENCY`:设置之后将定期检查渠道,单位为分钟,未设置则不进行检查。 +10. `CHANNEL_TEST_FREQUENCY`:设置之后将定期检查渠道,单位为分钟,未设置则不进行检查。 +例子:`CHANNEL_TEST_FREQUENCY=1440` 11. `POLLING_INTERVAL`:批量更新渠道余额以及测试可用性时的请求间隔,单位为秒,默认无间隔。 + 例子:`POLLING_INTERVAL=5` diff --git a/relay/model/message.go b/relay/model/message.go index b908f989..bd30676b 100644 --- a/relay/model/message.go +++ b/relay/model/message.go @@ -1,11 +1,36 @@ package model +import ( + "context" + + "github.com/songquanpeng/one-api/common/logger" +) + type Message struct { - Role string `json:"role,omitempty"` - Content any `json:"content,omitempty"` - Name *string `json:"name,omitempty"` - ToolCalls []Tool `json:"tool_calls,omitempty"` - ToolCallId string `json:"tool_call_id,omitempty"` + Role string `json:"role,omitempty"` + // Content is a string or a list of objects + Content any `json:"content,omitempty"` + Name *string `json:"name,omitempty"` + ToolCalls []Tool `json:"tool_calls,omitempty"` + ToolCallId string `json:"tool_call_id,omitempty"` + Audio *messageAudio `json:"audio,omitempty"` + // ------------------------------------- + // Deepseek 专有的一些字段 + // https://api-docs.deepseek.com/api/create-chat-completion + // ------------------------------------- + // Prefix forces the model to begin its answer with the supplied prefix in the assistant message. + // To enable this feature, set base_url to "https://api.deepseek.com/beta". + Prefix *bool `json:"prefix,omitempty"` // ReasoningContent is Used for the deepseek-reasoner model in the Chat + // Prefix Completion feature as the input for the CoT in the last assistant message. + // When using this feature, the prefix parameter must be set to true. + ReasoningContent *string `json:"reasoning_content,omitempty"` +} + +type messageAudio struct { + Id string `json:"id"` + Data string `json:"data,omitempty"` + ExpiredAt int `json:"expired_at,omitempty"` + Transcript string `json:"transcript,omitempty"` } func (m Message) IsStringContent() bool { @@ -26,6 +51,7 @@ func (m Message) StringContent() string { if !ok { continue } + if contentMap["type"] == ContentTypeText { if subStr, ok := contentMap["text"].(string); ok { contentStr += subStr @@ -34,6 +60,7 @@ func (m Message) StringContent() string { } return contentStr } + return "" } @@ -47,6 +74,7 @@ func (m Message) ParseContent() []MessageContent { }) return contentList } + anyList, ok := m.Content.([]any) if ok { for _, contentItem := range anyList { @@ -71,8 +99,21 @@ func (m Message) ParseContent() []MessageContent { }, }) } + case ContentTypeInputAudio: + if subObj, ok := contentMap["input_audio"].(map[string]any); ok { + contentList = append(contentList, MessageContent{ + Type: ContentTypeInputAudio, + InputAudio: &InputAudio{ + Data: subObj["data"].(string), + Format: subObj["format"].(string), + }, + }) + } + default: + logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"]) } } + return contentList } return nil @@ -84,7 +125,18 @@ type ImageURL struct { } type MessageContent struct { - Type string `json:"type,omitempty"` - Text string `json:"text"` - ImageURL *ImageURL `json:"image_url,omitempty"` + // Type should be one of the following: text/input_audio + Type string `json:"type,omitempty"` + Text string `json:"text"` + ImageURL *ImageURL `json:"image_url,omitempty"` + InputAudio *InputAudio `json:"input_audio,omitempty"` +} + +type InputAudio struct { + // Data is the base64 encoded audio data + Data string `json:"data" binding:"required"` + // Format is the audio format, should be one of the + // following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16. + // When stream=true, format should be pcm16 + Format string `json:"format"` }