增加语音合成功能

This commit is contained in:
RockYang
2025-04-01 17:03:51 +08:00
parent afb9193985
commit ff69cb231a
20 changed files with 216 additions and 88 deletions

View File

@@ -30,20 +30,21 @@ func NewChatModelHandler(app *core.AppServer, db *gorm.DB) *ChatModelHandler {
func (h *ChatModelHandler) Save(c *gin.Context) {
var data struct {
Id uint `json:"id"`
Name string `json:"name"`
Value string `json:"value"`
Enabled bool `json:"enabled"`
SortNum int `json:"sort_num"`
Open bool `json:"open"`
Platform string `json:"platform"`
Power int `json:"power"`
MaxTokens int `json:"max_tokens"` // 最大响应长度
MaxContext int `json:"max_context"` // 最大上下文长度
Temperature float32 `json:"temperature"` // 模型温度
KeyId int `json:"key_id,omitempty"`
CreatedAt int64 `json:"created_at"`
Type string `json:"type"`
Id uint `json:"id"`
Name string `json:"name"`
Value string `json:"value"`
Enabled bool `json:"enabled"`
SortNum int `json:"sort_num"`
Open bool `json:"open"`
Platform string `json:"platform"`
Power int `json:"power"`
MaxTokens int `json:"max_tokens"` // 最大响应长度
MaxContext int `json:"max_context"` // 最大上下文长度
Temperature float32 `json:"temperature"` // 模型温度
KeyId int `json:"key_id,omitempty"`
CreatedAt int64 `json:"created_at"`
Type string `json:"type"`
Options map[string]string `json:"options"`
}
if err := c.ShouldBindJSON(&data); err != nil {
resp.ERROR(c, types.InvalidArgs)
@@ -59,7 +60,6 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
item.Name = data.Name
item.Value = data.Value
item.Enabled = data.Enabled
item.SortNum = data.SortNum
item.Open = data.Open
item.Power = data.Power
item.MaxTokens = data.MaxTokens
@@ -67,6 +67,7 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
item.Temperature = data.Temperature
item.KeyId = data.KeyId
item.Type = data.Type
item.Options = utils.JsonEncode(data.Options)
var res *gorm.DB
if data.Id > 0 {
res = h.DB.Save(&item)

View File

@@ -25,6 +25,7 @@ import (
"io"
"net/http"
"net/url"
"os"
"strings"
"time"
"unicode/utf8"
@@ -505,47 +506,90 @@ func (h *ChatHandler) saveChatHistory(
// 文本生成语音
func (h *ChatHandler) TextToSpeech(c *gin.Context) {
var data struct {
Text string `json:"text"`
ModelId int `json:"model_id"`
Text string `json:"text"`
}
if err := c.ShouldBindJSON(&data); err != nil {
resp.ERROR(c, types.InvalidArgs)
return
}
// 调用 DeepSeek 的 API 接口
var apiKey model.ApiKey
h.DB.Where("type", "chat").Where("enabled", true).First(&apiKey)
if apiKey.Id == 0 {
resp.ERROR(c, "no available key, please import key")
textHash := utils.Sha256(fmt.Sprintf("%d/%s", data.ModelId, data.Text))
audioFile := fmt.Sprintf("%s/audio", h.App.Config.StaticDir)
if _, err := os.Stat(audioFile); err != nil {
os.MkdirAll(audioFile, 0755)
}
audioFile = fmt.Sprintf("%s/%s.mp3", audioFile, textHash)
if _, err := os.Stat(audioFile); err == nil {
// 设置响应头
c.Header("Content-Type", "audio/mpeg")
c.Header("Content-Disposition", "attachment; filename=speech.mp3")
c.File(audioFile)
return
}
// 查询模型
var chatModel model.ChatModel
err := h.DB.Where("id", data.ModelId).First(&chatModel).Error
if err != nil {
resp.ERROR(c, "找不到语音模型")
return
}
// 调用 DeepSeek 的 API 接口
var apiKey model.ApiKey
if chatModel.KeyId > 0 {
h.DB.Where("id", chatModel.KeyId).First(&apiKey)
}
if apiKey.Id == 0 {
h.DB.Where("type", "tts").Where("enabled", true).First(&apiKey)
}
if apiKey.Id == 0 {
resp.ERROR(c, "no TTS API key, please import key")
return
}
logger.Debugf("chatModel: %+v, apiKey: %+v", chatModel, apiKey)
// 调用 openai tts api
config := openai.DefaultConfig(apiKey.Value)
config.BaseURL = apiKey.ApiURL
config.BaseURL = apiKey.ApiURL + "/v1"
client := openai.NewClientWithConfig(config)
voice := openai.VoiceAlloy
var options map[string]string
err = utils.JsonDecode(chatModel.Options, &options)
if err == nil {
voice = openai.SpeechVoice(options["voice"])
}
req := openai.CreateSpeechRequest{
Model: openai.TTSModel1,
Model: openai.SpeechModel(chatModel.Value),
Input: data.Text,
Voice: openai.VoiceAlloy,
Voice: voice,
}
audioData, err := client.CreateSpeech(context.Background(), req)
if err != nil {
logger.Error("failed to create speech: ", err)
resp.ERROR(c, "failed to create speech")
resp.ERROR(c, err.Error())
return
}
// 先将音频数据读取到内存
audioBytes, err := io.ReadAll(audioData)
if err != nil {
resp.ERROR(c, err.Error())
return
}
// 保存到音频文件
err = os.WriteFile(audioFile, audioBytes, 0644)
if err != nil {
logger.Error("failed to save audio file: ", err)
}
// 设置响应头
c.Header("Content-Type", "audio/mpeg")
c.Header("Content-Disposition", "attachment; filename=speech.mp3")
// 音频数据写入响应
_, err = io.Copy(c.Writer, audioData)
if err != nil {
logger.Error("failed to write audio data: ", err)
resp.ERROR(c, "failed to write audio data")
return
}
// 直接写入完整的音频数据响应
c.Writer.Write(audioBytes)
}

View File

@@ -30,14 +30,17 @@ func NewChatModelHandler(app *core.AppServer, db *gorm.DB) *ChatModelHandler {
func (h *ChatModelHandler) List(c *gin.Context) {
var items []model.ChatModel
var chatModels = make([]vo.ChatModel, 0)
session := h.DB.Session(&gorm.Session{}).Where("type", "chat").Where("enabled", true)
session := h.DB.Session(&gorm.Session{}).Where("enabled", true)
t := c.Query("type")
logger.Info("type: ", t)
if t != "" {
session = session.Where("type", t)
} else {
session = session.Where("type", "chat")
}
session = session.Where("open", true)
if h.IsLogin(c) {
if h.IsLogin(c) && t == "chat" {
user, _ := h.GetLoginUser(c)
var models []int
err := utils.JsonDecode(user.ChatModels, &models)
@@ -48,7 +51,7 @@ func (h *ChatModelHandler) List(c *gin.Context) {
}
res := session.Order("sort_num ASC").Find(&items)
res := session.Debug().Order("sort_num ASC").Find(&items)
if res.Error == nil {
for _, item := range items {
var cm vo.ChatModel