增加语音合成功能

2026-02-10 08:24:26 +08:00 · 2025-04-01 17:03:51 +08:00
parent afb9193985
commit ff69cb231a
20 changed files with 216 additions and 88 deletions
--- a/api/handler/admin/chat_model_handler.go
+++ b/api/handler/admin/chat_model_handler.go
@@ -30,20 +30,21 @@ func NewChatModelHandler(app *core.AppServer, db *gorm.DB) *ChatModelHandler {

 func (h *ChatModelHandler) Save(c *gin.Context) {
 	var data struct {
-		Id          uint    `json:"id"`
-		Name        string  `json:"name"`
-		Value       string  `json:"value"`
-		Enabled     bool    `json:"enabled"`
-		SortNum     int     `json:"sort_num"`
-		Open        bool    `json:"open"`
-		Platform    string  `json:"platform"`
-		Power       int     `json:"power"`
-		MaxTokens   int     `json:"max_tokens"`  // 最大响应长度
-		MaxContext  int     `json:"max_context"` // 最大上下文长度
-		Temperature float32 `json:"temperature"` // 模型温度
-		KeyId       int     `json:"key_id,omitempty"`
-		CreatedAt   int64   `json:"created_at"`
-		Type        string  `json:"type"`
+		Id          uint              `json:"id"`
+		Name        string            `json:"name"`
+		Value       string            `json:"value"`
+		Enabled     bool              `json:"enabled"`
+		SortNum     int               `json:"sort_num"`
+		Open        bool              `json:"open"`
+		Platform    string            `json:"platform"`
+		Power       int               `json:"power"`
+		MaxTokens   int               `json:"max_tokens"`  // 最大响应长度
+		MaxContext  int               `json:"max_context"` // 最大上下文长度
+		Temperature float32           `json:"temperature"` // 模型温度
+		KeyId       int               `json:"key_id,omitempty"`
+		CreatedAt   int64             `json:"created_at"`
+		Type        string            `json:"type"`
+		Options     map[string]string `json:"options"`
 	}
 	if err := c.ShouldBindJSON(&data); err != nil {
 		resp.ERROR(c, types.InvalidArgs)
@@ -59,7 +60,6 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
 	item.Name = data.Name
 	item.Value = data.Value
 	item.Enabled = data.Enabled
-	item.SortNum = data.SortNum
 	item.Open = data.Open
 	item.Power = data.Power
 	item.MaxTokens = data.MaxTokens
@@ -67,6 +67,7 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
 	item.Temperature = data.Temperature
 	item.KeyId = data.KeyId
 	item.Type = data.Type
+	item.Options = utils.JsonEncode(data.Options)
 	var res *gorm.DB
 	if data.Id > 0 {
 		res = h.DB.Save(&item)
--- a/api/handler/chat_handler.go
+++ b/api/handler/chat_handler.go
@@ -25,6 +25,7 @@ import (
 	"io"
 	"net/http"
 	"net/url"
+	"os"
 	"strings"
 	"time"
 	"unicode/utf8"
@@ -505,47 +506,90 @@ func (h *ChatHandler) saveChatHistory(
 // 文本生成语音
 func (h *ChatHandler) TextToSpeech(c *gin.Context) {
 	var data struct {
-		Text string `json:"text"`
+		ModelId int    `json:"model_id"`
+		Text    string `json:"text"`
 	}
 	if err := c.ShouldBindJSON(&data); err != nil {
 		resp.ERROR(c, types.InvalidArgs)
 		return
 	}

-	// 调用 DeepSeek 的 API 接口
-	var apiKey model.ApiKey
-	h.DB.Where("type", "chat").Where("enabled", true).First(&apiKey)
-	if apiKey.Id == 0 {
-		resp.ERROR(c, "no available key, please import key")
+	textHash := utils.Sha256(fmt.Sprintf("%d/%s", data.ModelId, data.Text))
+	audioFile := fmt.Sprintf("%s/audio", h.App.Config.StaticDir)
+	if err := os.MkdirAll(audioFile, 0755); err != nil {
+		logger.Error("failed to create audio cache dir: ", err)
+	}
+	audioFile = fmt.Sprintf("%s/%s.mp3", audioFile, textHash)
+	if _, err := os.Stat(audioFile); err == nil {
+		// Cache hit: the same model/text pair was synthesized before, so serve the stored file
+		c.Header("Content-Type", "audio/mpeg")
+		c.Header("Content-Disposition", "attachment; filename=speech.mp3")
+		c.File(audioFile)
 		return
 	}

+	// Load the model record selected by the client
+	var chatModel model.ChatModel
+	err := h.DB.Where("id", data.ModelId).First(&chatModel).Error
+	if err != nil {
+		resp.ERROR(c, "找不到语音模型")
+		return
+	}
+
+	// Pick the API key: the key bound to the model first, otherwise any enabled "tts" key
+	var apiKey model.ApiKey
+	if chatModel.KeyId > 0 {
+		h.DB.Where("id", chatModel.KeyId).First(&apiKey)
+	}
+	if apiKey.Id == 0 {
+		h.DB.Where("type", "tts").Where("enabled", true).First(&apiKey)
+	}
+	if apiKey.Id == 0 {
+		resp.ERROR(c, "no TTS API key, please import key")
+		return
+	}
+
+	logger.Debugf("chatModel: %+v, apiKey id: %v", chatModel, apiKey.Id)
+
 	// 调用 openai tts api
 	config := openai.DefaultConfig(apiKey.Value)
-	config.BaseURL = apiKey.ApiURL
+	config.BaseURL = apiKey.ApiURL + "/v1"
 	client := openai.NewClientWithConfig(config)
+	voice := openai.VoiceAlloy
+	var options map[string]string
+	err = utils.JsonDecode(chatModel.Options, &options)
+	if err == nil && options["voice"] != "" {
+		voice = openai.SpeechVoice(options["voice"])
+	}
 	req := openai.CreateSpeechRequest{
-		Model: openai.TTSModel1,
+		Model: openai.SpeechModel(chatModel.Value),
 		Input: data.Text,
-		Voice: openai.VoiceAlloy,
+		Voice: voice,
 	}

 	audioData, err := client.CreateSpeech(context.Background(), req)
 	if err != nil {
-		logger.Error("failed to create speech: ", err)
-		resp.ERROR(c, "failed to create speech")
+		resp.ERROR(c, err.Error())
 		return
 	}

+	defer audioData.Close() // release the upstream body; it is read fully into memory below
+	audioBytes, err := io.ReadAll(audioData)
+	if err != nil {
+		resp.ERROR(c, err.Error())
+		return
+	}
+
+	// Cache the audio on disk; a write failure is logged but does not fail the request
+	err = os.WriteFile(audioFile, audioBytes, 0644)
+	if err != nil {
+		logger.Error("failed to save audio file: ", err)
+	}
+
 	// 设置响应头
 	c.Header("Content-Type", "audio/mpeg")
 	c.Header("Content-Disposition", "attachment; filename=speech.mp3")

-	// 将音频数据写入响应
-	_, err = io.Copy(c.Writer, audioData)
-	if err != nil {
-		logger.Error("failed to write audio data: ", err)
-		resp.ERROR(c, "failed to write audio data")
-		return
-	}
+	// Send the buffered audio bytes to the client
+	c.Writer.Write(audioBytes)
 }
--- a/api/handler/chat_model_handler.go
+++ b/api/handler/chat_model_handler.go
@@ -30,14 +30,17 @@ func NewChatModelHandler(app *core.AppServer, db *gorm.DB) *ChatModelHandler {
 func (h *ChatModelHandler) List(c *gin.Context) {
 	var items []model.ChatModel
 	var chatModels = make([]vo.ChatModel, 0)
-	session := h.DB.Session(&gorm.Session{}).Where("type", "chat").Where("enabled", true)
+	session := h.DB.Session(&gorm.Session{}).Where("enabled", true)
 	t := c.Query("type")
+	logger.Info("type: ", t)
 	if t != "" {
 		session = session.Where("type", t)
+	} else {
+		session = session.Where("type", "chat")
 	}

 	session = session.Where("open", true)
-	if h.IsLogin(c) {
+	if h.IsLogin(c) && t == "chat" {
 		user, _ := h.GetLoginUser(c)
 		var models []int
 		err := utils.JsonDecode(user.ChatModels, &models)
@@ -48,7 +51,7 @@ func (h *ChatModelHandler) List(c *gin.Context) {

 	}

-	res := session.Order("sort_num ASC").Find(&items)
+	res := session.Debug().Order("sort_num ASC").Find(&items)
 	if res.Error == nil {
 		for _, item := range items {
 			var cm vo.ChatModel