[feat] ollama chat: support image input.

This commit is contained in:
dmd 2024-06-16 21:02:52 +08:00
parent 51b83722e7
commit 2378ab7930
2 changed files with 131 additions and 44 deletions

View File

@ -15,7 +15,9 @@ import (
"fmt" "fmt"
"html/template" "html/template"
"io" "io"
"io/ioutil"
"net/http" "net/http"
"regexp"
"strings" "strings"
"time" "time"
"unicode/utf8" "unicode/utf8"
@ -28,24 +30,35 @@ import (
"geekai/utils" "geekai/utils"
) )
type ollamaResp struct { // ChatResponse is the response returned by [Client.Chat]. Its fields are
Id string `json:"id"` // similar to [GenerateResponse].
type ChatResponse struct {
Model string `json:"model"` Model string `json:"model"`
CreatedAt time.Time `json:"created_at"`
Message types.Message `json:"message"`
DoneReason string `json:"done_reason,omitempty"`
CreatedAt string `json:"created_at"`
Response string `json:"response"`
Done bool `json:"done"` Done bool `json:"done"`
DoneReason string `json:"done_reason"`
Context []int `json:"context"`
TotalDuration int64 `json:"total_duration"` // 生成响应所花费的总时间 Metrics
LoadDuration int64 `json:"load_duration"` // 以纳秒为单位加载模型所花费的时间
PromptEvalCount int `json:"prompt_eval_count"` // 提示文本中的标记tokens数量
PromptEvalDuration int64 `json:"prompt_eval_duration"` // 以纳秒为单位评估提示文本所花费的时间
EvalCount int64 `json:"eval_count"` // 生成响应中的标记数量
EvalDuration int64 `json:"eval_duration"` // 以纳秒为单位生成响应所花费的时间
} }
type Metrics struct {
TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
EvalCount int `json:"eval_count,omitempty"`
EvalDuration time.Duration `json:"eval_duration,omitempty"`
}
type Message struct {
types.Message
Images []ImageData `json:"images,omitempty"`
}
type ImageData []byte
// Ollama message sending implementation // Ollama message sending implementation
func (h *ChatHandler) sendOllamaMessage( func (h *ChatHandler) sendOllamaMessage(
chatCtx []types.Message, chatCtx []types.Message,
@ -62,13 +75,15 @@ func (h *ChatHandler) sendOllamaMessage(
//var apiKey = model.ApiKey{} //var apiKey = model.ApiKey{}
//response, err := h.doRequest(ctx, req, session, &apiKey) //response, err := h.doRequest(ctx, req, session, &apiKey)
response, err := h.sendOllamaRequest(session, prompt) response, err := h.sendOllamaRequest(chatCtx, session, prompt)
defer response.Body.Close()
logger.Info("HTTP请求完成耗时", time.Now().Sub(start)) logger.Info("HTTP请求完成耗时", time.Now().Sub(start))
if err != nil { if err != nil {
h.processError(err, prompt, ws) h.processError(err, prompt, ws)
return nil
} else {
defer response.Body.Close()
} }
contentType := response.Header.Get("Content-Type") contentType := response.Header.Get("Content-Type")
@ -85,27 +100,67 @@ func (h *ChatHandler) sendOllamaMessage(
return nil return nil
} }
func (h *ChatHandler) sendOllamaRequest(session *types.ChatSession, prompt string) (*http.Response, error) { func (h *ChatHandler) sendOllamaRequest(chatCtx []types.Message, session *types.ChatSession, prompt string) (*http.Response, error) {
apiKey, err := h.queryApiKey(session) apiKey, err := h.queryApiKey(session)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// todo add context to request body chatCtx = append(chatCtx, types.Message{
Role: "user",
Content: prompt,
})
messages := make([]Message, 0)
for _, ctx := range chatCtx {
if ctx.Role == "" {
continue
}
m := Message{
Message: ctx,
}
url := h.parseURL(ctx.Content)
if url != "" {
encode, err := h.downImgAndBase64Encode(url)
if err != nil {
logger.Infof("img url convert to binary err%s, will not send image to ollama", err)
continue
}
m.Content = strings.Replace(ctx.Content, url, "", 1)
m.Images = []ImageData{encode}
}
messages = append(messages, m)
}
postData := map[string]interface{}{ postData := map[string]interface{}{
"model": session.Model.Value, "model": session.Model.Value,
"stream": true, "stream": true,
"prompt": prompt, "messages": messages,
"options": map[string]interface{}{
"temperature": session.Model.Temperature,
},
} }
headers := map[string]string{ headers := map[string]string{
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": "Bearer " + apiKey.Value, }
// 兼容ollama原生11343端口与ollama webui api-key的方式
if strings.HasPrefix(apiKey.Value, "sk-") {
headers["Authorization"] = "Bearer " + apiKey.Value
} }
ro := &grequests.RequestOptions{ ro := &grequests.RequestOptions{
JSON: postData, JSON: postData,
Headers: headers, Headers: headers,
} }
requestBody, err := json.Marshal(postData)
if err != nil {
return nil, err
}
logger.Debugf("ollama request body: %s", string(requestBody))
resp, err := grequests.Post(apiKey.ApiURL, ro) resp, err := grequests.Post(apiKey.ApiURL, ro)
if err != nil { if err != nil {
return nil, err return nil, err
@ -145,31 +200,29 @@ func (h *ChatHandler) processOllamaStreamResponse(
// 记录回复时间 // 记录回复时间
replyCreatedAt := time.Now() replyCreatedAt := time.Now()
// 循环读取 Chunk 消息
var message = types.Message{}
scanner := bufio.NewScanner(response.Body) scanner := bufio.NewScanner(response.Body)
var contents = make([]string, 0)
var content string var content string
var replyTokens int var outPutStart = true
// 循环读取 返回 消息
for scanner.Scan() { for scanner.Scan() {
var resp ollamaResp var resp ChatResponse
line := scanner.Text() line := scanner.Text()
err := utils.JsonDecode(line, &resp) err := utils.JsonDecode(line, &resp)
if err != nil { if err != nil {
logger.Error("error with parse data line: ", content) logger.Error("error with parse data line: ", line)
utils.ReplyMessage(ws, fmt.Sprintf("**解析数据行失败:%s**", err)) utils.ReplyMessage(ws, fmt.Sprintf("**解析数据行失败:%s**", err))
break break
} }
if resp.Done == true && resp.DoneReason == "stop" { if resp.Done == true && resp.DoneReason == "stop" {
utils.ReplyChunkMessage(ws, types.WsMessage{Type: types.WsEnd}) utils.ReplyChunkMessage(ws, types.WsMessage{Type: types.WsEnd})
message.Content = utils.InterfaceToString(resp.Context)
replyTokens = resp.PromptEvalCount
// 消息发送成功后做记录工作 // 消息发送成功后做记录工作
h.recordInfoAfterSendMessage(chatCtx, req, userVo, message, prompt, session, role, promptCreatedAt, replyTokens, replyCreatedAt) h.recordInfoAfterSendMessage(chatCtx, req, userVo, prompt, session, role, promptCreatedAt, replyCreatedAt, strings.Join(contents, ""))
break break
} else if resp.Done == true && resp.DoneReason != "stop" { } else if resp.Done == true && resp.DoneReason != "stop" {
@ -177,15 +230,20 @@ func (h *ChatHandler) processOllamaStreamResponse(
break break
} }
if len(resp.Id) > 0 { if len(contents) == 0 && outPutStart {
logger.Infof("开始输出消息:%s", resp.Message.Content)
utils.ReplyChunkMessage(ws, types.WsMessage{Type: types.WsStart}) utils.ReplyChunkMessage(ws, types.WsMessage{Type: types.WsStart})
outPutStart = false
} }
if len(resp.Response) > 0 { if len(resp.Message.Content) > 0 {
utils.ReplyChunkMessage(ws, types.WsMessage{ utils.ReplyChunkMessage(ws, types.WsMessage{
Type: types.WsMiddle, Type: types.WsMiddle,
Content: utils.InterfaceToString(resp.Response), Content: utils.InterfaceToString(resp.Message.Content),
}) })
content += resp.Message.Content
contents = append(contents, resp.Message.Content)
} }
} }
@ -217,11 +275,11 @@ func (h *ChatHandler) processOllamaJsonResponse(response *http.Response, ws *typ
return nil return nil
} }
func (h *ChatHandler) recordInfoAfterSendMessage(chatCtx []types.Message, req types.ApiRequest, userVo vo.User, message types.Message, prompt string, session *types.ChatSession, role model.ChatRole, promptCreatedAt time.Time, replyTokens int, replyCreatedAt time.Time) { func (h *ChatHandler) recordInfoAfterSendMessage(chatCtx []types.Message, req types.ApiRequest, userVo vo.User,
if message.Role == "" { prompt string, session *types.ChatSession, role model.ChatRole,
message.Role = "assistant" promptCreatedAt time.Time, replyCreatedAt time.Time, content string) {
}
message := types.Message{Role: "assistant", Content: content}
useMsg := types.Message{Role: "user", Content: prompt} useMsg := types.Message{Role: "user", Content: prompt}
// 更新上下文消息,如果是调用函数则不需要更新上下文 // 更新上下文消息,如果是调用函数则不需要更新上下文
@ -257,16 +315,16 @@ func (h *ChatHandler) recordInfoAfterSendMessage(chatCtx []types.Message, req ty
// for reply // for reply
// 计算本次对话消耗的总 token 数量 // 计算本次对话消耗的总 token 数量
//totalTokens := replyTokens + getTotalTokens(req) replyTokens, _ := utils.CalcTokens(message.Content, req.Model)
// todo rebuild the tokens totalTokens := replyTokens + getTotalTokens(req)
historyReplyMsg := model.ChatMessage{ historyReplyMsg := model.ChatMessage{
UserId: userVo.Id, UserId: userVo.Id,
ChatId: session.ChatId, ChatId: session.ChatId,
RoleId: role.Id, RoleId: role.Id,
Type: types.ReplyMsg, Type: types.ReplyMsg,
Icon: role.Icon, Icon: role.Icon,
Content: message.Content, Content: content,
Tokens: replyTokens, Tokens: totalTokens,
UseContext: true, UseContext: true,
Model: req.Model, Model: req.Model,
} }
@ -313,3 +371,32 @@ func (h *ChatHandler) processError(err error, prompt string, ws *types.WsClient)
utils.ReplyMessage(ws, ErrImg) utils.ReplyMessage(ws, ErrImg)
return return
} }
// downImgAndBase64Encode downloads the image at url and returns its raw
// bytes. Despite the name, no base64 encoding happens here: ImageData is a
// []byte, and encoding/json base64-encodes []byte fields automatically when
// the request body is marshaled for the Ollama API.
//
// NOTE(review): ioutil.ReadAll is deprecated since Go 1.16 — switch to
// io.ReadAll and drop the "io/ioutil" import once the import block can be
// updated in the same change. Also uses the default http.Client, so there
// is no request timeout; consider a client with a Timeout for user-supplied
// URLs.
func (h *ChatHandler) downImgAndBase64Encode(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Include the status code so failures are diagnosable from logs.
		return nil, fmt.Errorf("download image: unexpected status %d", resp.StatusCode)
	}
	return ioutil.ReadAll(resp.Body)
}
// imageURLPattern matches the first http(s):// or www. URL ending in a
// common image extension (jpg, jpeg, png, gif, bmp, webp), case-insensitively.
// Compiled once at package init so the per-message chat path does not pay a
// regexp compile on every call.
var imageURLPattern = regexp.MustCompile(`(?i)\b((https?://|www\.)[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]\.(jpg|jpeg|png|gif|bmp|webp))`)

// parseURL returns the first image URL found in input, or "" when the input
// contains none. Only the URL itself is returned; surrounding text is ignored.
func (h *ChatHandler) parseURL(input string) string {
	// FindString returns the leftmost full match, or "" on no match —
	// identical to FindStringSubmatch(...)[0] without the slice handling.
	return imageURLPattern.FindString(input)
}

View File

@ -177,7 +177,7 @@ const platforms = ref([
{ {
name: "【Meta】Ollama", name: "【Meta】Ollama",
value: "Ollama", value: "Ollama",
api_url: "http://localhost:8080/ollama/api/generate" api_url: "http://localhost:8080/ollama/api/chat"
}, },
]) ])
const types = ref([ const types = ref([