feat: support gpt-4o-audio-preview

This commit is contained in:
CalciumIon 2024-11-04 15:27:12 +08:00
parent 8b8abfadaf
commit 139a104b26
5 changed files with 2715 additions and 2284 deletions

View File

@ -337,15 +337,15 @@ func GetCompletionRatio(name string) float64 {
name = "gpt-4o-gizmo-*" name = "gpt-4o-gizmo-*"
} }
if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") { if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") {
if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasSuffix(name, "preview") {
return 3
}
if strings.HasPrefix(name, "gpt-4o") { if strings.HasPrefix(name, "gpt-4o") {
if name == "gpt-4o-2024-05-13" { if name == "gpt-4o-2024-05-13" {
return 3 return 3
} }
return 4 return 4
} }
if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasSuffix(name, "preview") {
return 3
}
return 2 return 2
} }
if strings.HasPrefix(name, "o1-") { if strings.HasPrefix(name, "o1-") {

View File

@ -34,6 +34,8 @@ type GeneralOpenAIRequest struct {
LogProbs bool `json:"logprobs,omitempty"` LogProbs bool `json:"logprobs,omitempty"`
TopLogProbs int `json:"top_logprobs,omitempty"` TopLogProbs int `json:"top_logprobs,omitempty"`
Dimensions int `json:"dimensions,omitempty"` Dimensions int `json:"dimensions,omitempty"`
Modalities any `json:"modalities,omitempty"`
Audio any `json:"audio,omitempty"`
} }
type OpenAITools struct { type OpenAITools struct {
@ -83,9 +85,10 @@ type Message struct {
} }
type MediaMessage struct { type MediaMessage struct {
Type string `json:"type"` Type string `json:"type"`
Text string `json:"text"` Text string `json:"text"`
ImageUrl any `json:"image_url,omitempty"` ImageUrl any `json:"image_url,omitempty"`
InputAudio any `json:"input_audio,omitempty"`
} }
type MessageImageUrl struct { type MessageImageUrl struct {
@ -93,9 +96,15 @@ type MessageImageUrl struct {
Detail string `json:"detail"` Detail string `json:"detail"`
} }
// MessageInputAudio is the payload carried by an "input_audio" content part.
// ParseContent builds it from the "data" and "format" keys of the part's
// "input_audio" object.
type MessageInputAudio struct {
Data string `json:"data"` // base64; NOTE(review): presumably the encoded audio bytes — confirm against the upstream API
Format string `json:"format"` // NOTE(review): presumably the audio format name (e.g. "wav") — confirm against the upstream API
}
const ( const (
ContentTypeText = "text" ContentTypeText = "text"
ContentTypeImageURL = "image_url" ContentTypeImageURL = "image_url"
ContentTypeInputAudio = "input_audio"
) )
func (m Message) StringContent() string { func (m Message) StringContent() string {
@ -168,11 +177,19 @@ func (m Message) ParseContent() []MediaMessage {
}, },
}) })
} }
case ContentTypeInputAudio:
if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
contentList = append(contentList, MediaMessage{
Type: ContentTypeInputAudio,
InputAudio: MessageInputAudio{
Data: subObj["data"].(string),
Format: subObj["format"].(string),
},
})
}
} }
} }
return contentList return contentList
} }
return nil return nil
} }

View File

@ -223,7 +223,7 @@ func CountTokenMessages(messages []dto.Message, model string, stream bool) (int,
} else { } else {
arrayContent := message.ParseContent() arrayContent := message.ParseContent()
for _, m := range arrayContent { for _, m := range arrayContent {
if m.Type == "image_url" { if m.Type == dto.ContentTypeImageURL {
imageUrl := m.ImageUrl.(dto.MessageImageUrl) imageUrl := m.ImageUrl.(dto.MessageImageUrl)
imageTokenNum, err := getImageToken(&imageUrl, model, stream) imageTokenNum, err := getImageToken(&imageUrl, model, stream)
if err != nil { if err != nil {
@ -231,6 +231,9 @@ func CountTokenMessages(messages []dto.Message, model string, stream bool) (int,
} }
tokenNum += imageTokenNum tokenNum += imageTokenNum
log.Printf("image token num: %d", imageTokenNum) log.Printf("image token num: %d", imageTokenNum)
} else if m.Type == dto.ContentTypeInputAudio {
// TODO: compute the real audio token count; a flat 100 tokens is charged per audio part for now.
tokenNum += 100
} else { } else {
tokenNum += getTokenNum(tokenEncoder, m.Text) tokenNum += getTokenNum(tokenEncoder, m.Text)
} }

File diff suppressed because it is too large Load Diff