new-api/dto/realtime.go
1808837298@qq.com 74f9006b40 feat: realtime
(cherry picked from commit d4966246e68dbdcdab45ec5c5141362834d74425)
2024-11-05 19:27:47 +08:00

98 lines
3.8 KiB
Go

package dto
// Realtime event type strings (first group).
//
// NOTE(review): the names suggest these are the values carried in the "type"
// field of RealtimeEvent, and that, apart from "error", they are events a
// client sends. Confirm against the upstream realtime API reference.
const (
	RealtimeEventTypeError              = "error"
	RealtimeEventTypeSessionUpdate      = "session.update"
	RealtimeEventTypeConversationCreate = "conversation.item.create"
	RealtimeEventTypeResponseCreate     = "response.create"
	RealtimeEventInputAudioBufferAppend = "input_audio_buffer.append"
)
// Realtime event type strings (second group).
//
// NOTE(review): the names suggest these are the values carried in the "type"
// field of RealtimeEvent for events emitted by the server (completion,
// session lifecycle, streaming deltas). Confirm against the upstream realtime
// API reference.
const (
	RealtimeEventTypeResponseDone                   = "response.done"
	RealtimeEventTypeSessionUpdated                 = "session.updated"
	RealtimeEventTypeSessionCreated                 = "session.created"
	RealtimeEventResponseAudioDelta                 = "response.audio.delta"
	RealtimeEventResponseAudioTranscriptionDelta    = "response.audio_transcript.delta"
	RealtimeEventResponseFunctionCallArgumentsDelta = "response.function_call_arguments.delta"
	RealtimeEventResponseFunctionCallArgumentsDone  = "response.function_call_arguments.done"
	RealtimeEventConversationItemCreated            = "conversation.item.created"
)
// RealtimeEvent is the JSON shape of a single realtime event.
//
// EventId and Type are always written when the struct is marshalled. Every
// other field is tagged omitempty, so it is left out of the encoded JSON when
// it is a nil pointer or an empty string.
type RealtimeEvent struct {
	EventId string `json:"event_id"`
	Type    string `json:"type"`

	// Disabled field, kept for reference.
	//PreviousItemId string `json:"previous_item_id"`

	// Optional payloads. NOTE(review): which of these is populated presumably
	// depends on Type; verify against the code that builds and reads events.
	Session  *RealtimeSession  `json:"session,omitempty"`
	Item     *RealtimeItem     `json:"item,omitempty"`
	Error    *OpenAIError      `json:"error,omitempty"`
	Response *RealtimeResponse `json:"response,omitempty"`
	Delta    string            `json:"delta,omitempty"`
	Audio    string            `json:"audio,omitempty"`
}
// RealtimeResponse is the object stored under the "response" key of a
// RealtimeEvent. Only the "usage" member is modelled here.
//
// Usage carries no omitempty tag, so a nil pointer is marshalled as JSON null
// rather than being dropped.
type RealtimeResponse struct {
	Usage *RealtimeUsage `json:"usage"`
}
// RealtimeUsage holds the token counters found under the "usage" key:
// three integer totals followed by per-direction detail objects. No field is
// tagged omitempty, so all of them are always present in the encoded JSON.
type RealtimeUsage struct {
	// Aggregate counters.
	TotalTokens  int `json:"total_tokens"`
	InputTokens  int `json:"input_tokens"`
	OutputTokens int `json:"output_tokens"`

	// Detail objects, embedded by value (never null in the output).
	InputTokenDetails  InputTokenDetails  `json:"input_token_details"`
	OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
}
// InputTokenDetails holds the cached, text and audio token counters that
// RealtimeUsage serializes under "input_token_details". All three counters
// are always written, including when they are zero.
type InputTokenDetails struct {
	CachedTokens int `json:"cached_tokens"`
	TextTokens   int `json:"text_tokens"`
	AudioTokens  int `json:"audio_tokens"`
}
// OutputTokenDetails holds the text and audio token counters that
// RealtimeUsage serializes under "output_token_details". Both counters are
// always written, including when they are zero.
type OutputTokenDetails struct {
	TextTokens  int `json:"text_tokens"`
	AudioTokens int `json:"audio_tokens"`
}
// RealtimeSession is the session configuration object stored under the
// "session" key of a RealtimeEvent.
//
// None of the fields is tagged omitempty, so marshalling always emits every
// key: nil slices and a nil TurnDetection become JSON null, and empty strings
// and a zero Temperature are written as-is.
//
// TurnDetection is deliberately untyped: whatever JSON value is received is
// kept in the generic form produced by encoding/json. It is spelled `any`
// to match the other untyped fields in this file; `any` is an alias for
// `interface{}`, so the field's type is unchanged.
type RealtimeSession struct {
	Modalities              []string                `json:"modalities"`
	Instructions            string                  `json:"instructions"`
	Voice                   string                  `json:"voice"`
	InputAudioFormat        string                  `json:"input_audio_format"`
	OutputAudioFormat       string                  `json:"output_audio_format"`
	InputAudioTranscription InputAudioTranscription `json:"input_audio_transcription"`
	TurnDetection           any                     `json:"turn_detection"`
	Tools                   []RealTimeTool          `json:"tools"`
	ToolChoice              string                  `json:"tool_choice"`
	Temperature             float64                 `json:"temperature"`
	// Disabled field, kept for reference.
	//MaxResponseOutputTokens int `json:"max_response_output_tokens"`
}
// InputAudioTranscription is the object RealtimeSession serializes under
// "input_audio_transcription". It has a single string member, written under
// the "model" key.
type InputAudioTranscription struct {
	Model string `json:"model"`
}
// RealTimeTool is one entry of the "tools" list in RealtimeSession.
//
// Type, Name and Description are plain strings. Parameters is untyped and is
// passed through as whatever JSON value it holds. All four keys are always
// written when marshalling (a nil Parameters becomes JSON null).
type RealTimeTool struct {
	Type        string `json:"type"`
	Name        string `json:"name"`
	Description string `json:"description"`
	Parameters  any    `json:"parameters"`
}
// RealtimeItem is the object stored under the "item" key of a RealtimeEvent.
//
// The first five fields are always written when marshalling. Name, ToolCalls
// and CallId are tagged omitempty and are dropped when nil or empty.
type RealtimeItem struct {
	// Always serialized.
	Id      string            `json:"id"`
	Type    string            `json:"type"`
	Status  string            `json:"status"`
	Role    string            `json:"role"`
	Content []RealtimeContent `json:"content"`

	// Serialized only when set. Name is a pointer, so a non-nil pointer to an
	// empty string is still emitted as "".
	Name      *string `json:"name,omitempty"`
	ToolCalls any     `json:"tool_calls,omitempty"`
	CallId    string  `json:"call_id,omitempty"`
}
// RealtimeContent is one element of RealtimeItem's "content" list.
//
// Type is always written when marshalling. Text, Audio and Transcript are
// tagged omitempty and are left out when empty. Audio holds base64-encoded
// audio bytes.
type RealtimeContent struct {
	Type       string `json:"type"`
	Text       string `json:"text,omitempty"`
	Audio      string `json:"audio,omitempty"`
	Transcript string `json:"transcript,omitempty"`
}