diff --git a/common/model-ratio.go b/common/model-ratio.go
index d4ea88b..898837d 100644
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -31,10 +31,11 @@ var defaultModelRatio = map[string]float64{
 	"gpt-4-32k":                    30,
 	"gpt-4-32k-0314":               30,
 	"gpt-4-32k-0613":               30,
-	"gpt-4o-mini":            0.075, // $0.00015 / 1K tokens
-	"gpt-4o-mini-2024-07-18": 0.075,
+	"gpt-4o-mini":                  0.075, // $0.00015 / 1K tokens
+	"gpt-4o-mini-2024-07-18":       0.075,
 	"gpt-4o":                       2.5,   // $0.005 / 1K tokens
 	"gpt-4o-2024-05-13":            2.5,   // $0.005 / 1K tokens
+	"gpt-4o-2024-08-06":            1.25,  // $0.0025 / 1K tokens
 	"gpt-4-turbo":                  5,     // $0.01 / 1K tokens
 	"gpt-4-turbo-2024-04-09":       5,     // $0.01 / 1K tokens
 	"gpt-4-1106-preview":           5,     // $0.01 / 1K tokens
@@ -74,13 +75,13 @@ var defaultModelRatio = map[string]float64{
 	"text-search-ada-doc-001":      10,
 	"text-moderation-stable":       0.1,
 	"text-moderation-latest":       0.1,
-	"claude-instant-1":           0.4, // $0.8 / 1M tokens
-	"claude-2.0":                 4, // $8 / 1M tokens
-	"claude-2.1":                 4, // $8 / 1M tokens
-	"claude-3-haiku-20240307":    0.125, // $0.25 / 1M tokens
-	"claude-3-5-sonnet-20240620": 1.5, // $3 / 1M tokens
-	"claude-3-sonnet-20240229":   1.5, // $3 / 1M tokens
-	"claude-3-opus-20240229":     7.5, // $15 / 1M tokens
+	"claude-instant-1":             0.4,   // $0.8 / 1M tokens
+	"claude-2.0":                   4,     // $8 / 1M tokens
+	"claude-2.1":                   4,     // $8 / 1M tokens
+	"claude-3-haiku-20240307":      0.125, // $0.25 / 1M tokens
+	"claude-3-5-sonnet-20240620":   1.5,   // $3 / 1M tokens
+	"claude-3-sonnet-20240229":     1.5,   // $3 / 1M tokens
+	"claude-3-opus-20240229":       7.5,   // $15 / 1M tokens
 	"ERNIE-4.0-8K":                 0.120 * RMB,
 	"ERNIE-3.5-8K":                 0.012 * RMB,
 	"ERNIE-3.5-8K-0205":            0.024 * RMB,
@@ -333,7 +334,7 @@ func GetCompletionRatio(name string) float64 {
 		return 4.0 / 3.0
 	}
 	if strings.HasPrefix(name, "gpt-4") && name != "gpt-4-all" && name != "gpt-4-gizmo-*" {
-		if strings.HasPrefix(name, "gpt-4o-mini") {
+		if strings.HasPrefix(name, "gpt-4o-mini") || "gpt-4o-2024-08-06" == name {
 			return 4
 		}
diff --git a/dto/text_request.go b/dto/text_request.go
index f2edf6a..7efe5ba 100644
--- a/dto/text_request.go
+++ b/dto/text_request.go
@@ -2,40 +2,37 @@ package dto
 
 import "encoding/json"
 
-type ResponseFormat struct {
-	Type string `json:"type,omitempty"`
-}
-
 type GeneralOpenAIRequest struct {
-	Model            string          `json:"model,omitempty"`
-	Messages         []Message       `json:"messages,omitempty"`
-	Prompt           any             `json:"prompt,omitempty"`
-	BestOf           int             `json:"best_of,omitempty"`
-	Echo             bool            `json:"echo,omitempty"`
-	Stream           bool            `json:"stream,omitempty"`
-	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
-	Suffix           string          `json:"suffix,omitempty"`
-	MaxTokens        uint            `json:"max_tokens,omitempty"`
-	Temperature      float64         `json:"temperature,omitempty"`
-	TopP             float64         `json:"top_p,omitempty"`
-	TopK             int             `json:"top_k,omitempty"`
-	Stop             any             `json:"stop,omitempty"`
-	N                int             `json:"n,omitempty"`
-	Input            any             `json:"input,omitempty"`
-	Instruction      string          `json:"instruction,omitempty"`
-	Size             string          `json:"size,omitempty"`
-	Functions        any             `json:"functions,omitempty"`
-	FrequencyPenalty float64         `json:"frequency_penalty,omitempty"`
-	PresencePenalty  float64         `json:"presence_penalty,omitempty"`
-	ResponseFormat   *ResponseFormat `json:"response_format,omitempty"`
-	Seed             float64         `json:"seed,omitempty"`
-	Tools            []ToolCall      `json:"tools,omitempty"`
-	ToolChoice       any             `json:"tool_choice,omitempty"`
-	User             string          `json:"user,omitempty"`
-	LogitBias        any             `json:"logit_bias,omitempty"`
-	LogProbs         any             `json:"logprobs,omitempty"`
-	TopLogProbs      int             `json:"top_logprobs,omitempty"`
-	Dimensions       int             `json:"dimensions,omitempty"`
+	Model             string         `json:"model,omitempty"`
+	Messages          []Message      `json:"messages,omitempty"`
+	Prompt            any            `json:"prompt,omitempty"`
+	BestOf            int            `json:"best_of,omitempty"`
+	Echo              bool           `json:"echo,omitempty"`
+	Stream            bool           `json:"stream,omitempty"`
+	StreamOptions     *StreamOptions `json:"stream_options,omitempty"`
+	Suffix            string         `json:"suffix,omitempty"`
+	MaxTokens         uint           `json:"max_tokens,omitempty"`
+	Temperature       float64        `json:"temperature,omitempty"`
+	TopP              float64        `json:"top_p,omitempty"`
+	TopK              int            `json:"top_k,omitempty"`
+	Stop              any            `json:"stop,omitempty"`
+	N                 int            `json:"n,omitempty"`
+	Input             any            `json:"input,omitempty"`
+	Instruction       string         `json:"instruction,omitempty"`
+	Size              string         `json:"size,omitempty"`
+	Functions         any            `json:"functions,omitempty"`
+	FrequencyPenalty  float64        `json:"frequency_penalty,omitempty"`
+	PresencePenalty   float64        `json:"presence_penalty,omitempty"`
+	ResponseFormat    any            `json:"response_format,omitempty"`
+	Seed              float64        `json:"seed,omitempty"`
+	Tools             []ToolCall     `json:"tools,omitempty"`
+	ToolChoice        any            `json:"tool_choice,omitempty"`
+	User              string         `json:"user,omitempty"`
+	LogitBias         any            `json:"logit_bias,omitempty"`
+	LogProbs          any            `json:"logprobs,omitempty"`
+	TopLogProbs       int            `json:"top_logprobs,omitempty"`
+	Dimensions        int            `json:"dimensions,omitempty"`
+	ParallelToolCalls bool           `json:"parallel_tool_calls,omitempty"`
 }
 
 type OpenAITools struct {
diff --git a/relay/channel/ollama/dto.go b/relay/channel/ollama/dto.go
index 4f99a24..fac6b7f 100644
--- a/relay/channel/ollama/dto.go
+++ b/relay/channel/ollama/dto.go
@@ -3,18 +3,18 @@ package ollama
 import "one-api/dto"
 
 type OllamaRequest struct {
-	Model            string              `json:"model,omitempty"`
-	Messages         []dto.Message       `json:"messages,omitempty"`
-	Stream           bool                `json:"stream,omitempty"`
-	Temperature      float64             `json:"temperature,omitempty"`
-	Seed             float64             `json:"seed,omitempty"`
-	Topp             float64             `json:"top_p,omitempty"`
-	TopK             int                 `json:"top_k,omitempty"`
-	Stop             any                 `json:"stop,omitempty"`
-	Tools            []dto.ToolCall      `json:"tools,omitempty"`
-	ResponseFormat   *dto.ResponseFormat `json:"response_format,omitempty"`
-	FrequencyPenalty float64             `json:"frequency_penalty,omitempty"`
-	PresencePenalty  float64             `json:"presence_penalty,omitempty"`
+	Model            string         `json:"model,omitempty"`
+	Messages         []dto.Message  `json:"messages,omitempty"`
+	Stream           bool           `json:"stream,omitempty"`
+	Temperature      float64        `json:"temperature,omitempty"`
+	Seed             float64        `json:"seed,omitempty"`
+	Topp             float64        `json:"top_p,omitempty"`
+	TopK             int            `json:"top_k,omitempty"`
+	Stop             any            `json:"stop,omitempty"`
+	Tools            []dto.ToolCall `json:"tools,omitempty"`
+	ResponseFormat   any            `json:"response_format,omitempty"`
+	FrequencyPenalty float64        `json:"frequency_penalty,omitempty"`
+	PresencePenalty  float64        `json:"presence_penalty,omitempty"`
 }
 
 type OllamaEmbeddingRequest struct {
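
For context on the numbers in the model-ratio.go hunks: the file's existing entries use the convention that a model ratio of 1 equals $0.002 per 1K tokens (gpt-4o at 2.5 is $0.005 / 1K, gpt-4-turbo at 5 is $0.01 / 1K), so gpt-4o-2024-08-06 at $2.50 per 1M input tokens works out to $0.0025 / 1K, i.e. a ratio of 1.25, and its $10 per 1M output price is 4x the input price, matching the value the updated GetCompletionRatio branch returns. A minimal test sketch of those two values (the test file and name are hypothetical; it assumes model-ratio.go is in package common and that no earlier branch of GetCompletionRatio matches this model name first):

```go
package common

import "testing"

// Hypothetical check of the values introduced by this change:
// $2.50 / 1M input tokens  -> model ratio 1.25 (1 ratio unit == $0.002 / 1K tokens)
// $10.00 / 1M output tokens -> completion ratio 4 (output costs 4x the input price)
func TestGPT4o20240806Ratios(t *testing.T) {
	if got := defaultModelRatio["gpt-4o-2024-08-06"]; got != 1.25 {
		t.Fatalf("model ratio = %v, want 1.25", got)
	}
	if got := GetCompletionRatio("gpt-4o-2024-08-06"); got != 4 {
		t.Fatalf("completion ratio = %v, want 4", got)
	}
}
```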
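
The dto and ollama hunks replace the typed ResponseFormat struct, which only carried `type`, with an untyped `any` field, so a request can pass through whatever `response_format` object the client sends, including the nested `json_schema` payload used for structured outputs that the old struct would have dropped during unmarshalling. A minimal illustration (hypothetical snippet, not part of the diff; it assumes compilation inside the one-api module so the dto import resolves):

```go
package main

import (
	"encoding/json"
	"fmt"

	"one-api/dto"
)

func main() {
	// With ResponseFormat declared as `any`, a structured-output request keeps its
	// full json_schema object when re-marshalled for the upstream API, instead of
	// being reduced to {"type": "..."} as the old *ResponseFormat field would be.
	req := dto.GeneralOpenAIRequest{
		Model: "gpt-4o-2024-08-06",
		ResponseFormat: map[string]any{
			"type": "json_schema",
			"json_schema": map[string]any{
				"name": "temperature_reading",
				"schema": map[string]any{
					"type": "object",
					"properties": map[string]any{
						"celsius": map[string]any{"type": "number"},
					},
					"required": []string{"celsius"},
				},
			},
		},
	}
	body, _ := json.Marshal(req)
	fmt.Println(string(body))
}
```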