feat: support llm chat on replicate

This commit is contained in:
Laisky.Cai
2024-12-19 03:14:32 +00:00
parent 4c86e7c506
commit 502cf3315d
9 changed files with 376 additions and 28 deletions

View File

@@ -245,6 +245,25 @@ var ModelRatio = map[string]float64{
"stability-ai/stable-diffusion-3.5-large": 0.065 * USD,
"stability-ai/stable-diffusion-3.5-large-turbo": 0.04 * USD,
"stability-ai/stable-diffusion-3.5-medium": 0.035 * USD,
// replicate chat models
"ibm-granite/granite-20b-code-instruct-8k": 0.100 * USD,
"ibm-granite/granite-3.0-2b-instruct": 0.030 * USD,
"ibm-granite/granite-3.0-8b-instruct": 0.050 * USD,
"ibm-granite/granite-8b-code-instruct-128k": 0.050 * USD,
"meta/llama-2-13b": 0.100 * USD,
"meta/llama-2-13b-chat": 0.100 * USD,
"meta/llama-2-70b": 0.650 * USD,
"meta/llama-2-70b-chat": 0.650 * USD,
"meta/llama-2-7b": 0.050 * USD,
"meta/llama-2-7b-chat": 0.050 * USD,
"meta/meta-llama-3.1-405b-instruct": 9.500 * USD,
"meta/meta-llama-3-70b": 0.650 * USD,
"meta/meta-llama-3-70b-instruct": 0.650 * USD,
"meta/meta-llama-3-8b": 0.050 * USD,
"meta/meta-llama-3-8b-instruct": 0.050 * USD,
"mistralai/mistral-7b-instruct-v0.2": 0.050 * USD,
"mistralai/mistral-7b-v0.1": 0.050 * USD,
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
}
var CompletionRatio = map[string]float64{
@@ -402,6 +421,7 @@ func GetCompletionRatio(name string, channelType int) float64 {
if strings.HasPrefix(name, "deepseek-") {
return 2
}
switch name {
case "llama2-70b-4096":
return 0.8 / 0.64
@@ -417,6 +437,35 @@ func GetCompletionRatio(name string, channelType int) float64 {
return 5
case "grok-beta":
return 3
// Replicate Models
// https://replicate.com/pricing
case "ibm-granite/granite-20b-code-instruct-8k":
return 5
case "ibm-granite/granite-3.0-2b-instruct":
return 8.333333333333334
case "ibm-granite/granite-3.0-8b-instruct",
"ibm-granite/granite-8b-code-instruct-128k":
return 5
case "meta/llama-2-13b",
"meta/llama-2-13b-chat",
"meta/llama-2-7b",
"meta/llama-2-7b-chat",
"meta/meta-llama-3-8b",
"meta/meta-llama-3-8b-instruct":
return 5
case "meta/llama-2-70b",
"meta/llama-2-70b-chat",
"meta/meta-llama-3-70b",
"meta/meta-llama-3-70b-instruct":
return 2.750 / 0.650 // ≈4.230769
case "meta/meta-llama-3.1-405b-instruct":
return 1
case "mistralai/mistral-7b-instruct-v0.2",
"mistralai/mistral-7b-v0.1":
return 5
case "mistralai/mixtral-8x7b-instruct-v0.1":
return 1.000 / 0.300 // ≈3.333333
}
return 1
}