enable o3-mini-high, optimize timeout for thinking models

Davidlasky 2025-04-01 17:47:16 -05:00
parent e30d90714b
commit 106db97f8c
2 changed files with 16 additions and 9 deletions


@@ -241,18 +241,18 @@ export class ChatGPTApi implements LLMApi {
       // O1 uses max_completion_tokens to control the token count (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
       if (isO1OrO3) {
-        requestPayload["max_completion_tokens"] = 25000;
+        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
       }
       if (isO3) {
         requestPayload["reasoning_effort"] = "high";
+        // make o3-mini default to high reasoning effort
       }
       // add max_tokens to vision model
       if (visionModel) {
         if (isO1) {
           requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
         } else {
           requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
         }
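
This hunk routes the token budget for reasoning models through user config instead of the hard-coded 25000, and documents the o3 reasoning-effort default. A minimal standalone sketch of the same rules (buildPayload, ModelConfig, and the startsWith checks are illustrative stand-ins, not this file's actual helpers):

// Sketch of the payload rules above: reasoning models (o1/o3) take
// "max_completion_tokens" (a budget that also covers hidden reasoning
// tokens), other chat models take "max_tokens", and o3 additionally gets a
// "reasoning_effort" hint. All names here are assumptions for illustration.
interface ModelConfig {
  model: string;
  max_tokens: number;
}

function buildPayload(cfg: ModelConfig): Record<string, unknown> {
  const isO1OrO3 = cfg.model.startsWith("o1") || cfg.model.startsWith("o3");
  const isO3 = cfg.model.startsWith("o3");
  const payload: Record<string, unknown> = { model: cfg.model };
  if (isO1OrO3) {
    // user-configured cap instead of a hard-coded 25000
    payload["max_completion_tokens"] = cfg.max_tokens;
  } else {
    payload["max_tokens"] = cfg.max_tokens;
  }
  if (isO3) {
    payload["reasoning_effort"] = "high"; // the o3-mini-high default
  }
  return payload;
}

With cfg = { model: "o3-mini", max_tokens: 8192 } this yields { model: "o3-mini", max_completion_tokens: 8192, reasoning_effort: "high" }.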
@@ -297,6 +297,11 @@ export class ChatGPTApi implements LLMApi {
         isDalle3 ? OpenaiPath.ImagePath : OpenaiPath.ChatPath,
       );
     }
+    // make a fetch request
+    const requestTimeoutId = setTimeout(
+      () => controller.abort(),
+      getTimeoutMSByModel(options.config.model),
+    );
     if (shouldStream) {
       let index = -1;
       const [tools, funcs] = usePluginStore
@@ -404,12 +409,6 @@ export class ChatGPTApi implements LLMApi {
         headers: getHeaders(),
       };
-      // make a fetch request
-      const requestTimeoutId = setTimeout(
-        () => controller.abort(),
-        getTimeoutMSByModel(options.config.model),
-      );
       const res = await fetch(chatPath, chatPayload);
       clearTimeout(requestTimeoutId);
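
These two hunks are a move, not new logic: the setTimeout that arms the AbortController now runs before the shouldStream branch, so streaming requests get the model-aware timeout as well, rather than only the non-streaming fetch. The pattern in isolation, as a hedged sketch (fetchWithTimeout is a hypothetical wrapper, not code from this repo):

// Arm-before-branch timeout pattern: start the abort timer as soon as the
// request is prepared, and disarm it once the response (or stream) arrives,
// on both the success and error paths.
async function fetchWithTimeout(
  url: string,
  init: RequestInit,
  timeoutMs: number,
): Promise<Response> {
  const controller = new AbortController();
  const requestTimeoutId = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetch(url, { ...init, signal: controller.signal });
  } finally {
    clearTimeout(requestTimeoutId); // always disarm the timer
  }
}

Arming the timer before the branch also means slow connection setup counts against the budget for streaming chats, which is the point of giving thinking models a longer model-aware timeout.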


@@ -306,8 +306,16 @@ export function getTimeoutMSByModel(model: string) {
     model.includes("deepseek-r") ||
     model.includes("-thinking") ||
     model.includes("pro")
-  )
+  ) {
+    console.log(
+      "thinking model is " +
+        model +
+        " timeout is " +
+        REQUEST_TIMEOUT_MS_FOR_THINKING,
+    );
     return REQUEST_TIMEOUT_MS_FOR_THINKING;
+  }
+  console.log("normal model is " + model + " timeout is " + REQUEST_TIMEOUT_MS);
   return REQUEST_TIMEOUT_MS;
 }
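
Reassembled, the edited function reads roughly as follows. The timeout constants and the earlier model-name checks are not visible in the hunk, so the values below are assumptions:

// Assumed values; in the repo these are imported constants.
const REQUEST_TIMEOUT_MS = 60_000;
const REQUEST_TIMEOUT_MS_FOR_THINKING = 300_000;

export function getTimeoutMSByModel(model: string) {
  if (
    // ...earlier model-name checks elided by the hunk...
    model.includes("deepseek-r") ||
    model.includes("-thinking") ||
    model.includes("pro")
  ) {
    console.log(
      "thinking model is " + model + " timeout is " + REQUEST_TIMEOUT_MS_FOR_THINKING,
    );
    return REQUEST_TIMEOUT_MS_FOR_THINKING;
  }
  console.log("normal model is " + model + " timeout is " + REQUEST_TIMEOUT_MS);
  return REQUEST_TIMEOUT_MS;
}

For example, getTimeoutMSByModel("deepseek-r1") logs and returns the longer thinking-model value, while getTimeoutMSByModel("gpt-4o-mini") falls through to the default.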