Add performance and reliability QA gates

This commit is contained in:
huanghuoguoguo
2026-06-25 00:07:37 +08:00
parent 74a18191dd
commit 67437c2f5a
31 changed files with 2299 additions and 25 deletions
+79 -1
View File
@@ -54,6 +54,7 @@ const debugChatSessionType = env.LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE || "person"
const pipelineConfigDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-diagnostic.json");
const debugChatResetDiagnosticPath = resolve(paths.evidenceDir, "debug-chat-reset-diagnostic.json");
const pipelineConfigRestoreDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-restore-diagnostic.json");
const metricsPath = resolve(paths.evidenceDir, "metrics.json");
const startedAt = new Date();
let browser;
@@ -80,10 +81,11 @@ let result = {
console_log: paths.consoleLog,
network_log: paths.networkLog,
screenshot: paths.screenshot,
metrics_json: metricsPath,
automation_result_json: paths.automationResultJson,
result_json: paths.resultJson,
},
evidence_collected: ["ui", "screenshot", "console", "network"],
evidence_collected: ["ui", "screenshot", "console", "network", "metrics"],
};
function boolFromEnv(value, defaultValue) {
@@ -103,6 +105,29 @@ function parseJsonEnv(key, fallback) {
}
}
function positiveNumberEnv(key, fallback) {
const value = Number(env[key] || "");
return Number.isFinite(value) && value >= 0 ? value : fallback;
}
function percentile(values, percentileValue) {
if (values.length === 0) return 0;
const sorted = [...values].sort((a, b) => a - b);
const index = Math.min(sorted.length - 1, Math.ceil((percentileValue / 100) * sorted.length) - 1);
return Number(sorted[index].toFixed(3));
}
function stats(values) {
if (values.length === 0) return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
return {
min: Number(Math.min(...values).toFixed(3)),
p50: percentile(values, 50),
p95: percentile(values, 95),
p99: percentile(values, 99),
max: Number(Math.max(...values).toFixed(3)),
};
}
function promptStepsFromEnv() {
const rawSteps = parseJsonEnv("LANGBOT_E2E_PROMPTS_JSON", null);
if (rawSteps === null) {
@@ -658,6 +683,7 @@ try {
} else {
for (let index = 0; index < promptSteps.length; index += 1) {
const step = promptSteps[index];
const promptStartedAt = Date.now();
const chatResult = await runDebugChatPrompt(page, {
prompt: step.prompt,
expectedText: step.expectedText,
@@ -665,11 +691,13 @@ try {
imagePath: index === 0 ? imagePath : "",
failureSignals: failureSignals.length > 0 ? failureSignals : undefined,
});
const promptDurationMs = Date.now() - promptStartedAt;
result.chat_results.push({
index,
expected_text: step.expectedText,
status: chatResult.status,
reason: chatResult.reason,
response_duration_ms: promptDurationMs,
min_expected_count: chatResult.min_expected_count,
final_count: chatResult.final_count,
before_assistant_expected_count: chatResult.before_assistant_expected_count,
@@ -714,6 +742,56 @@ try {
const finishedAt = new Date();
result.finished_at = finishedAt.toISOString();
result.finished_at_local = localIsoWithOffset(finishedAt);
result.duration_ms = finishedAt.getTime() - startedAt.getTime();
const responseDurations = result.chat_results
.map((item) => item.response_duration_ms)
.filter((value) => Number.isFinite(value));
const passedPrompts = result.chat_results.filter((item) => item.status === "pass").length;
const attemptedPrompts = result.chat_results.length;
const errorRate = attemptedPrompts === 0 ? 1 : Number(((attemptedPrompts - passedPrompts) / attemptedPrompts).toFixed(4));
const responseStats = stats(responseDurations);
const responseP95BudgetMs = positiveNumberEnv(
"LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS",
positiveNumberEnv("LANGBOT_DEBUG_CHAT_RESPONSE_P95_MS", safeResponseTimeoutMs),
);
const maxErrorRate = positiveNumberEnv("LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE", 0);
const metrics = {
probe: caseId,
url: result.url,
prompt_count: result.prompt_count,
attempted_prompt_count: attemptedPrompts,
passed_prompt_count: passedPrompts,
error_rate: errorRate,
response_duration_ms: responseStats,
total_duration_ms: result.duration_ms,
chat_results: result.chat_results,
};
result.metrics_summary = {
prompt_count: metrics.prompt_count,
attempted_prompt_count: metrics.attempted_prompt_count,
passed_prompt_count: metrics.passed_prompt_count,
error_rate: metrics.error_rate,
response_p50_ms: metrics.response_duration_ms.p50,
response_p95_ms: metrics.response_duration_ms.p95,
total_duration_ms: metrics.total_duration_ms,
};
result.thresholds_summary = {
response_p95_ms: {
actual: metrics.response_duration_ms.p95,
max: responseP95BudgetMs,
pass: attemptedPrompts > 0 && metrics.response_duration_ms.p95 <= responseP95BudgetMs,
},
error_rate: {
actual: metrics.error_rate,
max: maxErrorRate,
pass: metrics.error_rate <= maxErrorRate,
},
};
await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
if (result.status === "pass" && !Object.values(result.thresholds_summary).every((item) => item.pass)) {
result.status = "fail";
result.reason = "Debug Chat performance breached response latency or error-rate thresholds.";
}
const existingEvidence = {};
for (const [key, value] of Object.entries(result.evidence)) {
if (typeof value !== "string") continue;