Add performance and reliability QA gates

This commit is contained in:
huanghuoguoguo
2026-06-25 00:07:37 +08:00
parent 74a18191dd
commit 67437c2f5a
31 changed files with 2299 additions and 25 deletions
+35
View File
@@ -1,5 +1,7 @@
import { existsSync } from "node:fs";
import { spawnSync } from "node:child_process";
import { Socket } from "node:net";
import { join } from "node:path";
import type { CommandContext } from "../types.ts";
import { parseOptions } from "../cli.ts";
import { loadEnv } from "../fs.ts";
@@ -88,6 +90,37 @@ function compareProxyPair(env: Record<string, string>, upper: string, lower: str
return null;
}
/**
 * Resolve an environment key.
 *
 * The live process environment takes precedence over the loaded env map;
 * when neither defines the key an empty string is returned.
 */
function envValue(env: Record<string, string>, key: string): string {
  const fromProcess = process.env[key];
  const resolved = fromProcess ?? env[key];
  return resolved ?? "";
}
/**
 * Find the first proxy variable whose resolved value starts with "socks"
 * (case-insensitive).
 *
 * Keys are probed in a fixed order: ALL_PROXY, all_proxy, HTTPS_PROXY,
 * https_proxy, HTTP_PROXY, http_proxy. Returns the matching key together with
 * its value, or null when none of them points at a SOCKS proxy.
 */
function activeSocksProxy(env: Record<string, string>): { key: string; value: string } | null {
  const proxyKeys = ["ALL_PROXY", "all_proxy", "HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"];
  const socksScheme = /^socks/i;
  const match = proxyKeys
    .map((key) => ({ key, value: envValue(env, key) }))
    .find((candidate) => socksScheme.test(candidate.value));
  return match ?? null;
}
/**
 * Check that the LangBot venv Python can import `socksio` when a SOCKS proxy
 * is configured.
 *
 * @param env Loaded env map; `LANGBOT_REPO` is read from it to locate
 *   `<repo>/.venv/bin/python`.
 * @returns null when no SOCKS proxy is active or the import succeeds;
 *   otherwise a human-readable failure message.
 */
function checkSocksio(env: Record<string, string>): string | null {
  const proxy = activeSocksProxy(env);
  if (!proxy) return null;

  // Shared prefix for every failure message below.
  const proxyLabel = `SOCKS proxy ${proxy.key} is configured (${redactEnvValue(proxy.key, proxy.value)})`;

  const repo = env.LANGBOT_REPO;
  const python = repo ? join(repo, ".venv", "bin", "python") : "";
  if (!python || !existsSync(python)) {
    return `${proxyLabel}, but LangBot venv python was not found; after creating the venv, verify it can import socksio.`;
  }

  const result = spawnSync(python, ["-c", "import socksio"], {
    encoding: "utf8",
    timeout: 5000,
  });
  if (result.status === 0) return null;

  // spawnSync sets `error` when the interpreter could not be started or the
  // timeout fired. That is not evidence of a missing module, so do not tell
  // the user to pip-install in that case.
  if (result.error) {
    return `${proxyLabel}, but running ${python} to check socksio failed (${result.error.message}); verify manually with \`${python} -c "import socksio"\`.`;
  }
  return `${proxyLabel}, but ${python} cannot import socksio; run \`${python} -m pip install socksio\` or start LangBot without SOCKS proxy env.`;
}
export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
const env = loadEnv(ctx.root);
const failures: string[] = [];
@@ -117,6 +150,8 @@ export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
]) {
if (mismatch) failures.push(mismatch);
}
const socksioFailure = checkSocksio(env);
if (socksioFailure) failures.push(socksioFailure);
for (const [label, result] of await Promise.all([
checkUrl("LANGBOT_BACKEND_URL", env.LANGBOT_BACKEND_URL).then((result) => ["LANGBOT_BACKEND_URL", result] as const),
+44 -3
View File
@@ -465,6 +465,41 @@ function outputTail(value: string | Buffer | null | undefined): string {
return String(value ?? "").trim().slice(-4000);
}
/**
 * Translate a result status string into a process exit code:
 * "pass" -> 0; "blocked", "env_issue" and "flaky" -> 2; anything else -> 1.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
/**
 * Label an exit code for execution records:
 * 0 -> "ok", 2 -> "classified", every other value -> "nonzero".
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
/**
 * Build an execution record from `result.json` inside the case's evidence
 * directory.
 *
 * Returns null when the file does not exist, cannot be read or parsed, when
 * its `case_id` / `run_id` differ from `caseItem.id` / `caseItem.run_id`, or
 * when its `status` is not a string.
 */
function executionFromCaseResultFile(caseItem: Record<string, unknown>): Record<string, unknown> | null {
  const resultPath = join(String(caseItem.evidence_dir), "result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const { case_id: caseId, run_id: runId, status: resultStatus, reason } = parsed;

    // Only trust a result file written for this exact case and run.
    const belongsToCase = caseId === caseItem.id && runId === caseItem.run_id;
    if (!belongsToCase || typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    const execution: Record<string, unknown> = {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: typeof reason === "string" ? reason : "result.json completed",
      result_status: resultStatus,
      result_json: resultPath,
    };
    return execution;
  } catch {
    // Unreadable or malformed file: treat as "no usable result".
    return null;
  }
}
function executionProblemStatus(executions: Array<Record<string, unknown>>): string {
const statuses = executions.map((item) => String(item.status));
if (statuses.includes("nonzero")) return "fail";
@@ -523,12 +558,18 @@ export function commandSuiteRun(ctx: CommandContext): number {
encoding: "utf8",
stdio: options.json === true ? "pipe" : "inherit",
});
const status = result.error ? 1 : result.status ?? 1;
const fileExecution = result.error ? executionFromCaseResultFile(caseItem) : null;
const status = typeof fileExecution?.exit_status === "number"
? fileExecution.exit_status
: result.error ? 1 : result.status ?? 1;
executions.push({
id: caseItem.id,
status: status === 0 ? "ok" : "nonzero",
status: fileExecution?.status ?? executionStatusFromExitStatus(status),
exit_status: status,
reason: result.error?.message || "",
reason: fileExecution?.reason ?? result.error?.message ?? "",
result_status: fileExecution?.result_status,
result_json: fileExecution?.result_json,
spawn_error: fileExecution && result.error ? result.error.message : undefined,
stdout: outputTail(result.stdout),
stderr: outputTail(result.stderr),
});
+95 -14
View File
@@ -271,7 +271,7 @@ function reportTemplate(mode: string): Record<string, string> {
target_tested: "Probe target, endpoint, file, command, or service actually checked",
execution_path: "automation script | shell command | direct API | other",
probe_result: "What the probe observed",
logs_or_artifacts: "Log, filesystem, API, or other artifact paths collected",
metrics_or_artifacts: "Metrics, logs, filesystem artifacts, traces, or profiles collected",
diagnostics: "Extra diagnostics used, if any",
matched_troubleshooting: "Troubleshooting ids matched, if any",
assets_to_update: "New case/reference/troubleshooting entries to add",
@@ -320,7 +320,7 @@ function manualEvidenceTemplate(mode: string): ManualEvidenceTemplate {
target_tested: "TODO: probe target, endpoint, file, command, or service actually checked",
execution_path: "TODO: automation script | shell command | direct API | other",
probe_result: "TODO: observed probe result",
logs_or_artifacts: "TODO: evidence paths or skipped reason",
metrics_or_artifacts: "TODO: metrics, logs, filesystem artifacts, traces, or profiles collected",
diagnostics: "TODO: additional diagnostics used, if any",
matched_troubleshooting: "TODO: troubleshooting ids matched, if any",
assets_to_update: "TODO: case/reference/troubleshooting updates to make",
@@ -1099,6 +1099,41 @@ function executionTail(value: string | Buffer | null | undefined): string {
return String(value ?? "").trim().slice(-4000);
}
/**
 * Map a result status string to an exit code.
 *
 * "blocked", "env_issue" and "flaky" yield 2, "pass" yields 0, and every
 * other status yields 1.
 */
function exitStatusFromResultStatus(status: string): number {
  const classifiedStatuses = new Set(["blocked", "env_issue", "flaky"]);
  if (classifiedStatuses.has(status)) return 2;
  return status === "pass" ? 0 : 1;
}
/**
 * Name an exit code for execution records: 0 is "ok", 2 is "classified",
 * and any other value is "nonzero".
 */
function executionStatusFromExitStatus(status: number): string {
  return status === 0 ? "ok" : status === 2 ? "classified" : "nonzero";
}
/**
 * Build an execution summary from `automation-result.json` in `evidenceDir`.
 *
 * @param evidenceDir Directory expected to contain `automation-result.json`.
 * @param caseId Value the file's `case_id` must equal.
 * @param runId Value the file's `run_id` must equal.
 * @returns The summary, or null when the file is missing, unreadable,
 *   malformed, belongs to another case/run, or has a non-string `status`.
 */
function executionFromAutomationResultFile(
  evidenceDir: string,
  caseId: string,
  runId: string,
): { status: string; exit_status: number; reason: string; result_status: string; path: string } | null {
  const resultPath = join(evidenceDir, "automation-result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const { status: resultStatus, reason } = parsed;

    // Only trust a result file written for this exact case and run.
    const belongsToRun = parsed.case_id === caseId && parsed.run_id === runId;
    if (!belongsToRun) return null;
    if (typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    const reasonText = typeof reason === "string" ? reason : "automation-result.json completed";
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: reasonText,
      result_status: resultStatus,
      path: resultPath,
    };
  } catch {
    // Unreadable or malformed file: treat as "no usable result".
    return null;
  }
}
function runSetupAutomation(
ctx: CommandContext,
item: StructuredItem,
@@ -1224,6 +1259,30 @@ export function commandTestRun(ctx: CommandContext): number {
});
if (result.error) {
const fileExecution = executionFromAutomationResultFile(
run.automation.evidence_dir,
String(run.case.id),
run.run_id,
);
if (fileExecution) {
if (options.json !== true) {
console.error(`WARN: automation spawn reported an error, but ${fileExecution.path} completed: ${result.error.message}`);
}
if (options.json === true) {
console.log(JSON.stringify({
run,
setup_executions: setupExecutions,
automation_execution: {
...fileExecution,
spawn_error: result.error.message,
stdout: executionTail(result.stdout),
stderr: executionTail(result.stderr),
},
exit_status: fileExecution.exit_status,
}, null, 2));
}
return fileExecution.exit_status;
}
if (options.json !== true) console.error(`ERROR: failed to run automation: ${result.error.message}`);
if (options.json === true) {
console.log(JSON.stringify({
@@ -1247,7 +1306,7 @@ export function commandTestRun(ctx: CommandContext): number {
run,
setup_executions: setupExecutions,
automation_execution: {
status: status === 0 ? "ok" : "nonzero",
status: executionStatusFromExitStatus(status),
exit_status: status,
stdout: executionTail(result.stdout),
stderr: executionTail(result.stderr),
@@ -1311,6 +1370,7 @@ function renderMarkdownReport(report: TestReport): string {
const environment = report.environment;
const logGuard = report.log_guard;
const troubleshooting = report.troubleshooting;
const automation = report.automation_result;
const lines: string[] = [];
lines.push(`# Test Report: ${reportCase.id}`);
@@ -1323,20 +1383,41 @@ function renderMarkdownReport(report: TestReport): string {
lines.push(`Type: ${reportCase.type}`);
lines.push("");
lines.push("## Result");
lines.push(`- result: ${evidence.result}`);
for (const [key, value] of Object.entries(evidence)) {
if (key !== "result") lines.push(`- ${key}: ${value}`);
if (automation.status === "loaded" && automation.result) {
lines.push(`- result: ${automation.result}`);
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
if (automation.url) lines.push(`- target_tested: ${automation.url}`);
if (automation.path) lines.push(`- automation_result: ${automation.path}`);
if (automation.artifacts) lines.push(`- artifacts: ${JSON.stringify(automation.artifacts)}`);
} else {
lines.push(`- result: ${evidence.result}`);
for (const [key, value] of Object.entries(evidence)) {
if (key !== "result") lines.push(`- ${key}: ${value}`);
}
}
lines.push("");
lines.push("## Automation Result");
lines.push(`- status: ${report.automation_result.status}`);
if (report.automation_result.path) lines.push(`- path: ${report.automation_result.path}`);
if (report.automation_result.result) lines.push(`- result: ${report.automation_result.result}`);
if (report.automation_result.reason) lines.push(`- reason: ${report.automation_result.reason}`);
if (report.automation_result.started_at_local) lines.push(`- started_at_local: ${report.automation_result.started_at_local}`);
if (report.automation_result.finished_at_local) lines.push(`- finished_at_local: ${report.automation_result.finished_at_local}`);
if (report.automation_result.url) lines.push(`- url: ${report.automation_result.url}`);
if (report.automation_result.expected_text) lines.push(`- expected_text: ${report.automation_result.expected_text}`);
lines.push(`- status: ${automation.status}`);
if (automation.path) lines.push(`- path: ${automation.path}`);
if (automation.result) lines.push(`- result: ${automation.result}`);
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
if (automation.duration_ms !== undefined) lines.push(`- duration_ms: ${automation.duration_ms}`);
if (automation.started_at_local) lines.push(`- started_at_local: ${automation.started_at_local}`);
if (automation.finished_at_local) lines.push(`- finished_at_local: ${automation.finished_at_local}`);
if (automation.url) lines.push(`- url: ${automation.url}`);
if (automation.expected_text) lines.push(`- expected_text: ${automation.expected_text}`);
if (automation.metrics_summary) {
lines.push("- metrics_summary:");
lines.push(` ${JSON.stringify(automation.metrics_summary)}`);
}
if (automation.thresholds_summary) {
lines.push("- thresholds_summary:");
lines.push(` ${JSON.stringify(automation.thresholds_summary)}`);
}
if (automation.artifacts) {
lines.push("- artifacts:");
lines.push(` ${JSON.stringify(automation.artifacts)}`);
}
lines.push("");
lines.push("## Environment");
for (const [key, value] of Object.entries(environment)) lines.push(`- ${key}=${value}`);
+3
View File
@@ -126,6 +126,9 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set<st
...validateEnvKeyScalar(item, "automation_pipeline_url_env"),
...validateEnvKeyScalar(item, "automation_pipeline_name_env"),
...validateJsonScalar(item, "automation_filesystem_checks_json"),
...validateJsonScalar(item, "metrics_thresholds_json"),
...validateJsonScalar(item, "load_profile_json"),
...validateJsonScalar(item, "fault_model_json"),
...listValue(item.fields, "setup_automation").flatMap((entry) => (
validateSetupAutomationEntry(root, entry, caseIds).map((error) => `${item.path}: ${error}`)
)),
+27 -2
View File
@@ -9,7 +9,18 @@ export const requiredEnvKeys = [
];
export const caseModeValues = ["agent-browser", "probe"];
export const caseTypeValues = ["smoke", "regression", "feature", "provider", "exploratory"];
// Known case type identifiers.
// NOTE(review): presumably the accepted values for a case's `type` field;
// confirm against the case validator.
export const caseTypeValues = [
"smoke",
"regression",
"feature",
"provider",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security",
];
export const casePriorityValues = ["p0", "p1", "p2"];
export const caseRiskValues = ["low", "medium", "high"];
export const caseEvidenceValues = [
@@ -21,10 +32,24 @@ export const caseEvidenceValues = [
"frontend_log",
"api_diagnostic",
"filesystem",
"metrics",
"trace",
"profile",
"resource_log",
];
export const testResultStatusValues = ["pass", "fail", "blocked", "env_issue", "flaky"];
export const troubleshootingCategoryValues = ["product", "env_issue", "external_dependency", "blocked", "flaky"];
export const suiteTypeValues = ["smoke", "regression", "release_gate", "exploratory"];
// Known suite type identifiers.
// NOTE(review): presumably the accepted values for a suite's `type` field;
// confirm against the suite validator.
export const suiteTypeValues = [
"smoke",
"regression",
"release_gate",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security",
];
export const suiteRequiredStrings = ["id", "title", "description", "type", "priority"];
export const suiteRequiredLists = ["tags", "cases"];
+20
View File
@@ -91,6 +91,7 @@ export type AutomationResultEvidence = {
path?: string;
result?: string;
reason?: string;
duration_ms?: number;
started_at?: string;
started_at_local?: string;
finished_at?: string;
@@ -98,6 +99,9 @@ export type AutomationResultEvidence = {
url?: string;
prompt?: string;
expected_text?: string;
metrics_summary?: Record<string, unknown>;
thresholds_summary?: Record<string, unknown>;
artifacts?: Record<string, unknown>;
};
type MutableScanState = {
@@ -594,6 +598,18 @@ function stringField(data: Record<string, unknown>, key: string): string | undef
return typeof value === "string" && value.trim() ? value : undefined;
}
/**
 * Read `data[key]` as a finite number.
 *
 * Returns undefined for missing keys, non-number values, NaN and ±Infinity.
 */
function numberField(data: Record<string, unknown>, key: string): number | undefined {
  const candidate = data[key];
  if (typeof candidate !== "number") return undefined;
  return Number.isFinite(candidate) ? candidate : undefined;
}
/**
 * Read `data[key]` as a plain (non-array) object.
 *
 * Returns undefined for missing keys, null, primitives and arrays; otherwise
 * returns the stored object itself (no copy is made).
 */
function objectField(data: Record<string, unknown>, key: string): Record<string, unknown> | undefined {
  const candidate = data[key];
  if (candidate === null || typeof candidate !== "object" || Array.isArray(candidate)) {
    return undefined;
  }
  return candidate as Record<string, unknown>;
}
function evidenceDirFromOptions(options: Record<string, string | boolean>): string | undefined {
const explicit = typeof options["evidence-dir"] === "string" ? options["evidence-dir"] : undefined;
if (explicit) return resolve(explicit);
@@ -628,6 +644,7 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
path: resultPath,
result: stringField(result, "status"),
reason: stringField(result, "reason"),
duration_ms: numberField(result, "duration_ms"),
started_at: stringField(result, "started_at"),
started_at_local: stringField(result, "started_at_local"),
finished_at: stringField(result, "finished_at"),
@@ -635,6 +652,9 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
url: stringField(result, "url"),
prompt: redactSecrets(stringField(result, "prompt") ?? ""),
expected_text: stringField(result, "expected_text"),
metrics_summary: objectField(result, "metrics_summary"),
thresholds_summary: objectField(result, "thresholds_summary"),
artifacts: objectField(result, "artifacts"),
};
} catch (error) {
return { status: "invalid", path: resultPath, reason: String(error) };
+2
View File
@@ -114,6 +114,8 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro
["automation_expected_runner_id", "LANGBOT_E2E_EXPECTED_RUNNER_ID"],
["automation_reset_debug_chat", "LANGBOT_E2E_RESET_DEBUG_CHAT"],
["automation_debug_chat_session_type", "LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE"],
["automation_debug_chat_response_p95_ms", "LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS"],
["automation_debug_chat_max_error_rate", "LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE"],
["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"],
["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"],
["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],