mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-26 23:44:19 +00:00
Add performance and reliability QA gates
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { Socket } from "node:net";
|
||||
import { join } from "node:path";
|
||||
import type { CommandContext } from "../types.ts";
|
||||
import { parseOptions } from "../cli.ts";
|
||||
import { loadEnv } from "../fs.ts";
|
||||
@@ -88,6 +90,37 @@ function compareProxyPair(env: Record<string, string>, upper: string, lower: str
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Resolves an environment variable, preferring the live process environment
 * over the supplied env map.
 *
 * @param env - Fallback key/value map consulted when `process.env` has no entry.
 * @param key - Name of the variable to resolve.
 * @returns The value from `process.env`, else from `env`, else an empty string.
 */
function envValue(env: Record<string, string>, key: string): string {
  // `??` only falls through on null/undefined, so an empty string exported in
  // the process environment still takes precedence over the env map.
  return process.env[key] ?? env[key] ?? "";
}
|
||||
|
||||
/**
 * Finds the first proxy variable whose value starts with "socks"
 * (case-insensitive).
 *
 * Variables are checked in a fixed order: ALL_PROXY, all_proxy, HTTPS_PROXY,
 * https_proxy, HTTP_PROXY, http_proxy. Each value is resolved through
 * `envValue`.
 *
 * @param env - Env map passed through to `envValue` for each lookup.
 * @returns The matching variable name and its value, or `null` when none of
 *   the checked variables holds a SOCKS-style value.
 */
function activeSocksProxy(env: Record<string, string>): { key: string; value: string } | null {
  const proxyKeys = ["ALL_PROXY", "all_proxy", "HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"];
  for (const key of proxyKeys) {
    const value = envValue(env, key);
    if (/^socks/i.test(value)) return { key, value };
  }
  return null;
}
|
||||
|
||||
/**
 * Verifies that the LangBot venv interpreter can import `socksio` whenever a
 * SOCKS proxy is configured.
 *
 * @param env - Env map; `LANGBOT_REPO` locates the repository whose `.venv`
 *   interpreter is probed, and the proxy variables are read via
 *   `activeSocksProxy`.
 * @returns `null` when no SOCKS proxy is active or the import succeeds;
 *   otherwise a human-readable failure message.
 */
function checkSocksio(env: Record<string, string>): string | null {
  const proxy = activeSocksProxy(env);
  if (!proxy) return null;

  const configured = `SOCKS proxy ${proxy.key} is configured (${redactEnvValue(proxy.key, proxy.value)})`;

  const repo = env.LANGBOT_REPO;
  // POSIX venvs place the interpreter under bin/, Windows venvs under Scripts/.
  const candidates = repo
    ? [join(repo, ".venv", "bin", "python"), join(repo, ".venv", "Scripts", "python.exe")]
    : [];
  const python = candidates.find((candidate) => existsSync(candidate));
  if (!python) {
    return `${configured}, but LangBot venv python was not found; after creating the venv, verify it can import socksio.`;
  }

  const result = spawnSync(python, ["-c", "import socksio"], {
    encoding: "utf8",
    timeout: 5000,
  });
  if (result.status === 0) return null;

  // spawnSync sets `error` when the child could not be started or timed out;
  // that says nothing about whether socksio is installed, so report it as such.
  if (result.error) {
    return `${configured}, but ${python} could not be run to check for socksio (${result.error.message}); verify the venv interpreter works, then confirm it can import socksio.`;
  }

  return `${configured}, but ${python} cannot import socksio; run \`${python} -m pip install socksio\` or start LangBot without SOCKS proxy env.`;
}
|
||||
|
||||
export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
|
||||
const env = loadEnv(ctx.root);
|
||||
const failures: string[] = [];
|
||||
@@ -117,6 +150,8 @@ export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
|
||||
]) {
|
||||
if (mismatch) failures.push(mismatch);
|
||||
}
|
||||
const socksioFailure = checkSocksio(env);
|
||||
if (socksioFailure) failures.push(socksioFailure);
|
||||
|
||||
for (const [label, result] of await Promise.all([
|
||||
checkUrl("LANGBOT_BACKEND_URL", env.LANGBOT_BACKEND_URL).then((result) => ["LANGBOT_BACKEND_URL", result] as const),
|
||||
|
||||
@@ -465,6 +465,41 @@ function outputTail(value: string | Buffer | null | undefined): string {
|
||||
return String(value ?? "").trim().slice(-4000);
|
||||
}
|
||||
|
||||
/**
 * Converts a result status string into a process exit status.
 *
 * @param status - Result status string, e.g. "pass", "blocked", "env_issue", "flaky".
 * @returns 0 for "pass"; 2 for "blocked", "env_issue" or "flaky"; 1 for any
 *   other value.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
|
||||
|
||||
/**
 * Labels a numeric exit status for an execution record.
 *
 * @param status - Exit status of the execution.
 * @returns "ok" for 0, "classified" for 2, and "nonzero" for every other value.
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
|
||||
|
||||
/**
 * Builds an execution record from `<evidence_dir>/result.json`, if that file
 * exists and belongs to the given case and run.
 *
 * @param caseItem - Case record; `evidence_dir`, `id` and `run_id` are read.
 * @returns An execution record derived from the file's `status`, or `null`
 *   when the evidence directory is not a string, the file is missing or
 *   unparsable, its `case_id` / `run_id` do not match `caseItem`, or its
 *   `status` is not a string.
 */
function executionFromCaseResultFile(caseItem: Record<string, unknown>): Record<string, unknown> | null {
  const evidenceDir = caseItem.evidence_dir;
  // Stringifying a missing value would silently probe "undefined/result.json".
  if (typeof evidenceDir !== "string") return null;

  const resultPath = join(evidenceDir, "result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed: unknown = JSON.parse(readFileSync(resultPath, "utf8"));
    if (typeof parsed !== "object" || parsed === null) return null;
    const record = parsed as Record<string, unknown>;

    // Only trust a result file written for this exact case and run.
    if (record.case_id !== caseItem.id || record.run_id !== caseItem.run_id) return null;
    const resultStatus = record.status;
    if (typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: typeof record.reason === "string" ? record.reason : "result.json completed",
      result_status: resultStatus,
      result_json: resultPath,
    };
  } catch {
    // Unreadable or malformed JSON is treated the same as a missing result file.
    return null;
  }
}
|
||||
|
||||
function executionProblemStatus(executions: Array<Record<string, unknown>>): string {
|
||||
const statuses = executions.map((item) => String(item.status));
|
||||
if (statuses.includes("nonzero")) return "fail";
|
||||
@@ -523,12 +558,18 @@ export function commandSuiteRun(ctx: CommandContext): number {
|
||||
encoding: "utf8",
|
||||
stdio: options.json === true ? "pipe" : "inherit",
|
||||
});
|
||||
const status = result.error ? 1 : result.status ?? 1;
|
||||
const fileExecution = result.error ? executionFromCaseResultFile(caseItem) : null;
|
||||
const status = typeof fileExecution?.exit_status === "number"
|
||||
? fileExecution.exit_status
|
||||
: result.error ? 1 : result.status ?? 1;
|
||||
executions.push({
|
||||
id: caseItem.id,
|
||||
status: status === 0 ? "ok" : "nonzero",
|
||||
status: fileExecution?.status ?? executionStatusFromExitStatus(status),
|
||||
exit_status: status,
|
||||
reason: result.error?.message || "",
|
||||
reason: fileExecution?.reason ?? result.error?.message ?? "",
|
||||
result_status: fileExecution?.result_status,
|
||||
result_json: fileExecution?.result_json,
|
||||
spawn_error: fileExecution && result.error ? result.error.message : undefined,
|
||||
stdout: outputTail(result.stdout),
|
||||
stderr: outputTail(result.stderr),
|
||||
});
|
||||
|
||||
+95
-14
@@ -271,7 +271,7 @@ function reportTemplate(mode: string): Record<string, string> {
|
||||
target_tested: "Probe target, endpoint, file, command, or service actually checked",
|
||||
execution_path: "automation script | shell command | direct API | other",
|
||||
probe_result: "What the probe observed",
|
||||
logs_or_artifacts: "Log, filesystem, API, or other artifact paths collected",
|
||||
metrics_or_artifacts: "Metrics, logs, filesystem artifacts, traces, or profiles collected",
|
||||
diagnostics: "Extra diagnostics used, if any",
|
||||
matched_troubleshooting: "Troubleshooting ids matched, if any",
|
||||
assets_to_update: "New case/reference/troubleshooting entries to add",
|
||||
@@ -320,7 +320,7 @@ function manualEvidenceTemplate(mode: string): ManualEvidenceTemplate {
|
||||
target_tested: "TODO: probe target, endpoint, file, command, or service actually checked",
|
||||
execution_path: "TODO: automation script | shell command | direct API | other",
|
||||
probe_result: "TODO: observed probe result",
|
||||
logs_or_artifacts: "TODO: evidence paths or skipped reason",
|
||||
metrics_or_artifacts: "TODO: metrics, logs, filesystem artifacts, traces, or profiles collected",
|
||||
diagnostics: "TODO: additional diagnostics used, if any",
|
||||
matched_troubleshooting: "TODO: troubleshooting ids matched, if any",
|
||||
assets_to_update: "TODO: case/reference/troubleshooting updates to make",
|
||||
@@ -1099,6 +1099,41 @@ function executionTail(value: string | Buffer | null | undefined): string {
|
||||
return String(value ?? "").trim().slice(-4000);
|
||||
}
|
||||
|
||||
/**
 * Converts a result status string into a process exit status.
 *
 * @param status - Result status string, e.g. "pass", "blocked", "env_issue", "flaky".
 * @returns 0 for "pass"; 2 for "blocked", "env_issue" or "flaky"; 1 for any
 *   other value.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
|
||||
|
||||
/**
 * Labels a numeric exit status for an execution record.
 *
 * @param status - Exit status of the execution.
 * @returns "ok" for 0, "classified" for 2, and "nonzero" for every other value.
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
|
||||
|
||||
/**
 * Builds an execution record from `<evidenceDir>/automation-result.json`, if
 * that file exists and belongs to the given case and run.
 *
 * @param evidenceDir - Directory expected to contain `automation-result.json`.
 * @param caseId - Case id the file's `case_id` must equal.
 * @param runId - Run id the file's `run_id` must equal.
 * @returns The derived execution record (including the file path), or `null`
 *   when the file is missing or unparsable, the ids do not match, or `status`
 *   is not a string.
 */
function executionFromAutomationResultFile(
  evidenceDir: string,
  caseId: string,
  runId: string,
): { status: string; exit_status: number; reason: string; result_status: string; path: string } | null {
  const resultPath = join(evidenceDir, "automation-result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed: unknown = JSON.parse(readFileSync(resultPath, "utf8"));
    if (typeof parsed !== "object" || parsed === null) return null;
    const record = parsed as Record<string, unknown>;

    // Only trust a result file written for this exact case and run.
    if (record.case_id !== caseId || record.run_id !== runId) return null;
    const resultStatus = record.status;
    if (typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: typeof record.reason === "string" ? record.reason : "automation-result.json completed",
      result_status: resultStatus,
      path: resultPath,
    };
  } catch {
    // Unreadable or malformed JSON is treated the same as a missing result file.
    return null;
  }
}
|
||||
|
||||
function runSetupAutomation(
|
||||
ctx: CommandContext,
|
||||
item: StructuredItem,
|
||||
@@ -1224,6 +1259,30 @@ export function commandTestRun(ctx: CommandContext): number {
|
||||
});
|
||||
|
||||
if (result.error) {
|
||||
const fileExecution = executionFromAutomationResultFile(
|
||||
run.automation.evidence_dir,
|
||||
String(run.case.id),
|
||||
run.run_id,
|
||||
);
|
||||
if (fileExecution) {
|
||||
if (options.json !== true) {
|
||||
console.error(`WARN: automation spawn reported an error, but ${fileExecution.path} completed: ${result.error.message}`);
|
||||
}
|
||||
if (options.json === true) {
|
||||
console.log(JSON.stringify({
|
||||
run,
|
||||
setup_executions: setupExecutions,
|
||||
automation_execution: {
|
||||
...fileExecution,
|
||||
spawn_error: result.error.message,
|
||||
stdout: executionTail(result.stdout),
|
||||
stderr: executionTail(result.stderr),
|
||||
},
|
||||
exit_status: fileExecution.exit_status,
|
||||
}, null, 2));
|
||||
}
|
||||
return fileExecution.exit_status;
|
||||
}
|
||||
if (options.json !== true) console.error(`ERROR: failed to run automation: ${result.error.message}`);
|
||||
if (options.json === true) {
|
||||
console.log(JSON.stringify({
|
||||
@@ -1247,7 +1306,7 @@ export function commandTestRun(ctx: CommandContext): number {
|
||||
run,
|
||||
setup_executions: setupExecutions,
|
||||
automation_execution: {
|
||||
status: status === 0 ? "ok" : "nonzero",
|
||||
status: executionStatusFromExitStatus(status),
|
||||
exit_status: status,
|
||||
stdout: executionTail(result.stdout),
|
||||
stderr: executionTail(result.stderr),
|
||||
@@ -1311,6 +1370,7 @@ function renderMarkdownReport(report: TestReport): string {
|
||||
const environment = report.environment;
|
||||
const logGuard = report.log_guard;
|
||||
const troubleshooting = report.troubleshooting;
|
||||
const automation = report.automation_result;
|
||||
const lines: string[] = [];
|
||||
|
||||
lines.push(`# Test Report: ${reportCase.id}`);
|
||||
@@ -1323,20 +1383,41 @@ function renderMarkdownReport(report: TestReport): string {
|
||||
lines.push(`Type: ${reportCase.type}`);
|
||||
lines.push("");
|
||||
lines.push("## Result");
|
||||
lines.push(`- result: ${evidence.result}`);
|
||||
for (const [key, value] of Object.entries(evidence)) {
|
||||
if (key !== "result") lines.push(`- ${key}: ${value}`);
|
||||
if (automation.status === "loaded" && automation.result) {
|
||||
lines.push(`- result: ${automation.result}`);
|
||||
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
|
||||
if (automation.url) lines.push(`- target_tested: ${automation.url}`);
|
||||
if (automation.path) lines.push(`- automation_result: ${automation.path}`);
|
||||
if (automation.artifacts) lines.push(`- artifacts: ${JSON.stringify(automation.artifacts)}`);
|
||||
} else {
|
||||
lines.push(`- result: ${evidence.result}`);
|
||||
for (const [key, value] of Object.entries(evidence)) {
|
||||
if (key !== "result") lines.push(`- ${key}: ${value}`);
|
||||
}
|
||||
}
|
||||
lines.push("");
|
||||
lines.push("## Automation Result");
|
||||
lines.push(`- status: ${report.automation_result.status}`);
|
||||
if (report.automation_result.path) lines.push(`- path: ${report.automation_result.path}`);
|
||||
if (report.automation_result.result) lines.push(`- result: ${report.automation_result.result}`);
|
||||
if (report.automation_result.reason) lines.push(`- reason: ${report.automation_result.reason}`);
|
||||
if (report.automation_result.started_at_local) lines.push(`- started_at_local: ${report.automation_result.started_at_local}`);
|
||||
if (report.automation_result.finished_at_local) lines.push(`- finished_at_local: ${report.automation_result.finished_at_local}`);
|
||||
if (report.automation_result.url) lines.push(`- url: ${report.automation_result.url}`);
|
||||
if (report.automation_result.expected_text) lines.push(`- expected_text: ${report.automation_result.expected_text}`);
|
||||
lines.push(`- status: ${automation.status}`);
|
||||
if (automation.path) lines.push(`- path: ${automation.path}`);
|
||||
if (automation.result) lines.push(`- result: ${automation.result}`);
|
||||
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
|
||||
if (automation.duration_ms !== undefined) lines.push(`- duration_ms: ${automation.duration_ms}`);
|
||||
if (automation.started_at_local) lines.push(`- started_at_local: ${automation.started_at_local}`);
|
||||
if (automation.finished_at_local) lines.push(`- finished_at_local: ${automation.finished_at_local}`);
|
||||
if (automation.url) lines.push(`- url: ${automation.url}`);
|
||||
if (automation.expected_text) lines.push(`- expected_text: ${automation.expected_text}`);
|
||||
if (automation.metrics_summary) {
|
||||
lines.push("- metrics_summary:");
|
||||
lines.push(` ${JSON.stringify(automation.metrics_summary)}`);
|
||||
}
|
||||
if (automation.thresholds_summary) {
|
||||
lines.push("- thresholds_summary:");
|
||||
lines.push(` ${JSON.stringify(automation.thresholds_summary)}`);
|
||||
}
|
||||
if (automation.artifacts) {
|
||||
lines.push("- artifacts:");
|
||||
lines.push(` ${JSON.stringify(automation.artifacts)}`);
|
||||
}
|
||||
lines.push("");
|
||||
lines.push("## Environment");
|
||||
for (const [key, value] of Object.entries(environment)) lines.push(`- ${key}=${value}`);
|
||||
|
||||
@@ -126,6 +126,9 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set<st
|
||||
...validateEnvKeyScalar(item, "automation_pipeline_url_env"),
|
||||
...validateEnvKeyScalar(item, "automation_pipeline_name_env"),
|
||||
...validateJsonScalar(item, "automation_filesystem_checks_json"),
|
||||
...validateJsonScalar(item, "metrics_thresholds_json"),
|
||||
...validateJsonScalar(item, "load_profile_json"),
|
||||
...validateJsonScalar(item, "fault_model_json"),
|
||||
...listValue(item.fields, "setup_automation").flatMap((entry) => (
|
||||
validateSetupAutomationEntry(root, entry, caseIds).map((error) => `${item.path}: ${error}`)
|
||||
)),
|
||||
|
||||
+27
-2
@@ -9,7 +9,18 @@ export const requiredEnvKeys = [
|
||||
];
|
||||
|
||||
export const caseModeValues = ["agent-browser", "probe"];
// Single declaration of the case type list: the functional types followed by
// the contract / performance / reliability / chaos / security categories.
export const caseTypeValues = [
  "smoke",
  "regression",
  "feature",
  "provider",
  "exploratory",
  "contract",
  "performance",
  "reliability",
  "chaos",
  "security",
];
export const casePriorityValues = ["p0", "p1", "p2"];
export const caseRiskValues = ["low", "medium", "high"];
|
||||
export const caseEvidenceValues = [
|
||||
@@ -21,10 +32,24 @@ export const caseEvidenceValues = [
|
||||
"frontend_log",
|
||||
"api_diagnostic",
|
||||
"filesystem",
|
||||
"metrics",
|
||||
"trace",
|
||||
"profile",
|
||||
"resource_log",
|
||||
];
|
||||
export const testResultStatusValues = ["pass", "fail", "blocked", "env_issue", "flaky"];
export const troubleshootingCategoryValues = ["product", "env_issue", "external_dependency", "blocked", "flaky"];
// Single declaration of the suite type list: the original suite types followed
// by the contract / performance / reliability / chaos / security categories.
export const suiteTypeValues = [
  "smoke",
  "regression",
  "release_gate",
  "exploratory",
  "contract",
  "performance",
  "reliability",
  "chaos",
  "security",
];
export const suiteRequiredStrings = ["id", "title", "description", "type", "priority"];
export const suiteRequiredLists = ["tags", "cases"];
|
||||
|
||||
|
||||
@@ -91,6 +91,7 @@ export type AutomationResultEvidence = {
|
||||
path?: string;
|
||||
result?: string;
|
||||
reason?: string;
|
||||
duration_ms?: number;
|
||||
started_at?: string;
|
||||
started_at_local?: string;
|
||||
finished_at?: string;
|
||||
@@ -98,6 +99,9 @@ export type AutomationResultEvidence = {
|
||||
url?: string;
|
||||
prompt?: string;
|
||||
expected_text?: string;
|
||||
metrics_summary?: Record<string, unknown>;
|
||||
thresholds_summary?: Record<string, unknown>;
|
||||
artifacts?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
type MutableScanState = {
|
||||
@@ -594,6 +598,18 @@ function stringField(data: Record<string, unknown>, key: string): string | undef
|
||||
return typeof value === "string" && value.trim() ? value : undefined;
|
||||
}
|
||||
|
||||
/**
 * Reads `data[key]` as a finite number.
 *
 * @param data - Record to read from.
 * @param key - Property name to look up.
 * @returns The value when it is a number and finite; `undefined` for NaN,
 *   ±Infinity, non-number values, or a missing key.
 */
function numberField(data: Record<string, unknown>, key: string): number | undefined {
  const value = data[key];
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Reads `data[key]` as a plain (non-array) object.
 *
 * @param data - Record to read from.
 * @param key - Property name to look up.
 * @returns The same object reference when the value is a non-null,
 *   non-array object; otherwise `undefined`.
 */
function objectField(data: Record<string, unknown>, key: string): Record<string, unknown> | undefined {
  const value = data[key];
  // `typeof null` is "object" and arrays are objects too; exclude both.
  if (typeof value !== "object" || value === null || Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
|
||||
|
||||
function evidenceDirFromOptions(options: Record<string, string | boolean>): string | undefined {
|
||||
const explicit = typeof options["evidence-dir"] === "string" ? options["evidence-dir"] : undefined;
|
||||
if (explicit) return resolve(explicit);
|
||||
@@ -628,6 +644,7 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
|
||||
path: resultPath,
|
||||
result: stringField(result, "status"),
|
||||
reason: stringField(result, "reason"),
|
||||
duration_ms: numberField(result, "duration_ms"),
|
||||
started_at: stringField(result, "started_at"),
|
||||
started_at_local: stringField(result, "started_at_local"),
|
||||
finished_at: stringField(result, "finished_at"),
|
||||
@@ -635,6 +652,9 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
|
||||
url: stringField(result, "url"),
|
||||
prompt: redactSecrets(stringField(result, "prompt") ?? ""),
|
||||
expected_text: stringField(result, "expected_text"),
|
||||
metrics_summary: objectField(result, "metrics_summary"),
|
||||
thresholds_summary: objectField(result, "thresholds_summary"),
|
||||
artifacts: objectField(result, "artifacts"),
|
||||
};
|
||||
} catch (error) {
|
||||
return { status: "invalid", path: resultPath, reason: String(error) };
|
||||
|
||||
@@ -114,6 +114,8 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro
|
||||
["automation_expected_runner_id", "LANGBOT_E2E_EXPECTED_RUNNER_ID"],
|
||||
["automation_reset_debug_chat", "LANGBOT_E2E_RESET_DEBUG_CHAT"],
|
||||
["automation_debug_chat_session_type", "LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE"],
|
||||
["automation_debug_chat_response_p95_ms", "LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS"],
|
||||
["automation_debug_chat_max_error_rate", "LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE"],
|
||||
["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"],
|
||||
["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"],
|
||||
["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],
|
||||
|
||||
Reference in New Issue
Block a user