Add performance and reliability QA gates

This commit is contained in:
huanghuoguoguo
2026-06-25 00:07:37 +08:00
parent 74a18191dd
commit 67437c2f5a
31 changed files with 2299 additions and 25 deletions
+32 -2
View File
@@ -48,7 +48,18 @@
},
"type": {
"type": "string",
"enum": ["smoke", "regression", "feature", "provider", "exploratory"]
"enum": [
"smoke",
"regression",
"feature",
"provider",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security"
]
},
"priority": {
"type": "string",
@@ -102,7 +113,11 @@
"backend_log",
"frontend_log",
"api_diagnostic",
"filesystem"
"filesystem",
"metrics",
"trace",
"profile",
"resource_log"
]
},
"minItems": 1
@@ -188,9 +203,24 @@
"type": "string",
"enum": ["person", "group"]
},
"automation_debug_chat_response_p95_ms": {
"type": "string"
},
"automation_debug_chat_max_error_rate": {
"type": "string"
},
"automation_filesystem_checks_json": {
"type": "string"
},
"metrics_thresholds_json": {
"type": "string"
},
"load_profile_json": {
"type": "string"
},
"fault_model_json": {
"type": "string"
},
"automation_pipeline_url_env": {
"type": "string",
"pattern": "^[A-Z][A-Z0-9_]*$"
+11 -1
View File
@@ -18,7 +18,17 @@
},
"type": {
"type": "string",
"enum": ["smoke", "regression", "release_gate", "exploratory"]
"enum": [
"smoke",
"regression",
"release_gate",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security"
]
},
"priority": {
"type": "string",
+79 -1
View File
@@ -54,6 +54,7 @@ const debugChatSessionType = env.LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE || "person"
const pipelineConfigDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-diagnostic.json");
const debugChatResetDiagnosticPath = resolve(paths.evidenceDir, "debug-chat-reset-diagnostic.json");
const pipelineConfigRestoreDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-restore-diagnostic.json");
const metricsPath = resolve(paths.evidenceDir, "metrics.json");
const startedAt = new Date();
let browser;
@@ -80,10 +81,11 @@ let result = {
console_log: paths.consoleLog,
network_log: paths.networkLog,
screenshot: paths.screenshot,
metrics_json: metricsPath,
automation_result_json: paths.automationResultJson,
result_json: paths.resultJson,
},
evidence_collected: ["ui", "screenshot", "console", "network"],
evidence_collected: ["ui", "screenshot", "console", "network", "metrics"],
};
function boolFromEnv(value, defaultValue) {
@@ -103,6 +105,29 @@ function parseJsonEnv(key, fallback) {
}
}
/**
 * Read a non-negative numeric setting from the process environment.
 *
 * Despite the name, zero is accepted: a budget such as a maximum error rate
 * of 0 is a legitimate configuration.
 *
 * @param {string} key - Environment variable name to read.
 * @param {number} fallback - Value returned when the variable is unset,
 *   blank, not a finite number, or negative.
 * @returns {number} The parsed value, or `fallback`.
 */
function positiveNumberEnv(key, fallback) {
  const raw = env[key];
  // Number("") and Number("   ") are both 0, which would pass the range check
  // below and silently replace the fallback with a zero budget. Reject
  // unset/blank input before converting.
  if (typeof raw !== "string" || raw.trim() === "") return fallback;
  const value = Number(raw);
  return Number.isFinite(value) && value >= 0 ? value : fallback;
}
/**
 * Nearest-rank percentile of a list of numbers, rounded to three decimals.
 *
 * @param {number[]} values - Samples; the array is copied, not mutated.
 * @param {number} percentileValue - Percentile on a 0-100 scale. Values
 *   outside that range are clamped to the smallest / largest sample.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentileValue / 100) * sorted.length) - 1;
  // A percentile of 0 (or below) yields rank -1; clamp both ends so the lookup
  // always lands on a real sample instead of throwing on `undefined.toFixed`.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return Number(sorted[index].toFixed(3));
}
/**
 * Summarise a list of numbers as min / p50 / p95 / p99 / max.
 *
 * @param {number[]} values - Samples to summarise.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   Every field is 0 when `values` is empty; min and max are rounded to three
 *   decimals, percentiles come from `percentile`.
 */
function stats(values) {
  const summary = { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  if (values.length > 0) {
    const roundToThree = (sample) => Number(sample.toFixed(3));
    summary.min = roundToThree(Math.min(...values));
    summary.p50 = percentile(values, 50);
    summary.p95 = percentile(values, 95);
    summary.p99 = percentile(values, 99);
    summary.max = roundToThree(Math.max(...values));
  }
  return summary;
}
function promptStepsFromEnv() {
const rawSteps = parseJsonEnv("LANGBOT_E2E_PROMPTS_JSON", null);
if (rawSteps === null) {
@@ -658,6 +683,7 @@ try {
} else {
for (let index = 0; index < promptSteps.length; index += 1) {
const step = promptSteps[index];
const promptStartedAt = Date.now();
const chatResult = await runDebugChatPrompt(page, {
prompt: step.prompt,
expectedText: step.expectedText,
@@ -665,11 +691,13 @@ try {
imagePath: index === 0 ? imagePath : "",
failureSignals: failureSignals.length > 0 ? failureSignals : undefined,
});
const promptDurationMs = Date.now() - promptStartedAt;
result.chat_results.push({
index,
expected_text: step.expectedText,
status: chatResult.status,
reason: chatResult.reason,
response_duration_ms: promptDurationMs,
min_expected_count: chatResult.min_expected_count,
final_count: chatResult.final_count,
before_assistant_expected_count: chatResult.before_assistant_expected_count,
@@ -714,6 +742,56 @@ try {
const finishedAt = new Date();
result.finished_at = finishedAt.toISOString();
result.finished_at_local = localIsoWithOffset(finishedAt);
result.duration_ms = finishedAt.getTime() - startedAt.getTime();
// Collect the per-prompt durations recorded on chat_results; entries without a
// finite duration are ignored.
const responseDurations = result.chat_results
.map((item) => item.response_duration_ms)
.filter((value) => Number.isFinite(value));
const passedPrompts = result.chat_results.filter((item) => item.status === "pass").length;
const attemptedPrompts = result.chat_results.length;
// A run that attempted no prompts is reported as a 100% error rate.
const errorRate = attemptedPrompts === 0 ? 1 : Number(((attemptedPrompts - passedPrompts) / attemptedPrompts).toFixed(4));
const responseStats = stats(responseDurations);
// The E2E-prefixed variable is consulted first; the unprefixed variable and
// then safeResponseTimeoutMs are passed to positiveNumberEnv as nested fallbacks.
const responseP95BudgetMs = positiveNumberEnv(
"LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS",
positiveNumberEnv("LANGBOT_DEBUG_CHAT_RESPONSE_P95_MS", safeResponseTimeoutMs),
);
const maxErrorRate = positiveNumberEnv("LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE", 0);
// Full metrics document written to metricsPath; includes the complete chat_results.
const metrics = {
probe: caseId,
url: result.url,
prompt_count: result.prompt_count,
attempted_prompt_count: attemptedPrompts,
passed_prompt_count: passedPrompts,
error_rate: errorRate,
response_duration_ms: responseStats,
total_duration_ms: result.duration_ms,
chat_results: result.chat_results,
};
// Headline numbers copied onto the result object itself.
result.metrics_summary = {
prompt_count: metrics.prompt_count,
attempted_prompt_count: metrics.attempted_prompt_count,
passed_prompt_count: metrics.passed_prompt_count,
error_rate: metrics.error_rate,
response_p50_ms: metrics.response_duration_ms.p50,
response_p95_ms: metrics.response_duration_ms.p95,
total_duration_ms: metrics.total_duration_ms,
};
result.thresholds_summary = {
response_p95_ms: {
actual: metrics.response_duration_ms.p95,
max: responseP95BudgetMs,
// The latency threshold can only pass when at least one prompt was attempted.
pass: attemptedPrompts > 0 && metrics.response_duration_ms.p95 <= responseP95BudgetMs,
},
error_rate: {
actual: metrics.error_rate,
max: maxErrorRate,
pass: metrics.error_rate <= maxErrorRate,
},
};
await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
// Thresholds only downgrade a passing run; a status that is already not "pass"
// keeps its original status and reason.
if (result.status === "pass" && !Object.values(result.thresholds_summary).every((item) => item.pass)) {
result.status = "fail";
result.reason = "Debug Chat performance breached response latency or error-rate thresholds.";
}
const existingEvidence = {};
for (const [key, value] of Object.entries(result.evidence)) {
if (typeof value !== "string") continue;
+231
View File
@@ -130,6 +130,7 @@
"references/local-agent-runner.md",
"references/mcp-stdio-testing.md",
"references/model-provider-testing.md",
"references/performance-reliability-testing.md",
"references/pipeline-debug-chat.md",
"references/plugin-e2e-smoke.md",
"references/sandbox-skill-authoring.md",
@@ -150,6 +151,11 @@
"agent-runner-release-preflight",
"agent-runner-runtime-chaos",
"dify-agent-debug-chat",
"langbot-fault-taxonomy-contract",
"langbot-live-backend-latency",
"langbot-live-backend-log-health",
"langbot-live-control-plane-api",
"langbot-overhead-accounting-contract",
"langrag-kb-retrieve",
"langrag-parser-golden-e2e",
"langrag-sentinel-kb-discover",
@@ -165,6 +171,7 @@
"mcp-stdio-register",
"mcp-stdio-tool-call",
"pipeline-debug-chat",
"pipeline-debug-chat-performance",
"plugin-e2e-smoke",
"provider-deepseek",
"qa-plugin-smoke-live-install",
@@ -486,6 +493,128 @@
"backend_log"
]
},
{
"id": "langbot-fault-taxonomy-contract",
"title": "LangBot fault taxonomy and cleanup contract",
"mode": "probe",
"area": "reliability",
"type": "chaos",
"priority": "p1",
"risk": "medium",
"ci_eligible": true,
"tags": [
"reliability",
"chaos",
"contract",
"synthetic"
],
"automation": "skills/langbot-testing/probes/langbot-fault-taxonomy-contract.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"metrics",
"filesystem"
]
},
{
"id": "langbot-live-backend-latency",
"title": "LangBot live backend basic latency probe",
"mode": "probe",
"area": "performance",
"type": "performance",
"priority": "p1",
"risk": "medium",
"ci_eligible": false,
"tags": [
"performance",
"live-backend",
"latency",
"metrics"
],
"automation": "skills/langbot-testing/probes/langbot-live-backend-latency.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"metrics",
"network",
"api_diagnostic",
"filesystem"
]
},
{
"id": "langbot-live-backend-log-health",
"title": "LangBot live backend log health probe",
"mode": "probe",
"area": "reliability",
"type": "reliability",
"priority": "p1",
"risk": "medium",
"ci_eligible": false,
"tags": [
"reliability",
"live-backend",
"backend-log",
"metrics"
],
"automation": "skills/langbot-testing/probes/langbot-live-backend-log-health.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"metrics",
"backend_log",
"filesystem"
]
},
{
"id": "langbot-live-control-plane-api",
"title": "LangBot live control-plane API probe",
"mode": "probe",
"area": "performance",
"type": "performance",
"priority": "p1",
"risk": "medium",
"ci_eligible": false,
"tags": [
"performance",
"reliability",
"live-backend",
"control-plane",
"metrics"
],
"automation": "skills/langbot-testing/probes/langbot-live-control-plane-api.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"metrics",
"network",
"api_diagnostic",
"filesystem"
]
},
{
"id": "langbot-overhead-accounting-contract",
"title": "LangBot overhead accounting metrics contract",
"mode": "probe",
"area": "performance",
"type": "performance",
"priority": "p1",
"risk": "medium",
"ci_eligible": true,
"tags": [
"performance",
"metrics",
"contract",
"synthetic"
],
"automation": "skills/langbot-testing/probes/langbot-overhead-accounting-contract.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"metrics",
"resource_log",
"filesystem"
]
},
{
"id": "langrag-kb-retrieve",
"title": "LangRAG knowledge base ingests and retrieves a sentinel document",
@@ -911,6 +1040,33 @@
"backend_log"
]
},
{
"id": "pipeline-debug-chat-performance",
"title": "Pipeline Debug Chat user-path performance probe",
"mode": "agent-browser",
"area": "pipeline",
"type": "performance",
"priority": "p1",
"risk": "medium",
"ci_eligible": false,
"tags": [
"performance",
"pipeline",
"debug-chat",
"user-path",
"metrics"
],
"automation": "scripts/e2e/pipeline-debug-chat.mjs",
"setup_automation": [],
"setup_provides_env": [],
"evidence_required": [
"ui",
"screenshot",
"console",
"network",
"metrics"
]
},
{
"id": "plugin-e2e-smoke",
"title": "Plugin system installs a local plugin and exposes tool/page APIs",
@@ -1059,6 +1215,10 @@
"suites": [
"agent-runner-release-gate",
"core-smoke",
"langbot-live-backend-gate",
"langbot-performance-contract-gate",
"langbot-performance-reliability-gate",
"langbot-user-path-performance-gate",
"local-agent-gate"
],
"suite_summaries": [
@@ -1121,6 +1281,77 @@
"local-agent-basic-debug-chat"
]
},
{
"id": "langbot-live-backend-gate",
"title": "LangBot live backend reliability gate",
"description": "Live backend control-plane responsiveness and runtime log health checks for a locally running LangBot instance.",
"type": "reliability",
"priority": "p1",
"tags": [
"performance",
"reliability",
"live-backend",
"metrics"
],
"cases": [
"langbot-live-backend-latency",
"langbot-live-control-plane-api",
"langbot-live-backend-log-health"
]
},
{
"id": "langbot-performance-contract-gate",
"title": "LangBot performance contract gate",
"description": "Fast synthetic contract checks for performance metric accounting and non-destructive reliability fault taxonomy.",
"type": "contract",
"priority": "p1",
"tags": [
"performance",
"reliability",
"contract",
"metrics"
],
"cases": [
"langbot-overhead-accounting-contract",
"langbot-fault-taxonomy-contract"
]
},
{
"id": "langbot-performance-reliability-gate",
"title": "LangBot performance and reliability starter gate",
"description": "Starter gate for LangBot performance accounting, live backend control-plane latency, and non-destructive fault taxonomy checks.",
"type": "reliability",
"priority": "p1",
"tags": [
"performance",
"reliability",
"metrics",
"chaos"
],
"cases": [
"langbot-overhead-accounting-contract",
"langbot-fault-taxonomy-contract",
"langbot-live-backend-latency",
"langbot-live-control-plane-api",
"langbot-live-backend-log-health"
]
},
{
"id": "langbot-user-path-performance-gate",
"title": "LangBot user-path performance gate",
"description": "Browser-visible performance checks for user-facing LangBot paths such as Pipeline Debug Chat.",
"type": "performance",
"priority": "p1",
"tags": [
"performance",
"browser",
"debug-chat",
"user-path"
],
"cases": [
"pipeline-debug-chat-performance"
]
},
{
"id": "local-agent-gate",
"title": "Local Agent runner regression gate",
+3
View File
@@ -21,6 +21,7 @@ Use this skill when an agent needs to verify LangBot behavior through the WebUI
- **Sandbox-backed skill authoring**: read `references/sandbox-skill-authoring.md`.
- **LangRAG knowledge bases**: read `references/langrag-knowledge-base.md`.
- **MCP stdio tool testing**: read `references/mcp-stdio-testing.md`.
- **Performance, reliability, or chaos probes**: read `references/performance-reliability-testing.md`.
- **Drive a live instance over MCP (not raw HTTP)**: use the `langbot-mcp-ops` skill — the instance exposes an MCP server at `http://<host>:5300/mcp` (reuses API keys). Useful for setting up bots/pipelines/models as test fixtures programmatically.
- **Known failures and fixes**: read `references/troubleshooting.md`.
- **Reusable test groups**: run `bin/lbs suite list` and `bin/lbs suite plan <suite-id>` before manually assembling a case set.
@@ -36,6 +37,8 @@ Use this skill when an agent needs to verify LangBot behavior through the WebUI
- Use an authenticated browser profile prepared by `langbot-env-setup`.
- Do not expose API keys, OAuth secrets, tokens, or localStorage token values in output.
- A WebUI test is not complete until the visible UI result is checked against backend logs or network behavior.
- A performance result is not complete without `metrics` evidence and a clear split between LangBot overhead and external provider/tool/network time.
- A chaos or reliability result is not complete until the fault scope, cleanup, and recovery checks are recorded.
- For a suite, use `bin/lbs suite start <suite-id>` to create the suite evidence root, per-case directories, and `suite-start.json`/`suite-start.md` handoff files; use `bin/lbs test result <case-id>` to write final per-case `result.json`, then run `bin/lbs suite report <suite-id> --evidence-dir <dir>`.
- Do not mark a case `pass` until `test result --evidence` covers every value in the case's `evidence_required`.
- For runner-specific Debug Chat cases, use the case-specific pipeline env declared by `automation_pipeline_url_env` / `automation_pipeline_name_env`; do not silently reuse a generic `LANGBOT_PIPELINE_URL`.
@@ -0,0 +1,35 @@
id: langbot-fault-taxonomy-contract
title: "LangBot fault taxonomy and cleanup contract"
mode: probe
area: reliability
type: chaos
priority: p1
risk: medium
ci_eligible: true
tags:
- reliability
- chaos
- contract
- synthetic
skills:
- langbot-testing
automation: skills/langbot-testing/probes/langbot-fault-taxonomy-contract.mjs
fault_model_json: '{"kind":"taxonomy-contract","destructive":false,"scenarios":["provider-timeout","plugin-runtime-disconnect","mcp-stdio-server-exit","operator-missing-login","transient-marketplace-timeout"]}'
steps:
- "Run `rtk bin/lbs test run langbot-fault-taxonomy-contract --dry-run` first; remove `--dry-run` after checking the evidence directory."
- "Automation validates that representative fault scenarios declare target, injected fault, expected status, recovery check, and cleanup."
- "Review metrics.json, fault-model.json, and automation-result.json under LBS_EVIDENCE_DIR."
checks:
- "automation-result.json status is pass."
- "Every scenario has an expected status in pass, fail, blocked, env_issue, or flaky."
- "Every scenario declares a cleanup action and recovery check."
evidence_required:
- metrics
- filesystem
diagnostics:
- "This is a non-destructive taxonomy contract probe; it does not inject real runtime faults."
- "Use it as a gate before adding live chaos cases that kill runtimes, route traffic through a proxy, or disrupt a backend dependency."
success_patterns:
- "Fault taxonomy contract declares status"
failure_patterns:
- "missing required scenario fields"
@@ -0,0 +1,42 @@
id: langbot-live-backend-latency
title: "LangBot live backend basic latency probe"
mode: probe
area: performance
type: performance
priority: p1
risk: medium
ci_eligible: false
tags:
- performance
- live-backend
- latency
- metrics
skills:
- langbot-testing
env:
- LANGBOT_BACKEND_URL
automation: skills/langbot-testing/probes/langbot-live-backend-latency.mjs
metrics_thresholds_json: '{"backend_p95_ms":{"max":1000},"error_rate":{"max":0}}'
load_profile_json: '{"requests":12,"concurrency":2,"endpoints":["/healthz"]}'
steps:
- "Confirm the selected LangBot backend is the intended test target."
- "Run `rtk bin/lbs test run langbot-live-backend-latency --dry-run` first; remove `--dry-run` after checking LANGBOT_BACKEND_URL and evidence directory."
- "Automation sends a small request batch to LANGBOT_BACKEND_URL/healthz and records latency, status counts, and network errors."
checks:
- "automation-result.json status is pass when the backend responds and p95/error-rate thresholds pass."
- "automation-result.json status is env_issue when the backend is not reachable."
- "metrics.json and network.log are written under LBS_EVIDENCE_DIR."
evidence_required:
- metrics
- network
- api_diagnostic
- filesystem
diagnostics:
- "This probe measures backend health endpoint reachability latency only; it does not cover model/provider, browser, Debug Chat, RAG, or plugin runtime latency."
success_patterns:
- "Live backend latency probe passed"
failure_patterns:
- "Backend did not respond"
- "breached latency or error-rate thresholds"
troubleshooting:
- socks-proxy-without-socksio
@@ -0,0 +1,45 @@
id: langbot-live-backend-log-health
title: "LangBot live backend log health probe"
mode: probe
area: reliability
type: reliability
priority: p1
risk: medium
ci_eligible: false
tags:
- reliability
- live-backend
- backend-log
- metrics
skills:
- langbot-testing
env:
- LANGBOT_BACKEND_URL
automation: skills/langbot-testing/probes/langbot-live-backend-log-health.mjs
metrics_thresholds_json: '{"fail_count":{"max":0}}'
load_profile_json: '{"lookback_seconds":300,"log_source":"LANGBOT_BACKEND_LOG or latest LANGBOT_REPO/data/logs/langbot-*.log"}'
steps:
- "Confirm the selected LangBot backend log belongs to the intended test target."
- "Run `rtk bin/lbs test run langbot-live-backend-log-health --dry-run` first; remove `--dry-run` after checking evidence directory and log source."
- "Automation scans the recent backend log window for fail-severity runtime findings such as Traceback, ImportError, ERROR, unclosed sessions, and unawaited coroutines."
checks:
- "automation-result.json status is pass only when fail_count is 0."
- "metrics_summary includes scanned_line_count, fail_count, warning_count, and finding_count."
- "findings.json and scanned-backend.log are written under LBS_EVIDENCE_DIR."
evidence_required:
- metrics
- backend_log
- filesystem
diagnostics:
- "Set LANGBOT_BACKEND_LOG to an explicit log path when the latest log file is not the run target."
- "Set LANGBOT_BACKEND_LOG_SINCE or LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS to control the scan window."
- "This probe measures runtime log health; it does not prove user-facing Debug Chat, plugin, model, or RAG behavior."
success_patterns:
- "Live backend log health passed"
failure_patterns:
- "Traceback"
- "ImportError"
- "ERROR"
- "unclosed"
troubleshooting:
- socks-proxy-without-socksio
@@ -0,0 +1,44 @@
id: langbot-live-control-plane-api
title: "LangBot live control-plane API probe"
mode: probe
area: performance
type: performance
priority: p1
risk: medium
ci_eligible: false
tags:
- performance
- reliability
- live-backend
- control-plane
- metrics
skills:
- langbot-testing
env:
- LANGBOT_BACKEND_URL
automation: skills/langbot-testing/probes/langbot-live-control-plane-api.mjs
metrics_thresholds_json: '{"error_rate":{"max":0},"response_shape_failures":{"max":0},"healthz_p95_ms":{"max":500},"system_info_p95_ms":{"max":1000}}'
load_profile_json: '{"requests":20,"concurrency":4,"endpoints":["/healthz","/api/v1/system/info"],"auth_required":false}'
steps:
- "Confirm the selected LangBot backend is the intended test target."
- "Run `rtk bin/lbs test run langbot-live-control-plane-api --dry-run` first; remove `--dry-run` after checking LANGBOT_BACKEND_URL and evidence directory."
- "Automation sends a small request batch to /healthz and /api/v1/system/info, then validates status code, JSON shape, and latency budgets."
checks:
- "automation-result.json status is pass when every control-plane request returns HTTP 200, JSON code 0, and required response fields."
- "metrics_summary includes per-endpoint p50/p95 latency, error rate, status counts, and response_shape_failures."
- "thresholds_summary shows error_rate, response_shape_failures, healthz_p95_ms, and system_info_p95_ms all pass."
evidence_required:
- metrics
- network
- api_diagnostic
- filesystem
diagnostics:
- "This probe measures unauthenticated backend control-plane readiness; it does not cover authenticated UI flows, Debug Chat, model calls, plugins, or RAG."
- "A system_info shape failure usually means the API contract or startup state changed and should be investigated before treating latency as healthy."
success_patterns:
- "Live control-plane API probe passed"
failure_patterns:
- "Backend did not respond"
- "breached shape, latency, or error-rate thresholds"
troubleshooting:
- socks-proxy-without-socksio
@@ -0,0 +1,37 @@
id: langbot-overhead-accounting-contract
title: "LangBot overhead accounting metrics contract"
mode: probe
area: performance
type: performance
priority: p1
risk: medium
ci_eligible: true
tags:
- performance
- metrics
- contract
- synthetic
skills:
- langbot-testing
automation: skills/langbot-testing/probes/langbot-overhead-accounting-contract.mjs
metrics_thresholds_json: '{"sample_count":{"min":50},"langbot_overhead_p95_ms":{"max":25},"accounting_gap_max_ms":{"max":0.001}}'
load_profile_json: '{"kind":"synthetic-overhead-accounting","samples":80,"external_latency_segments":["provider","external_tool","network"]}'
steps:
- "Run `rtk bin/lbs test run langbot-overhead-accounting-contract --dry-run` first; remove `--dry-run` after checking the evidence directory."
- "Automation generates deterministic message-path latency samples and separates LangBot overhead from provider/tool/network latency."
- "Review metrics.json, thresholds.json, resource-log.json, and automation-result.json under LBS_EVIDENCE_DIR."
checks:
- "automation-result.json status is pass."
- "metrics_summary includes sample_count, langbot_overhead_p95_ms, e2e_latency_p95_ms, external_latency_p95_ms, and accounting_gap_max_ms."
- "thresholds_summary shows sample_count, langbot_overhead_p95_ms, and accounting_gap_max_ms all pass."
evidence_required:
- metrics
- resource_log
- filesystem
diagnostics:
- "This is a synthetic contract probe for the QA harness; it is not live product performance."
- "Use it to verify that reports can carry overhead accounting metrics before running live backend or browser performance probes."
success_patterns:
- "Overhead accounting contract passed"
failure_patterns:
- "breached one or more thresholds"
@@ -0,0 +1,75 @@
id: pipeline-debug-chat-performance
title: "Pipeline Debug Chat user-path performance probe"
mode: agent-browser
area: pipeline
type: performance
priority: p1
risk: medium
ci_eligible: false
tags:
- performance
- pipeline
- debug-chat
- user-path
- metrics
skills:
- langbot-env-setup
- langbot-testing
env:
- LANGBOT_FRONTEND_URL
- LANGBOT_BACKEND_URL
env_any:
- LANGBOT_PIPELINE_URL|LANGBOT_PIPELINE_NAME
automation: scripts/e2e/pipeline-debug-chat.mjs
automation_env:
- LANGBOT_FRONTEND_URL
- LANGBOT_BACKEND_URL
- LANGBOT_BROWSER_PROFILE
- LANGBOT_CHROMIUM_EXECUTABLE
- LANGBOT_E2E_PROMPT
- LANGBOT_E2E_EXPECTED_TEXT
- LANGBOT_E2E_RESPONSE_TIMEOUT_MS
automation_env_any:
- LANGBOT_PIPELINE_URL|LANGBOT_PIPELINE_NAME
automation_prompt: "请只回复 OK,用于性能测试。"
automation_expected_text: "OK"
automation_response_timeout_ms: "120000"
automation_reset_debug_chat: "true"
automation_debug_chat_response_p95_ms: "120000"
automation_debug_chat_max_error_rate: "0"
metrics_thresholds_json: '{"response_p95_ms":{"max":120000},"error_rate":{"max":0}}'
load_profile_json: '{"prompts":1,"browser":true,"path":"Pipeline Debug Chat","metric":"send-to-visible-completion"}'
preconditions:
- "LANGBOT_PIPELINE_URL or LANGBOT_PIPELINE_NAME points to the pipeline intended for this Debug Chat performance run."
- "The target pipeline is safe to reset Debug Chat history for this run."
- "The target pipeline has a known-good runner/model; provider latency should be interpreted separately from LangBot overhead."
steps:
- "Open LANGBOT_FRONTEND_URL with the prepared browser profile."
- "Open the target pipeline and select Debug Chat."
- "Reset Debug Chat history through the backend API when configured."
- "Send the deterministic prompt and wait for the expected assistant response."
checks:
- "automation-result.json status is pass when the expected assistant response appears and the response_p95_ms and error_rate thresholds pass."
- "metrics_summary includes response_p50_ms, response_p95_ms, error_rate, and total_duration_ms."
- "thresholds_summary shows response_p95_ms and error_rate pass."
evidence_required:
- ui
- screenshot
- console
- network
- metrics
diagnostics:
- "This case measures browser-visible send-to-completion latency; it does not split provider latency from LangBot overhead."
- "Use backend logs and provider diagnostics to explain slow runs before calling them LangBot regressions."
success_patterns:
- "Processing request from person_websocket"
- "Streaming completed"
failure_patterns:
- "Action invoke_llm_stream call timed out"
- "Task exception was never retrieved"
- "All models failed during streaming setup"
troubleshooting:
- debug-chat-history-contaminates-automation
- local-agent-model-route-unavailable
- plugin-runtime-timeout
- proxy-env-mismatch
@@ -1 +1,3 @@
dist/
dist/*
!dist/
!dist/qa-plugin-smoke-0.1.0.lbpkg
@@ -0,0 +1,159 @@
#!/usr/bin/env node
import { mkdir, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { env, exit } from "node:process";
/**
 * Left-pad a value with zeros.
 *
 * @param {*} value - Value to render; converted with `String`.
 * @param {number} [size=2] - Minimum width of the result.
 * @returns {string} The zero-padded text.
 */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}
/**
 * Format a date in local time as `YYYY-MM-DDTHH:mm:ss.SSS±HH:MM`.
 *
 * @param {Date} [date=new Date()] - Instant to format.
 * @returns {string} Local wall-clock timestamp followed by the UTC offset.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() is minutes *behind* UTC, so negate it to get the
  // conventional signed offset.
  const offsetMinutes = -date.getTimezoneOffset();
  const absolute = Math.abs(offsetMinutes);

  let sign = "-";
  if (offsetMinutes >= 0) sign = "+";

  const datePart = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const timePart = `T${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}.${pad(date.getMilliseconds(), 3)}`;
  const offsetPart = `${sign}${pad(Math.floor(absolute / 60))}:${pad(absolute % 60)}`;

  return `${datePart}${timePart}${offsetPart}`;
}
/**
 * Build a filename-safe UTC timestamp such as `2026-06-25T00-07-37Z`.
 *
 * @param {Date} [date=new Date()] - Instant to encode.
 * @returns {string} ISO timestamp without milliseconds, with every run of
 *   non-alphanumeric characters collapsed to a single dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
// Representative fault scenarios checked by this probe. The entries are
// declarative only: each one names the target component, the fault that would
// be injected, the status the run is expected to be classified as, and the
// recovery check and cleanup that go with it. validateScenario() requires all
// six fields to be non-empty.
const scenarios = [
{
id: "provider-timeout",
target: "provider",
injected_fault: "fake provider request exceeds the configured timeout",
expected_status: "env_issue",
recovery_check: "provider route is reachable or the case remains outside product pass/fail",
cleanup: "stop fake provider or reset proxy route",
},
{
id: "plugin-runtime-disconnect",
target: "plugin-runtime",
injected_fault: "runtime control channel disconnects during an action",
expected_status: "fail",
recovery_check: "runtime reconnects and a deterministic plugin action succeeds",
cleanup: "restart the local plugin runtime process",
},
{
id: "mcp-stdio-server-exit",
target: "mcp",
injected_fault: "stdio server exits mid-call",
expected_status: "fail",
recovery_check: "server can be registered again and exposes the expected tool",
cleanup: "remove temporary MCP server registration",
},
{
id: "operator-missing-login",
target: "webui",
injected_fault: "browser profile is not authenticated",
expected_status: "blocked",
recovery_check: "authenticated profile can open the same WebUI origin",
cleanup: "no product cleanup; refresh local login state",
},
{
id: "transient-marketplace-timeout",
target: "marketplace",
injected_fault: "marketplace request times out once and then succeeds",
expected_status: "flaky",
recovery_check: "rerun passes with the same product revision and no code change",
cleanup: "clear retry-only evidence and keep the run classified as flaky",
},
];
/**
 * Check that a fault scenario declares every required field and an allowed
 * expected status.
 *
 * @param {object} scenario - Scenario record to inspect.
 * @returns {{id: *, pass: boolean, missing: string[], expected_status: *}}
 *   `missing` lists required keys whose value is falsy; `pass` is true only
 *   when nothing is missing and `expected_status` is one of
 *   pass / fail / blocked / env_issue / flaky.
 */
function validateScenario(scenario) {
  const requiredKeys = ["id", "target", "injected_fault", "expected_status", "recovery_check", "cleanup"];
  const allowedStatuses = ["pass", "fail", "blocked", "env_issue", "flaky"];

  const missing = [];
  for (const key of requiredKeys) {
    if (!scenario[key]) missing.push(key);
  }

  const isComplete = missing.length === 0;
  return {
    id: scenario.id,
    pass: isComplete && allowedStatuses.includes(scenario.expected_status),
    missing,
    expected_status: scenario.expected_status,
  };
}
/**
 * Run the fault taxonomy contract probe.
 *
 * Validates every entry in `scenarios`, writes metrics.json, fault-model.json,
 * automation-result.json and result.json into the evidence directory, prints
 * the result to stdout, and exits with code 0 on pass or 1 on fail.
 *
 * Reads LBS_ROOT, LBS_RUN_ID and LBS_EVIDENCE_DIR from the environment; each
 * falls back to a value derived from the working directory and current time.
 */
async function main() {
  const caseId = "langbot-fault-taxonomy-contract";
  const root = resolve(env.LBS_ROOT || process.cwd());
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const toJsonText = (payload) => `${JSON.stringify(payload, null, 2)}\n`;

  const startedAt = new Date();
  const validations = scenarios.map((scenario) => validateScenario(scenario));

  // Tally how many scenarios expect each status.
  const statusCounts = scenarios.reduce((counts, scenario) => {
    const expected = scenario.expected_status;
    counts[expected] = (counts[expected] || 0) + 1;
    return counts;
  }, {});

  const metrics = {
    probe: caseId,
    scenario_count: scenarios.length,
    status_counts: statusCounts,
    scenarios,
    validations,
  };

  const scenarioCount = scenarios.length;
  const invalidCount = validations.filter((item) => !item.pass).length;
  const cleanupCount = scenarios.filter((item) => item.cleanup).length;
  const thresholds = {
    scenario_count: { actual: scenarioCount, min: 5, pass: scenarioCount >= 5 },
    invalid_scenario_count: { actual: invalidCount, max: 0, pass: invalidCount === 0 },
    cleanup_declared_count: { actual: cleanupCount, min: scenarioCount, pass: cleanupCount === scenarioCount },
  };
  const allThresholdsPass = Object.values(thresholds).every((item) => item.pass);
  const status = allThresholdsPass ? "pass" : "fail";

  const metricsPath = join(evidenceDir, "metrics.json");
  const faultModelPath = join(evidenceDir, "fault-model.json");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  await writeFile(metricsPath, toJsonText(metrics), "utf8");
  await writeFile(faultModelPath, toJsonText({ scenarios }), "utf8");

  // The finish time is taken after the evidence files are written so that
  // duration_ms covers them.
  const finishedAt = new Date();
  let reason = "Fault taxonomy contract is missing required scenario fields.";
  if (allThresholdsPass) {
    reason = "Fault taxonomy contract declares status, recovery, and cleanup for every scenario.";
  }

  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    metrics_summary: {
      scenario_count: metrics.scenario_count,
      status_counts: metrics.status_counts,
      invalid_scenario_count: thresholds.invalid_scenario_count.actual,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      fault_model_json: faultModelPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "filesystem"],
  };

  // Both result files carry identical content.
  const resultText = toJsonText(result);
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(allThresholdsPass ? 0 : 1);
}
await main();
@@ -0,0 +1,212 @@
#!/usr/bin/env node
import { mkdir, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { env, exit } from "node:process";
/**
 * Left-pads a value with zeros.
 * @param {*} value - Value to render; converted with String().
 * @param {number} [size=2] - Minimum width of the returned text.
 * @returns {string} The zero-padded text.
 */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}
/**
 * Formats a date as local wall-clock time followed by its UTC offset,
 * e.g. `2026-06-25T00:07:37.045+08:00`.
 * @param {Date} [date=new Date()] - Instant to format.
 * @returns {string} `YYYY-MM-DDTHH:mm:ss.SSS±HH:MM` in the process time zone.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() is positive west of UTC, so negate it to get the usual sign.
  const eastOfUtcMinutes = -date.getTimezoneOffset();
  const magnitude = Math.abs(eastOfUtcMinutes);
  const datePart = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const timePart = `${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}.${pad(date.getMilliseconds(), 3)}`;
  const zonePart = `${eastOfUtcMinutes >= 0 ? "+" : "-"}${pad(Math.floor(magnitude / 60))}:${pad(magnitude % 60)}`;
  return `${datePart}T${timePart}${zonePart}`;
}
/**
 * Builds an identifier-safe UTC timestamp such as `2026-06-24T16-07-37Z`.
 * @param {Date} [date=new Date()] - Instant to encode.
 * @returns {string} ISO timestamp without milliseconds, with every run of
 *   non-alphanumeric characters collapsed to a single dash.
 */
function timestampSlug(date = new Date()) {
  const isoSeconds = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = isoSeconds.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
/**
 * Nearest-rank percentile of a list of numbers, rounded to three decimals.
 * @param {number[]} values - Samples; the array is not mutated.
 * @param {number} percentileValue - Percentile in the range 0-100.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  // Multiply before dividing: (7 / 100) * 100 is 7.000000000000001 in floating
  // point, which would push ceil() one rank too high.
  const rank = Math.ceil((percentileValue * sorted.length) / 100);
  // Clamp both ends so percentile 0 selects the minimum instead of index -1.
  const index = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return Number(sorted[index].toFixed(3));
}
/**
 * Summarises latency samples as min / p50 / p95 / p99 / max.
 * @param {number[]} values - Samples in milliseconds.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   All zeros when `values` is empty; min and max are rounded to three decimals.
 */
function stats(values) {
  if (values.length === 0) {
    return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  }
  const rounded = (value) => Number(value.toFixed(3));
  const [p50, p95, p99] = [50, 95, 99].map((rank) => percentile(values, rank));
  return {
    min: rounded(Math.min(...values)),
    p50,
    p95,
    p99,
    max: rounded(Math.max(...values)),
  };
}
/**
 * Parses a JSON array of strings, returning `fallback` for anything else.
 * @param {string|undefined} value - Raw JSON text.
 * @param {string[]} fallback - Returned when `value` is empty, is not valid
 *   JSON, or is not an array whose items are all strings.
 * @returns {string[]} The parsed list or `fallback`.
 */
function parseJsonList(value, fallback) {
  if (!value) return fallback;
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return fallback;
  }
  const isStringList = Array.isArray(parsed) && parsed.every((item) => typeof item === "string");
  return isStringList ? parsed : fallback;
}
/**
 * Joins a base URL and a path with exactly one slash between them.
 * @param {string} baseUrl - Base URL; trailing slashes are removed.
 * @param {string} path - Path, with or without a leading slash.
 * @returns {string} The combined URL.
 */
function joinUrl(baseUrl, path) {
  const trimmedBase = baseUrl.replace(/\/+$/, "");
  if (path.startsWith("/")) return `${trimmedBase}${path}`;
  return `${trimmedBase}/${path}`;
}
/**
 * Issues one GET request and records its outcome and latency.
 * The response body is fully read before the latency is taken, and the
 * request is aborted once `timeoutMs` elapses.
 * @param {string} url - URL to request.
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<{url: string, ok: boolean, status: number, latency_ms: number, error: string}>}
 *   `ok` is true for any status below 500; failures resolve (never reject)
 *   with `status: 0` and the error message.
 */
async function fetchOnce(url, timeoutMs) {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), timeoutMs);
  const startedAtMs = performance.now();
  const elapsedMs = () => Number((performance.now() - startedAtMs).toFixed(3));
  try {
    const response = await fetch(url, { method: "GET", signal: abort.signal });
    await response.arrayBuffer();
    const latency = elapsedMs();
    const { status } = response;
    return { url, ok: status < 500, status, latency_ms: latency, error: "" };
  } catch (failure) {
    const latency = elapsedMs();
    const message = failure instanceof Error ? failure.message : String(failure);
    return { url, ok: false, status: 0, latency_ms: latency, error: message };
  } finally {
    // Always release the timer so a finished request cannot keep the process alive.
    clearTimeout(timer);
  }
}
/**
 * Sends `totalRequests` GET requests, cycling through `urls`, in sequential
 * batches of at most `concurrency` parallel requests.
 * @param {string[]} urls - URLs to probe round-robin; an empty list yields no requests.
 * @param {number} totalRequests - Number of requests to issue.
 * @param {number} concurrency - Requests per batch; values that are not a
 *   positive number (0, negative, NaN) are treated as 1.
 * @param {number} timeoutMs - Per-request timeout passed to fetchOnce.
 * @returns {Promise<object[]>} One fetchOnce result per request, in request order.
 */
async function runBatches(urls, totalRequests, concurrency, timeoutMs) {
  // Without URLs `index % urls.length` is NaN and every request would target `undefined`.
  if (urls.length === 0) return [];
  // A batch size below 1 would never drain the queue and hang the probe.
  const batchSize = Math.max(1, Math.floor(concurrency) || 1);
  const queue = Array.from({ length: totalRequests }, (_, index) => urls[index % urls.length]);
  const results = [];
  for (let offset = 0; offset < queue.length; offset += batchSize) {
    const batch = queue.slice(offset, offset + batchSize);
    results.push(...await Promise.all(batch.map((url) => fetchOnce(url, timeoutMs))));
  }
  return results;
}
/**
 * Entry point of the live backend latency probe.
 *
 * Reads its configuration from environment variables, requests the configured
 * endpoints in batches, writes `metrics.json`, `network.log`,
 * `automation-result.json` and `result.json` into the evidence directory,
 * prints the result as JSON and exits with 0 (pass), 2 (env_issue) or 1 (fail).
 *
 * Status and `thresholds_summary` are derived from the same numbers so the
 * report can never disagree with the exit code.
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-live-backend-latency";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });
  const startedAt = new Date();

  // Probe configuration; every knob has a default.
  const backendUrl = env.LANGBOT_BACKEND_URL || "";
  const endpoints = parseJsonList(env.LANGBOT_PERF_ENDPOINTS_JSON, ["/healthz"]);
  const totalRequests = Number(env.LANGBOT_PERF_REQUESTS || "12");
  const concurrency = Number(env.LANGBOT_PERF_CONCURRENCY || "2");
  const timeoutMs = Number(env.LANGBOT_PERF_TIMEOUT_MS || "5000");
  const p95BudgetMs = Number(env.LANGBOT_PERF_BACKEND_P95_MS || "1000");
  const maxErrorRate = Number(env.LANGBOT_PERF_MAX_ERROR_RATE || "0");

  const metricsPath = join(evidenceDir, "metrics.json");
  const networkLogPath = join(evidenceDir, "network.log");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  let results = [];
  if (backendUrl) {
    const urls = endpoints.map((path) => joinUrl(backendUrl, path));
    results = await runBatches(urls, totalRequests, concurrency, timeoutMs);
  }

  const okResults = results.filter((item) => item.ok);
  const errorCount = results.length - okResults.length;
  // No samples at all counts as a 100% error rate.
  const rawErrorRate = results.length === 0 ? 1 : errorCount / results.length;
  const statusCounts = {};
  for (const item of results) {
    const key = item.status === 0 ? "network_error" : String(item.status);
    statusCounts[key] = (statusCounts[key] || 0) + 1;
  }
  const metrics = {
    probe: caseId,
    backend_url: backendUrl,
    endpoints,
    total_requests: totalRequests,
    concurrency,
    timeout_ms: timeoutMs,
    ok_count: okResults.length,
    error_count: errorCount,
    error_rate: Number(rawErrorRate.toFixed(4)),
    latency_ms: stats(okResults.map((item) => item.latency_ms)),
    status_counts: statusCounts,
  };
  const thresholds = {
    backend_p95_ms: { actual: metrics.latency_ms.p95, max: p95BudgetMs, pass: metrics.latency_ms.p95 <= p95BudgetMs },
    // Judge the unrounded rate: the 4-decimal display value could round a tiny
    // non-zero error rate down to 0 and report "pass" while the gate fails.
    error_rate: { actual: metrics.error_rate, max: maxErrorRate, pass: rawErrorRate <= maxErrorRate },
  };

  let status;
  let reason;
  if (!backendUrl) {
    status = "env_issue";
    reason = "LANGBOT_BACKEND_URL is not configured.";
  } else if (results.length > 0 && results.every((item) => item.status === 0)) {
    // Every request failed before any HTTP status arrived: the backend is unreachable.
    status = "env_issue";
    reason = `Backend did not respond at ${backendUrl}.`;
  } else if (Object.values(thresholds).every((item) => item.pass)) {
    status = "pass";
    reason = "Live backend latency probe passed all thresholds.";
  } else {
    status = "fail";
    reason = "Live backend latency probe breached latency or error-rate thresholds.";
  }

  await writeFile(metricsPath, `${JSON.stringify({ ...metrics, samples: results }, null, 2)}\n`, "utf8");
  await writeFile(networkLogPath, results.map((item) => JSON.stringify(item)).join("\n") + (results.length > 0 ? "\n" : ""), "utf8");
  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: backendUrl,
    metrics_summary: {
      requests: metrics.total_requests,
      concurrency: metrics.concurrency,
      ok_count: metrics.ok_count,
      error_rate: metrics.error_rate,
      latency_p50_ms: metrics.latency_ms.p50,
      latency_p95_ms: metrics.latency_ms.p95,
      status_counts: metrics.status_counts,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      network_log: networkLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "network", "api_diagnostic", "filesystem"],
  };
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : status === "env_issue" ? 2 : 1);
}
await main();
@@ -0,0 +1,205 @@
#!/usr/bin/env node
import { existsSync, readdirSync, statSync } from "node:fs";
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { env, exit } from "node:process";
/**
 * Renders a value as text, zero-filled on the left to a minimum width.
 * @param {*} value - Value to render via String().
 * @param {number} [size=2] - Minimum number of characters.
 * @returns {string} The padded text; longer values are returned unchanged.
 */
function pad(value, size = 2) {
  const rendered = String(value);
  return rendered.length >= size ? rendered : rendered.padStart(size, "0");
}
/**
 * Renders a date in the process's local time with an explicit UTC offset.
 * @param {Date} [date=new Date()] - Instant to render.
 * @returns {string} Text shaped like `2026-06-25T00:07:37.045+08:00`.
 */
function localIsoWithOffset(date = new Date()) {
  // Negated because getTimezoneOffset() counts minutes *behind* UTC.
  const offsetMinutes = -date.getTimezoneOffset();
  const absoluteMinutes = Math.abs(offsetMinutes);
  const calendar = [date.getFullYear(), pad(date.getMonth() + 1), pad(date.getDate())].join("-");
  const clock = [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join(":");
  const millis = pad(date.getMilliseconds(), 3);
  const sign = offsetMinutes >= 0 ? "+" : "-";
  const offset = `${sign}${pad(Math.floor(absoluteMinutes / 60))}:${pad(absoluteMinutes % 60)}`;
  return `${calendar}T${clock}.${millis}${offset}`;
}
/**
 * Produces a UTC timestamp usable inside run identifiers, e.g. `2026-06-24T16-07-37Z`.
 * @param {Date} [date=new Date()] - Instant to encode.
 * @returns {string} The ISO timestamp without milliseconds, with runs of
 *   non-alphanumeric characters replaced by one dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const joined = withoutMillis.split(/[^0-9A-Za-z]+/).join("-");
  return joined.replace(/^-|-$/g, "");
}
/**
 * Resolves the LangBot repository directory.
 * @param {string} root - Directory whose parent is used when LANGBOT_REPO is unset.
 * @returns {string} Absolute path from LANGBOT_REPO, or the parent of `root`.
 */
function repoRootFromEnv(root) {
  const configured = env.LANGBOT_REPO;
  if (configured) return resolve(configured);
  return resolve(root, "..");
}
/**
 * Locates the backend log file to scan.
 *
 * LANGBOT_BACKEND_LOG wins when set. Otherwise the most recently modified
 * regular file named `langbot-*.log` under `<repo>/data/logs` is chosen.
 * @param {string} root - Passed to repoRootFromEnv to locate the repository.
 * @returns {string} Path of the log file, or "" when none is found.
 */
function latestBackendLog(root) {
  const explicit = env.LANGBOT_BACKEND_LOG;
  if (explicit) return resolve(explicit);
  const logsDir = join(repoRootFromEnv(root), "data", "logs");
  if (!existsSync(logsDir)) return "";
  let newestPath = "";
  let newestMtimeMs = -Infinity;
  for (const name of readdirSync(logsDir)) {
    if (!/^langbot-.*\.log$/.test(name)) continue;
    const path = join(logsDir, name);
    let info;
    try {
      // Stat each candidate exactly once; a file rotated away between
      // readdir and stat is skipped instead of throwing mid-comparison.
      info = statSync(path);
    } catch {
      continue;
    }
    if (!info.isFile()) continue;
    // Strict ">" keeps the first entry on ties, matching a stable descending sort.
    if (info.mtimeMs > newestMtimeMs) {
      newestPath = path;
      newestMtimeMs = info.mtimeMs;
    }
  }
  return newestPath;
}
/**
 * Determines the start of the log window to scan.
 *
 * Uses LANGBOT_BACKEND_LOG_SINCE when it parses as a date; otherwise goes back
 * LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS (default 300) from `startedAt`.
 * Unusable values are reported on stderr and replaced by the defaults, so the
 * result is always a valid Date.
 * @param {Date} startedAt - Start time of the probe run.
 * @returns {Date} Earliest timestamp to include in the scan.
 */
function parseSince(startedAt) {
  const defaultLookbackSeconds = 300;
  const explicit = env.LANGBOT_BACKEND_LOG_SINCE;
  if (explicit) {
    const parsed = new Date(explicit);
    if (!Number.isNaN(parsed.getTime())) return parsed;
    console.warn(`Ignoring unparseable LANGBOT_BACKEND_LOG_SINCE=${JSON.stringify(explicit)}; using the lookback window instead.`);
  }
  const configured = Number(env.LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS || String(defaultLookbackSeconds));
  let lookbackSeconds = configured;
  if (!Number.isFinite(configured) || configured < 0) {
    // NaN would produce an Invalid Date; a negative value would place the
    // window in the future and silently scan nothing.
    console.warn(`Ignoring invalid LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS=${JSON.stringify(env.LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS)}; using ${defaultLookbackSeconds}.`);
    lookbackSeconds = defaultLookbackSeconds;
  }
  return new Date(startedAt.getTime() - lookbackSeconds * 1000);
}
/**
 * Extracts the leading timestamp of a backend log line.
 *
 * Two prefixes are recognised:
 *  - `[MM-DD HH:mm:ss.SSS]`: carries neither year nor zone, so `year` and
 *    `localOffset` supply them;
 *  - `[YYYY-MM-DD HH:mm:ss ±HHMM]`: self-contained.
 * @param {string} line - One log line.
 * @param {number} year - Year assumed for the year-less format.
 * @param {string} [localOffset] - UTC offset (`Z` or `±HH:MM`) assumed for the
 *   year-less format. Defaults to LANGBOT_BACKEND_LOG_UTC_OFFSET, then
 *   `+08:00`; values in any other shape also fall back to `+08:00`.
 * @returns {Date|null} The parsed instant, or null when the line has no
 *   recognised timestamp prefix.
 */
function parseTimestamp(line, year, localOffset = env.LANGBOT_BACKEND_LOG_UTC_OFFSET || "+08:00") {
  const localMatch = line.match(/^\[(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\.(\d{3})\]/);
  if (localMatch) {
    const [, month, day, hour, minute, second, millisecond] = localMatch;
    // Reject malformed offsets up front; they would otherwise yield an Invalid Date.
    const offset = /^(?:Z|[+-]\d{2}:\d{2})$/.test(localOffset) ? localOffset : "+08:00";
    // NOTE(review): lines written in a previous year are stamped with the
    // caller's year because this format has none.
    return new Date(`${year}-${month}-${day}T${hour}:${minute}:${second}.${millisecond}${offset}`);
  }
  const accessMatch = line.match(/^\[(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) ([+-]\d{4})\]/);
  if (accessMatch) {
    const [, fullYear, month, day, hour, minute, second, offset] = accessMatch;
    // Turn "+0800" into the "+08:00" form accepted by the ISO date parser.
    const normalizedOffset = `${offset.slice(0, 3)}:${offset.slice(3)}`;
    return new Date(`${fullYear}-${month}-${day}T${hour}:${minute}:${second}${normalizedOffset}`);
  }
  return null;
}
/**
 * Classifies a single log line against the known problem patterns.
 * Rules are tried in order and the first match wins, so specific patterns
 * (tracebacks, unclosed sessions, ...) take precedence over the generic
 * ERROR/CRITICAL and WARNING checks.
 * @param {string} line - Log line text.
 * @param {number} number - 1-based line number, copied into the finding.
 * @returns {{severity: string, kind: string, line: number, excerpt: string}|null}
 *   The finding, or null when no rule matches.
 */
function findingForLine(line, number) {
  // [severity, kind, pattern]
  const rules = [
    ["fail", "python_traceback", /\bTraceback(?: \(most recent call last\))?/i],
    ["fail", "unretrieved_task_exception", /Task exception was never retrieved/i],
    ["fail", "unawaited_coroutine", /RuntimeWarning:\s+coroutine .* was never awaited/i],
    ["fail", "unclosed_client_session", /Unclosed client session/i],
    ["fail", "unclosed_connector", /Unclosed connector/i],
    ["fail", "import_error", /\bImportError\b/i],
    ["fail", "error_log", /\b(?:ERROR|CRITICAL)\b/],
    ["warning", "warning_log", /\bWARNING\b/],
  ];
  const matched = rules.find(([, , pattern]) => pattern.test(line));
  if (!matched) return null;
  const [severity, kind] = matched;
  return { severity, kind, line: number, excerpt: line };
}
/**
 * Scans log text for findings inside the time window that starts at `since`.
 *
 * A line with a timestamp switches the window on or off; lines without one
 * (continuations such as stack frames) inherit the state of the last
 * timestamped line. Nothing is scanned before the first in-window timestamp.
 * @param {string} text - Whole log file content.
 * @param {Date} since - Earliest timestamp to include.
 * @param {number} year - Forwarded to parseTimestamp.
 * @returns {{findings: object[], scanned: {number: number, text: string}[], total_lines: number}}
 */
function scanLines(text, since, year) {
  const lines = text.split(/\r?\n/);
  const findings = [];
  const scanned = [];
  let inWindow = false;
  lines.forEach((line, index) => {
    const stamp = parseTimestamp(line, year);
    if (stamp) inWindow = stamp >= since;
    if (!inWindow) return;
    const number = index + 1;
    scanned.push({ number, text: line });
    const finding = findingForLine(line, number);
    if (finding) findings.push(finding);
  });
  return { findings, scanned, total_lines: lines.length };
}
/**
 * Entry point of the live backend log-health probe.
 *
 * Picks the backend log, scans the lines inside the configured time window,
 * writes `metrics.json`, `findings.json`, `scanned-backend.log`,
 * `automation-result.json` and `result.json` into the evidence directory,
 * prints the result as JSON and exits with 0 (pass), 2 (env_issue) or 1 (fail).
 * Only fail-severity findings fail the run; warnings are counted but tolerated.
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  const caseId = "langbot-live-backend-log-health";
  const root = resolve(env.LBS_ROOT || process.cwd());
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });
  const startedAt = new Date();
  const since = parseSince(startedAt);
  const logPath = latestBackendLog(root);
  const artifacts = {
    metrics_json: join(evidenceDir, "metrics.json"),
    findings_json: join(evidenceDir, "findings.json"),
    scanned_backend_log: join(evidenceDir, "scanned-backend.log"),
    automation_result_json: join(evidenceDir, "automation-result.json"),
    result_json: join(evidenceDir, "result.json"),
  };
  const countSeverity = (findings, severity) => findings.filter((item) => item.severity === severity).length;

  let scan = { findings: [], scanned: [], total_lines: 0 };
  let status;
  let reason;
  if (logPath && existsSync(logPath)) {
    const text = await readFile(logPath, "utf8");
    scan = scanLines(text, since, startedAt.getFullYear());
    const clean = countSeverity(scan.findings, "fail") === 0;
    status = clean ? "pass" : "fail";
    reason = clean
      ? "Live backend log health passed; no fail-severity findings in the scanned window."
      : "Live backend log health found fail-severity backend log findings.";
  } else {
    // Without a log there is nothing to judge: report an environment problem, not a failure.
    status = "env_issue";
    reason = "No LangBot backend log file was found. Set LANGBOT_BACKEND_LOG or LANGBOT_REPO.";
  }

  const failCount = countSeverity(scan.findings, "fail");
  const warningCount = countSeverity(scan.findings, "warning");
  const metrics = {
    probe: caseId,
    backend_log: logPath,
    since: since.toISOString(),
    scanned_line_count: scan.scanned.length,
    total_line_count: scan.total_lines,
    fail_count: failCount,
    warning_count: warningCount,
    finding_count: scan.findings.length,
  };
  const thresholds = {
    fail_count: { actual: failCount, max: 0, pass: failCount === 0 },
  };
  // Each scanned line keeps its original line number so findings can be traced back.
  const scannedLogText = scan.scanned.map(({ number, text }) => `${number}: ${text}\n`).join("");
  await writeFile(artifacts.metrics_json, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
  await writeFile(artifacts.findings_json, `${JSON.stringify(scan.findings, null, 2)}\n`, "utf8");
  await writeFile(artifacts.scanned_backend_log, scannedLogText, "utf8");

  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: logPath,
    metrics_summary: {
      scanned_line_count: metrics.scanned_line_count,
      fail_count: metrics.fail_count,
      warning_count: metrics.warning_count,
      finding_count: metrics.finding_count,
    },
    thresholds_summary: thresholds,
    artifacts,
    evidence_collected: ["metrics", "backend_log", "filesystem"],
  };
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(artifacts.automation_result_json, resultText, "utf8");
  await writeFile(artifacts.result_json, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  if (status === "pass") exit(0);
  exit(status === "env_issue" ? 2 : 1);
}
await main();
@@ -0,0 +1,311 @@
#!/usr/bin/env node
import { mkdir, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { env, exit } from "node:process";
/**
 * Zero-fills a value on the left.
 * @param {*} value - Value to format; stringified with String().
 * @param {number} [size=2] - Target minimum length.
 * @returns {string} The formatted text.
 */
function pad(value, size = 2) {
  const fill = "0";
  return String(value).padStart(size, fill);
}
/**
 * Serialises a date as local time plus UTC offset (`YYYY-MM-DDTHH:mm:ss.SSS±HH:MM`).
 * @param {Date} [date=new Date()] - Instant to serialise.
 * @returns {string} The local timestamp with its offset suffix.
 */
function localIsoWithOffset(date = new Date()) {
  // Positive when local time is ahead of UTC (getTimezoneOffset has the opposite sign).
  const minutesAhead = -date.getTimezoneOffset();
  const size = Math.abs(minutesAhead);
  const offsetHours = pad(Math.floor(size / 60));
  const offsetMinutes = pad(size % 60);
  const day = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const time = `T${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}.${pad(date.getMilliseconds(), 3)}`;
  const suffix = `${minutesAhead >= 0 ? "+" : "-"}${offsetHours}:${offsetMinutes}`;
  return day + time + suffix;
}
/**
 * Converts a date into a slug such as `2026-06-24T16-07-37Z` (UTC, second precision).
 * @param {Date} [date=new Date()] - Instant to convert.
 * @returns {string} Slug containing only digits, letters and single dashes.
 */
function timestampSlug(date = new Date()) {
  let slug = date.toISOString();
  slug = slug.replace(/\.\d{3}Z$/, "Z");
  slug = slug.replace(/[^0-9A-Za-z]+/g, "-");
  slug = slug.replace(/^-|-$/g, "");
  return slug;
}
/**
 * Returns the nearest-rank percentile of `values`, rounded to three decimals.
 * @param {number[]} values - Samples; left unmodified.
 * @param {number} percentileValue - Requested percentile, 0-100.
 * @returns {number} The sample at that rank, or 0 for an empty list.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  // Dividing last avoids float drift such as (7 / 100) * 100 === 7.000000000000001,
  // which would make ceil() overshoot by one rank.
  const rank = Math.ceil((percentileValue * sorted.length) / 100);
  // Lower clamp: percentile 0 must map to the first sample, not index -1.
  const index = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return Number(sorted[index].toFixed(3));
}
/**
 * Builds a latency summary (min, p50, p95, p99, max) for a list of samples.
 * @param {number[]} values - Samples in milliseconds.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   Zeros for an empty list; min and max are rounded to three decimals.
 */
function stats(values) {
  const summary = { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  if (values.length === 0) return summary;
  summary.min = Number(Math.min(...values).toFixed(3));
  summary.p50 = percentile(values, 50);
  summary.p95 = percentile(values, 95);
  summary.p99 = percentile(values, 99);
  summary.max = Number(Math.max(...values).toFixed(3));
  return summary;
}
/**
 * Concatenates a base URL and a path so that exactly one slash separates them.
 * @param {string} baseUrl - Base URL; any trailing slashes are dropped.
 * @param {string} path - Path with or without a leading slash.
 * @returns {string} The joined URL.
 */
function joinUrl(baseUrl, path) {
  const separator = path.startsWith("/") ? "" : "/";
  return baseUrl.replace(/\/+$/, "") + separator + path;
}
/**
 * Parses JSON text that must contain a plain object.
 * @param {string|undefined} value - Raw JSON text.
 * @param {object} fallback - Returned when `value` is empty, is not valid
 *   JSON, or parses to null, an array, or a primitive.
 * @returns {object} The parsed object or `fallback`.
 */
function parseJsonObject(value, fallback) {
  if (!value) return fallback;
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return fallback;
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return fallback;
  return parsed;
}
/**
 * Describes the control-plane endpoints the probe checks.
 * Each entry carries the expected HTTP status, the expected `code` value of
 * the JSON body, the fields required under `data`, and a p95 latency budget
 * that can be overridden through an environment variable.
 * @returns {object[]} Fresh endpoint descriptors for `/healthz` and `/api/v1/system/info`.
 */
function controlPlaneEndpoints() {
  const budgetFromEnv = (name, fallback) => Number(env[name] || fallback);
  const healthz = {
    id: "healthz",
    path: "/healthz",
    expected_status: 200,
    expected_code: 0,
    p95_budget_ms: budgetFromEnv("LANGBOT_PERF_HEALTHZ_P95_MS", "500"),
    required_data_fields: [],
  };
  const systemInfo = {
    id: "system_info",
    path: "/api/v1/system/info",
    expected_status: 200,
    expected_code: 0,
    p95_budget_ms: budgetFromEnv("LANGBOT_PERF_SYSTEM_INFO_P95_MS", "1000"),
    required_data_fields: ["version", "edition", "enable_marketplace"],
  };
  return [healthz, systemInfo];
}
/**
 * Requests one control-plane endpoint and validates status, JSON body and shape.
 *
 * A sample is `ok` only when the HTTP status matches, the body is valid JSON,
 * its `code` equals the expected code, and every required field exists under
 * `data`. Transport failures and timeouts resolve (never reject) with
 * `status: 0` and the error message.
 * @param {string} backendUrl - Base URL of the backend.
 * @param {object} endpoint - Descriptor with `id`, `path`, `expected_status`,
 *   `expected_code` and `required_data_fields`.
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<object>} The sample record written to the network log.
 */
async function fetchEndpoint(backendUrl, endpoint, timeoutMs) {
  const url = joinUrl(backendUrl, endpoint.path);
  const identity = { endpoint_id: endpoint.id, path: endpoint.path, url };
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), timeoutMs);
  const startedAtMs = performance.now();
  const elapsedMs = () => Number((performance.now() - startedAtMs).toFixed(3));
  try {
    const response = await fetch(url, {
      method: "GET",
      headers: { "accept": "application/json" },
      signal: abort.signal,
    });
    const bodyText = await response.text();
    let json = null;
    let error = "";
    try {
      if (bodyText) json = JSON.parse(bodyText);
    } catch (parseError) {
      // A non-JSON body is a shape failure, not a transport failure: keep the HTTP status.
      error = parseError instanceof Error ? parseError.message : String(parseError);
    }
    const jsonValid = json !== null;
    const isObject = jsonValid && typeof json === "object";
    const data = isObject && json.data && typeof json.data === "object" ? json.data : {};
    const missingFields = endpoint.required_data_fields.filter((field) => !(field in data));
    const statusOk = response.status === endpoint.expected_status;
    const codeOk = isObject && json.code === endpoint.expected_code;
    return {
      ...identity,
      status: response.status,
      ok: statusOk && codeOk && jsonValid && missingFields.length === 0,
      status_ok: statusOk,
      code_ok: codeOk,
      json_valid: jsonValid,
      missing_fields: missingFields,
      response_code: isObject ? json.code : null,
      latency_ms: elapsedMs(),
      error,
    };
  } catch (fetchError) {
    return {
      ...identity,
      status: 0,
      ok: false,
      status_ok: false,
      code_ok: false,
      json_valid: false,
      missing_fields: endpoint.required_data_fields,
      response_code: null,
      latency_ms: elapsedMs(),
      error: fetchError instanceof Error ? fetchError.message : String(fetchError),
    };
  } finally {
    clearTimeout(timer);
  }
}
/**
 * Probes the endpoints round-robin, `totalRequests` times in total, running
 * at most `concurrency` requests in parallel per batch.
 * @param {string} backendUrl - Base URL of the backend.
 * @param {object[]} endpoints - Endpoint descriptors; an empty list yields no requests.
 * @param {number} totalRequests - Number of requests to issue.
 * @param {number} concurrency - Requests per batch; values that are not a
 *   positive number (0, negative, NaN) are treated as 1.
 * @param {number} timeoutMs - Per-request timeout passed to fetchEndpoint.
 * @returns {Promise<object[]>} One fetchEndpoint result per request, in request order.
 */
async function runBatches(backendUrl, endpoints, totalRequests, concurrency, timeoutMs) {
  // `index % 0` is NaN, which would hand `undefined` descriptors to fetchEndpoint.
  if (endpoints.length === 0) return [];
  // A batch size below 1 would never drain the queue and hang the probe.
  const batchSize = Math.max(1, Math.floor(concurrency) || 1);
  const queue = Array.from({ length: totalRequests }, (_, index) => endpoints[index % endpoints.length]);
  const results = [];
  for (let offset = 0; offset < queue.length; offset += batchSize) {
    const batch = queue.slice(offset, offset + batchSize);
    results.push(...await Promise.all(batch.map((endpoint) => fetchEndpoint(backendUrl, endpoint, timeoutMs))));
  }
  return results;
}
/**
 * Aggregates the samples of each endpoint into request counts, error rate and
 * latency statistics. Latency only considers samples marked `ok`; an endpoint
 * without any sample reports an error rate of 1.
 * @param {object[]} endpoints - Endpoint descriptors (`id`, `path`, `p95_budget_ms`).
 * @param {object[]} results - Samples carrying `endpoint_id`, `ok` and `latency_ms`.
 * @returns {Object<string, object>} Metrics keyed by endpoint id.
 */
function endpointMetrics(endpoints, results) {
  const summarize = (endpoint) => {
    const samples = results.filter((item) => item.endpoint_id === endpoint.id);
    const okSamples = samples.filter((item) => item.ok);
    const failed = samples.length - okSamples.length;
    const errorRate = samples.length === 0 ? 1 : Number((failed / samples.length).toFixed(4));
    return {
      path: endpoint.path,
      requests: samples.length,
      ok_count: okSamples.length,
      error_rate: errorRate,
      latency_ms: stats(okSamples.map((item) => item.latency_ms)),
      p95_budget_ms: endpoint.p95_budget_ms,
    };
  };
  const entries = endpoints.map((endpoint) => [endpoint.id, summarize(endpoint)]);
  return Object.fromEntries(entries);
}
/**
 * Entry point of the live control-plane API probe.
 *
 * Requests the control-plane endpoints in batches, checks error rate, response
 * shape and per-endpoint p95 latency against their budgets, writes
 * `metrics.json`, `endpoints.json`, `network.log`, `automation-result.json`
 * and `result.json` into the evidence directory, prints the result as JSON and
 * exits with 0 (pass), 2 (env_issue) or 1 (fail).
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  const caseId = "langbot-live-control-plane-api";
  const root = resolve(env.LBS_ROOT || process.cwd());
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });
  const startedAt = new Date();
  const backendUrl = env.LANGBOT_BACKEND_URL || "";

  // Per-endpoint budgets can be overridden with a JSON object keyed by endpoint id.
  const endpoints = controlPlaneEndpoints();
  const budgetOverrides = parseJsonObject(env.LANGBOT_CONTROL_PLANE_P95_BUDGETS_JSON, {});
  endpoints.forEach((endpoint) => {
    const override = budgetOverrides[endpoint.id];
    if (typeof override === "number" && Number.isFinite(override)) endpoint.p95_budget_ms = override;
  });

  const totalRequests = Number(env.LANGBOT_CONTROL_PLANE_REQUESTS || "20");
  const concurrency = Number(env.LANGBOT_CONTROL_PLANE_CONCURRENCY || "4");
  const timeoutMs = Number(env.LANGBOT_CONTROL_PLANE_TIMEOUT_MS || "5000");
  const maxErrorRate = Number(env.LANGBOT_CONTROL_PLANE_MAX_ERROR_RATE || "0");
  const artifacts = {
    metrics_json: join(evidenceDir, "metrics.json"),
    endpoints_json: join(evidenceDir, "endpoints.json"),
    network_log: join(evidenceDir, "network.log"),
    automation_result_json: join(evidenceDir, "automation-result.json"),
    result_json: join(evidenceDir, "result.json"),
  };

  let results = [];
  let status = "fail";
  let reason = "";
  if (!backendUrl) {
    status = "env_issue";
    reason = "LANGBOT_BACKEND_URL is not configured.";
  } else {
    results = await runBatches(backendUrl, endpoints, totalRequests, concurrency, timeoutMs);
    // No request produced an HTTP status at all: the backend is unreachable.
    const unreachable = results.length > 0 && results.every((item) => item.status === 0);
    if (unreachable) {
      status = "env_issue";
      reason = `Backend did not respond at ${backendUrl}.`;
    }
  }

  const okCount = results.filter((item) => item.ok).length;
  const failedCount = results.length - okCount;
  const statusCounts = results.reduce((counts, item) => {
    const key = item.status === 0 ? "network_error" : String(item.status);
    counts[key] = (counts[key] || 0) + 1;
    return counts;
  }, {});
  const perEndpoint = endpointMetrics(endpoints, results);
  const hasValidShape = (item) => item.json_valid && item.missing_fields.length === 0 && item.code_ok;
  const responseShapeFailures = results.filter((item) => !hasValidShape(item)).length;
  const errorRate = results.length === 0 ? 1 : Number((failedCount / results.length).toFixed(4));

  const thresholds = {
    error_rate: { actual: errorRate, max: maxErrorRate, pass: errorRate <= maxErrorRate },
    response_shape_failures: { actual: responseShapeFailures, max: 0, pass: responseShapeFailures === 0 },
  };
  for (const { id, p95_budget_ms: budget } of endpoints) {
    const actual = perEndpoint[id].latency_ms.p95;
    thresholds[`${id}_p95_ms`] = { actual, max: budget, pass: actual <= budget };
  }
  // An environment problem is never re-classified by the thresholds.
  if (status !== "env_issue") {
    const passed = Object.values(thresholds).every((item) => item.pass);
    status = passed ? "pass" : "fail";
    reason = passed
      ? "Live control-plane API probe passed all thresholds."
      : "Live control-plane API probe breached shape, latency, or error-rate thresholds.";
  }

  const metrics = {
    probe: caseId,
    backend_url: backendUrl,
    total_requests: totalRequests,
    concurrency,
    timeout_ms: timeoutMs,
    ok_count: okCount,
    error_count: failedCount,
    error_rate: errorRate,
    status_counts: statusCounts,
    response_shape_failures: responseShapeFailures,
    endpoints: perEndpoint,
  };
  const networkLogText = results.map((item) => `${JSON.stringify(item)}\n`).join("");
  await writeFile(artifacts.metrics_json, `${JSON.stringify({ ...metrics, samples: results }, null, 2)}\n`, "utf8");
  await writeFile(artifacts.endpoints_json, `${JSON.stringify(endpoints, null, 2)}\n`, "utf8");
  await writeFile(artifacts.network_log, networkLogText, "utf8");

  const finishedAt = new Date();
  const endpointSummaries = {};
  for (const [id, value] of Object.entries(metrics.endpoints)) {
    endpointSummaries[id] = {
      path: value.path,
      ok_count: value.ok_count,
      error_rate: value.error_rate,
      latency_p50_ms: value.latency_ms.p50,
      latency_p95_ms: value.latency_ms.p95,
    };
  }
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: backendUrl,
    metrics_summary: {
      requests: metrics.total_requests,
      concurrency: metrics.concurrency,
      ok_count: metrics.ok_count,
      error_rate: metrics.error_rate,
      response_shape_failures: metrics.response_shape_failures,
      endpoints: endpointSummaries,
      status_counts: metrics.status_counts,
    },
    thresholds_summary: thresholds,
    artifacts,
    evidence_collected: ["metrics", "network", "api_diagnostic", "filesystem"],
  };
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(artifacts.automation_result_json, resultText, "utf8");
  await writeFile(artifacts.result_json, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  if (status === "pass") exit(0);
  exit(status === "env_issue" ? 2 : 1);
}
await main();
@@ -0,0 +1,162 @@
#!/usr/bin/env node
import { mkdir, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { env, exit } from "node:process";
/**
 * Pads the string form of a value with leading zeros.
 * @param {*} value - Value to pad; converted with String().
 * @param {number} [size=2] - Minimum length of the result.
 * @returns {string} The padded string.
 */
function pad(value, size = 2) {
  const asText = String(value);
  const padded = asText.padStart(size, "0");
  return padded;
}
/**
 * Formats a date in local time with a trailing UTC offset,
 * e.g. `2026-06-25T00:07:37.045+08:00`.
 * @param {Date} [date=new Date()] - Instant to format.
 * @returns {string} The formatted local timestamp.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() reports minutes behind UTC; flip it to the conventional sign.
  const shiftMinutes = -date.getTimezoneOffset();
  const shiftSize = Math.abs(shiftMinutes);
  const pieces = [];
  pieces.push(`${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`);
  pieces.push(`T${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`);
  pieces.push(`.${pad(date.getMilliseconds(), 3)}`);
  pieces.push(shiftMinutes >= 0 ? "+" : "-");
  pieces.push(`${pad(Math.floor(shiftSize / 60))}:${pad(shiftSize % 60)}`);
  return pieces.join("");
}
/**
 * Creates a UTC timestamp slug (second precision) such as `2026-06-24T16-07-37Z`.
 * @param {Date} [date=new Date()] - Instant to encode.
 * @returns {string} The slug, free of characters other than digits, letters and dashes.
 */
function timestampSlug(date = new Date()) {
  const steps = [
    [/\.\d{3}Z$/, "Z"],
    [/[^0-9A-Za-z]+/g, "-"],
    [/^-|-$/g, ""],
  ];
  return steps.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), date.toISOString());
}
/**
 * Computes a nearest-rank percentile, rounded to three decimals.
 * @param {number[]} values - Samples; the input array is not reordered.
 * @param {number} percentileValue - Percentile between 0 and 100.
 * @returns {number} The chosen sample, or 0 when there are no samples.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  // Keep the division last: (7 / 100) * 100 evaluates to 7.000000000000001,
  // so dividing first can bump ceil() to the next rank.
  const rank = Math.ceil((percentileValue * sorted.length) / 100);
  // Percentile 0 gives rank 0; clamp so it reads the minimum rather than index -1.
  const index = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return Number(sorted[index].toFixed(3));
}
/**
 * Summarises samples as min / p50 / p95 / p99 / max.
 * @param {number[]} values - Samples in milliseconds.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   All zeros when `values` is empty; min and max are rounded to three decimals.
 */
function stats(values) {
  // Math.min() / Math.max() with no arguments return Infinity / -Infinity,
  // which JSON.stringify turns into null; report zeros for an empty list instead.
  if (values.length === 0) return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  return {
    min: Number(Math.min(...values).toFixed(3)),
    p50: percentile(values, 50),
    p95: percentile(values, 95),
    p99: percentile(values, 99),
    max: Number(Math.max(...values).toFixed(3)),
  };
}
/**
 * Evaluates a measured value against a limit.
 * @param {number} actual - Measured value.
 * @param {number} limit - Budget to compare against.
 * @param {string} operator - "<=" treats `limit` as a maximum; any other
 *   value treats it as a minimum.
 * @returns {{actual: number, max?: number, min?: number, pass: boolean}}
 *   The limit is stored under `max` or `min` depending on the operator.
 */
function threshold(actual, limit, operator) {
  if (operator === "<=") {
    return { actual, max: limit, pass: actual <= limit };
  }
  return { actual, min: limit, pass: actual >= limit };
}
/**
 * Generates one deterministic synthetic latency sample.
 *
 * Every segment is a fixed formula of `index`. The segments are grouped into
 * LangBot overhead (ingress, pipeline, persistence, plugin IPC, RAG,
 * streaming) and external latency (provider, external tool, network); the
 * end-to-end latency is their sum.
 * @param {number} index - Sample number driving the formulas.
 * @returns {object} The per-segment values plus the rounded overhead,
 *   external and end-to-end totals, and `accounting_gap_ms`, the part of the
 *   total that neither group explains.
 */
function makeSample(index) {
  // base + (index mod period) * step
  const cycle = (base, period, step) => base + (index % period) * step;
  const round = (value, digits) => Number(value.toFixed(digits));

  const segments = {
    ingress: cycle(1, 5, 0.22),
    pipeline: cycle(2.8, 7, 0.31),
    persistence: cycle(1.1, 4, 0.2),
    plugin_ipc: cycle(1.9, 6, 0.27),
    rag: index % 3 === 0 ? 4.4 : cycle(0.8, 5, 0.18),
    streaming: cycle(1.5, 8, 0.24),
    provider: cycle(80, 13, 11),
    external_tool: index % 4 === 0 ? cycle(25, 9, 3) : 0,
    network: cycle(8, 10, 1.7),
  };
  const overhead = segments.ingress + segments.pipeline + segments.persistence + segments.plugin_ipc + segments.rag + segments.streaming;
  const external = segments.provider + segments.external_tool + segments.network;
  const total = overhead + external;
  return {
    index,
    segments_ms: segments,
    langbot_overhead_ms: round(overhead, 3),
    external_latency_ms: round(external, 3),
    e2e_latency_ms: round(total, 3),
    accounting_gap_ms: round(total - external - overhead, 6),
  };
}
/**
 * Entry point of the overhead accounting contract probe.
 *
 * Generates synthetic latency samples, checks the sample count, the p95 of
 * the LangBot overhead and the accounting gap against their thresholds,
 * writes `metrics.json`, `thresholds.json`, `resource-log.json`,
 * `automation-result.json` and `result.json` into the evidence directory,
 * prints the result as JSON and exits with 0 (pass) or 1 (fail).
 *
 * An unusable LANGBOT_PERF_CONTRACT_SAMPLES value is treated as zero samples,
 * which fails the `sample_count` threshold while keeping every metric finite.
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-overhead-accounting-contract";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });
  const startedAt = new Date();
  // Normalise to a non-negative integer so the reported count always matches
  // the number of generated samples (NaN, Infinity and negatives become 0).
  const requestedSamples = Number(env.LANGBOT_PERF_CONTRACT_SAMPLES || "80");
  const sampleCount = Number.isFinite(requestedSamples) ? Math.max(0, Math.floor(requestedSamples)) : 0;
  const overheadP95BudgetMs = Number(env.LANGBOT_PERF_OVERHEAD_P95_MS || "25");
  const samples = Array.from({ length: sampleCount }, (_, index) => makeSample(index));
  const overheads = samples.map((sample) => sample.langbot_overhead_ms);
  const e2e = samples.map((sample) => sample.e2e_latency_ms);
  const external = samples.map((sample) => sample.external_latency_ms);
  const gaps = samples.map((sample) => Math.abs(sample.accounting_gap_ms));
  const memory = process.memoryUsage();
  // Math.min()/Math.max() over an empty list yield ±Infinity, which serialises
  // as null and lets -Infinity pass a "<=" threshold; use zeros instead.
  const summarize = (values) => (values.length === 0 ? { min: 0, p50: 0, p95: 0, p99: 0, max: 0 } : stats(values));
  const metrics = {
    probe: caseId,
    sample_count: sampleCount,
    langbot_overhead_ms: summarize(overheads),
    e2e_latency_ms: summarize(e2e),
    external_latency_ms: summarize(external),
    accounting_gap_max_ms: gaps.length === 0 ? 0 : Number(Math.max(...gaps).toFixed(6)),
    samples,
  };
  const thresholds = {
    sample_count: threshold(sampleCount, 50, ">="),
    langbot_overhead_p95_ms: threshold(metrics.langbot_overhead_ms.p95, overheadP95BudgetMs, "<="),
    accounting_gap_max_ms: threshold(metrics.accounting_gap_max_ms, 0.001, "<="),
  };
  const status = Object.values(thresholds).every((item) => item.pass) ? "pass" : "fail";
  const metricsPath = join(evidenceDir, "metrics.json");
  const thresholdsPath = join(evidenceDir, "thresholds.json");
  const resourceLogPath = join(evidenceDir, "resource-log.json");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");
  await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
  await writeFile(thresholdsPath, `${JSON.stringify(thresholds, null, 2)}\n`, "utf8");
  await writeFile(resourceLogPath, `${JSON.stringify({ memory, pid: process.pid }, null, 2)}\n`, "utf8");
  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason: status === "pass"
      ? "Overhead accounting contract passed all thresholds."
      : "Overhead accounting contract breached one or more thresholds.",
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    metrics_summary: {
      sample_count: metrics.sample_count,
      langbot_overhead_p95_ms: metrics.langbot_overhead_ms.p95,
      e2e_latency_p95_ms: metrics.e2e_latency_ms.p95,
      external_latency_p95_ms: metrics.external_latency_ms.p95,
      accounting_gap_max_ms: metrics.accounting_gap_max_ms,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      thresholds_json: thresholdsPath,
      resource_log_json: resourceLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "resource_log", "filesystem"],
  };
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : 1);
}
await main();
@@ -0,0 +1,173 @@
# Performance And Reliability Testing
Use this reference when a QA request asks whether LangBot is fast enough,
stable under load, or resilient to controlled faults.
## Scope
Treat `skills/` as the QA control plane:
- Cases define intent, readiness, thresholds, and required evidence.
- Probe scripts collect metrics, traces, resource logs, and artifacts.
- Reports classify each run as exactly one of `pass`, `fail`, `blocked`,
`env_issue`, or `flaky`.
Do not turn `skills/` into a load generator or chaos engine. Call a focused
tool from a `mode: probe` case when the test needs one, for example k6,
Locust, pytest-benchmark, Playwright trace collection, Toxiproxy, Docker, or a
Kubernetes disruption tool.
## LangBot Performance Model
For LangBot, performance is the cost LangBot adds around external systems:
```text
LangBot overhead = end-to-end latency - provider latency - external tool latency - network/fault injection latency
```
Measure user experience and internal composition separately:
- WebUI load and interaction latency.
- Debug Chat send-to-first-visible-token and send-to-completion latency.
- Pipeline, RAG, plugin runtime, MCP, AgentRunner, and persistence segment
latency.
- Queue wait time, concurrency, throughput, timeout rate, and p95/p99 latency.
- Startup, plugin install, knowledge-base ingestion, migration, and recovery
time.
Do not report a single message round-trip time as "LangBot performance" unless
the report also explains external provider/tool/network time.
## Evidence Contract
Performance and reliability cases should declare the evidence they need:
- `metrics`: machine-readable latency, throughput, error-rate, or recovery
metrics, usually `metrics.json`.
- `resource_log`: CPU, memory, process, connection, queue, or file descriptor
samples.
- `trace`: browser, HTTP, database, or runtime trace artifacts.
- `profile`: CPU, memory, or flamegraph profile artifacts.
- `backend_log`, `network`, `api_diagnostic`, and `filesystem` as supporting
evidence when relevant.
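Automation should write `automation-result.json` into the case evidence
directory with the fields below when available. Also record the run's
`case_id` and `run_id`: when the automation process reports a spawn error, the
runner only falls back to a result file whose ids match the current run.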
```json
{
"status": "pass",
"reason": "Probe passed all thresholds.",
"metrics_summary": {
"langbot_overhead_p95_ms": 12.4,
"error_rate": 0
},
"thresholds_summary": {
"langbot_overhead_p95_ms": { "actual": 12.4, "max": 50, "pass": true }
},
"artifacts": {
"metrics_json": "/path/to/metrics.json"
},
"evidence_collected": ["metrics", "filesystem"]
}
```
Synthetic contract probes are useful for checking the QA harness, but they are
not live product performance results. Label them as contract probes in the case
title, checks, and report.
## Chaos And Reliability Rules
Chaos tests must be narrow and reversible:
- Declare the fault model in `fault_model_json`.
- Record blast radius, target component, injection method, duration, and abort
conditions.
- Capture recovery checks and cleanup steps in the case.
- Classify unavailable dependencies as `env_issue` unless the target behavior
is LangBot's handling of that dependency failure.
- Do not run destructive fault injection against a shared or production-like
instance without explicit operator approval.
Recommended first fault models:
- Provider timeout or HTTP 429 from a fake provider endpoint.
- Plugin runtime disconnect/reconnect in a local instance.
- MCP stdio server exits mid-call.
- RAG parser fixture fails once and recovers on retry.
- Backend API endpoint returns 5xx from a controlled local proxy.
## Starter Live Probes
The starter gate separates QA-harness contracts from live product checks:
- `langbot-overhead-accounting-contract` verifies that reports can carry
overhead accounting metrics. It uses deterministic synthetic samples and is
not live product performance.
- `langbot-fault-taxonomy-contract` verifies that fault scenarios declare
expected status, recovery, and cleanup before destructive chaos tests are
added.
- `langbot-live-backend-latency` checks the unauthenticated `/healthz`
endpoint for basic backend responsiveness.
- `langbot-live-control-plane-api` checks `/healthz` and
`/api/v1/system/info` for HTTP 200, JSON `code: 0`, response shape, and
per-endpoint p95 latency.
- `langbot-live-backend-log-health` scans the recent backend log window for
fail-severity runtime findings. It is the reliability guard that should fail
the gate when HTTP probes pass but backend logs contain Traceback, ImportError,
ERROR, unclosed sessions, or unawaited coroutine signals.
Do not treat these starter live probes as Debug Chat or model-provider
performance. They are control-plane readiness checks; user-facing performance
needs browser/WebSocket/message-path measurements.
## Gate Layers
Use the smallest gate that answers the quality question:
- `langbot-performance-contract-gate`: fast synthetic checks for report shape,
threshold accounting, and fault taxonomy. Good for PR feedback when no live
service is running.
- `langbot-live-backend-gate`: live backend `/healthz`,
`/api/v1/system/info`, and backend log health. Good after starting a local
LangBot backend.
- `langbot-user-path-performance-gate`: browser-visible user path performance,
starting with Pipeline Debug Chat send-to-visible-completion latency. Run it
only when the browser profile and target pipeline are ready.
- `langbot-performance-reliability-gate`: combined starter gate for synthetic
contracts plus live backend checks.
Keep environment diagnostics separate from product regressions. For example, a
SOCKS proxy without Python `socksio` support is an environment problem:
`bin/lbs env doctor` flags it, and it should be fixed or classified as
`env_issue` rather than reported as a product regression. Do not hide the
resulting backend Traceback in reports.
## Debug Chat Performance
`pipeline-debug-chat-performance` reuses the browser Debug Chat automation and
adds `metrics.json`, `metrics_summary`, and `thresholds_summary` to
`automation-result.json`.
Current metric:
```text
response_duration_ms = prompt send -> expected assistant response visible and stable
```
This is a user-path metric, not pure LangBot overhead. If it regresses, inspect
provider latency, model route health, plugin/runtime logs, WebSocket behavior,
and browser console/network evidence before attributing the whole duration to
LangBot.
## Running The First Gate
Start with the reusable suite:
```bash
rtk bin/lbs suite plan langbot-performance-reliability-gate
rtk bin/lbs suite start langbot-performance-reliability-gate --run-id langbot-perf-rel-local
```
Run synthetic contract probes first. Run live probes only after the selected
backend/frontend instance is reachable and the run owner accepts any fault
scope.
@@ -0,0 +1,14 @@
id: langbot-live-backend-gate
title: "LangBot live backend reliability gate"
description: "Live backend control-plane responsiveness and runtime log health checks for a locally running LangBot instance."
type: reliability
priority: p1
tags:
- performance
- reliability
- live-backend
- metrics
cases:
- langbot-live-backend-latency
- langbot-live-control-plane-api
- langbot-live-backend-log-health
@@ -0,0 +1,13 @@
id: langbot-performance-contract-gate
title: "LangBot performance contract gate"
description: "Fast synthetic contract checks for performance metric accounting and non-destructive reliability fault taxonomy."
type: contract
priority: p1
tags:
- performance
- reliability
- contract
- metrics
cases:
- langbot-overhead-accounting-contract
- langbot-fault-taxonomy-contract
@@ -0,0 +1,16 @@
id: langbot-performance-reliability-gate
title: "LangBot performance and reliability starter gate"
description: "Starter gate for LangBot performance accounting, live backend control-plane latency, and non-destructive fault taxonomy checks."
type: reliability
priority: p1
tags:
- performance
- reliability
- metrics
- chaos
cases:
- langbot-overhead-accounting-contract
- langbot-fault-taxonomy-contract
- langbot-live-backend-latency
- langbot-live-control-plane-api
- langbot-live-backend-log-health
@@ -0,0 +1,12 @@
id: langbot-user-path-performance-gate
title: "LangBot user-path performance gate"
description: "Browser-visible performance checks for user-facing LangBot paths such as Pipeline Debug Chat."
type: performance
priority: p1
tags:
- performance
- browser
- debug-chat
- user-path
cases:
- pipeline-debug-chat-performance
+35
View File
@@ -1,5 +1,7 @@
import { existsSync } from "node:fs";
import { spawnSync } from "node:child_process";
import { Socket } from "node:net";
import { join } from "node:path";
import type { CommandContext } from "../types.ts";
import { parseOptions } from "../cli.ts";
import { loadEnv } from "../fs.ts";
@@ -88,6 +90,37 @@ function compareProxyPair(env: Record<string, string>, upper: string, lower: str
return null;
}
/**
 * Resolve an environment key for the doctor checks.
 *
 * The live process environment wins over the loaded env map; when neither
 * defines the key an empty string is returned. An empty string set in the
 * process environment is still treated as a defined value.
 */
function envValue(env: Record<string, string>, key: string): string {
  const fromProcess = process.env[key];
  if (fromProcess !== undefined && fromProcess !== null) return fromProcess;
  const fromMap = env[key];
  if (fromMap !== undefined && fromMap !== null) return fromMap;
  return "";
}
/**
 * Find the first proxy variable whose value starts with "socks"
 * (case-insensitive).
 *
 * Variables are checked in a fixed order: ALL_PROXY, all_proxy, HTTPS_PROXY,
 * https_proxy, HTTP_PROXY, http_proxy. Returns the matching key together with
 * its resolved value, or `null` when none of them points at a SOCKS proxy.
 */
function activeSocksProxy(env: Record<string, string>): { key: string; value: string } | null {
  const candidates = ["ALL_PROXY", "all_proxy", "HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"];
  for (const key of candidates) {
    const value = envValue(env, key);
    if (!/^socks/i.test(value)) continue;
    return { key, value };
  }
  return null;
}
/**
 * Check that the LangBot virtualenv can import `socksio` whenever a SOCKS
 * proxy is active.
 *
 * @param env - Env map for the doctor run; `LANGBOT_REPO` is used to locate
 *   `<repo>/.venv/bin/python`.
 * @returns `null` when no SOCKS proxy is configured or the import succeeds;
 *   otherwise a failure message describing what to fix.
 */
function checkSocksio(env: Record<string, string>): string | null {
  const proxy = activeSocksProxy(env);
  if (!proxy) return null;
  const proxyLabel = `SOCKS proxy ${proxy.key} is configured (${redactEnvValue(proxy.key, proxy.value)})`;
  const repo = env.LANGBOT_REPO;
  const python = repo ? join(repo, ".venv", "bin", "python") : "";
  if (!python || !existsSync(python)) {
    return `${proxyLabel}, but LangBot venv python was not found; after creating the venv, verify it can import socksio.`;
  }
  const result = spawnSync(python, ["-c", "import socksio"], {
    encoding: "utf8",
    timeout: 5000,
  });
  if (result.error) {
    // The interpreter could not be launched or hit the timeout. That says
    // nothing about whether socksio is installed, so do not suggest pip here.
    return `${proxyLabel}, but ${python} could not be run to check socksio (${result.error.message}); verify the venv interpreter is executable, then re-run env doctor.`;
  }
  if (result.status === 0) return null;
  return `${proxyLabel}, but ${python} cannot import socksio; run \`${python} -m pip install socksio\` or start LangBot without SOCKS proxy env.`;
}
export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
const env = loadEnv(ctx.root);
const failures: string[] = [];
@@ -117,6 +150,8 @@ export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
]) {
if (mismatch) failures.push(mismatch);
}
const socksioFailure = checkSocksio(env);
if (socksioFailure) failures.push(socksioFailure);
for (const [label, result] of await Promise.all([
checkUrl("LANGBOT_BACKEND_URL", env.LANGBOT_BACKEND_URL).then((result) => ["LANGBOT_BACKEND_URL", result] as const),
+44 -3
View File
@@ -465,6 +465,41 @@ function outputTail(value: string | Buffer | null | undefined): string {
return String(value ?? "").trim().slice(-4000);
}
/**
 * Map a case result status to a process exit code:
 * `pass` -> 0; `blocked`, `env_issue`, `flaky` -> 2; anything else -> 1.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
/**
 * Translate an exit code into an execution label:
 * 0 -> "ok", 2 -> "classified", every other code -> "nonzero".
 */
function executionStatusFromExitStatus(status: number): string {
  return status === 0 ? "ok" : status === 2 ? "classified" : "nonzero";
}
/**
 * Build an execution record from `<evidence_dir>/result.json` for a suite case.
 *
 * Returns `null` when the file is missing, cannot be read or parsed, does not
 * carry a string `status`, or its `case_id` / `run_id` differ from the case
 * item's `id` / `run_id`. Otherwise the recorded status is mapped to an exit
 * status and execution label.
 */
function executionFromCaseResultFile(caseItem: Record<string, unknown>): Record<string, unknown> | null {
  const resultPath = join(String(caseItem.evidence_dir), "result.json");
  if (!existsSync(resultPath)) return null;
  try {
    const payload = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const { case_id: recordedCase, run_id: recordedRun, status: resultStatus, reason } = payload;
    const idsMatch = recordedCase === caseItem.id && recordedRun === caseItem.run_id;
    if (!idsMatch || typeof resultStatus !== "string") return null;
    const exitStatus = exitStatusFromResultStatus(resultStatus);
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: typeof reason === "string" ? reason : "result.json completed",
      result_status: resultStatus,
      result_json: resultPath,
    };
  } catch {
    // Unreadable or malformed result files are treated as "no result".
    return null;
  }
}
function executionProblemStatus(executions: Array<Record<string, unknown>>): string {
const statuses = executions.map((item) => String(item.status));
if (statuses.includes("nonzero")) return "fail";
@@ -523,12 +558,18 @@ export function commandSuiteRun(ctx: CommandContext): number {
encoding: "utf8",
stdio: options.json === true ? "pipe" : "inherit",
});
const status = result.error ? 1 : result.status ?? 1;
const fileExecution = result.error ? executionFromCaseResultFile(caseItem) : null;
const status = typeof fileExecution?.exit_status === "number"
? fileExecution.exit_status
: result.error ? 1 : result.status ?? 1;
executions.push({
id: caseItem.id,
status: status === 0 ? "ok" : "nonzero",
status: fileExecution?.status ?? executionStatusFromExitStatus(status),
exit_status: status,
reason: result.error?.message || "",
reason: fileExecution?.reason ?? result.error?.message ?? "",
result_status: fileExecution?.result_status,
result_json: fileExecution?.result_json,
spawn_error: fileExecution && result.error ? result.error.message : undefined,
stdout: outputTail(result.stdout),
stderr: outputTail(result.stderr),
});
+95 -14
View File
@@ -271,7 +271,7 @@ function reportTemplate(mode: string): Record<string, string> {
target_tested: "Probe target, endpoint, file, command, or service actually checked",
execution_path: "automation script | shell command | direct API | other",
probe_result: "What the probe observed",
logs_or_artifacts: "Log, filesystem, API, or other artifact paths collected",
metrics_or_artifacts: "Metrics, logs, filesystem artifacts, traces, or profiles collected",
diagnostics: "Extra diagnostics used, if any",
matched_troubleshooting: "Troubleshooting ids matched, if any",
assets_to_update: "New case/reference/troubleshooting entries to add",
@@ -320,7 +320,7 @@ function manualEvidenceTemplate(mode: string): ManualEvidenceTemplate {
target_tested: "TODO: probe target, endpoint, file, command, or service actually checked",
execution_path: "TODO: automation script | shell command | direct API | other",
probe_result: "TODO: observed probe result",
logs_or_artifacts: "TODO: evidence paths or skipped reason",
metrics_or_artifacts: "TODO: metrics, logs, filesystem artifacts, traces, or profiles collected",
diagnostics: "TODO: additional diagnostics used, if any",
matched_troubleshooting: "TODO: troubleshooting ids matched, if any",
assets_to_update: "TODO: case/reference/troubleshooting updates to make",
@@ -1099,6 +1099,41 @@ function executionTail(value: string | Buffer | null | undefined): string {
return String(value ?? "").trim().slice(-4000);
}
/**
 * Convert an automation result status into an exit code.
 *
 * `blocked`, `env_issue`, and `flaky` share exit code 2; `pass` is 0 and any
 * other status is 1.
 */
function exitStatusFromResultStatus(status: string): number {
  const classifiedStatuses = ["blocked", "env_issue", "flaky"];
  if (classifiedStatuses.includes(status)) return 2;
  return status === "pass" ? 0 : 1;
}
/**
 * Label an exit code for the execution record: "ok" for 0, "classified" for
 * 2, and "nonzero" for everything else.
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
/**
 * Build an execution record from `<evidenceDir>/automation-result.json`.
 *
 * Returns `null` when the file is missing, cannot be read or parsed, has no
 * string `status`, or was written for a different case or run. Otherwise the
 * recorded status is mapped to an exit status and execution label, and the
 * file path is returned alongside them.
 */
function executionFromAutomationResultFile(
  evidenceDir: string,
  caseId: string,
  runId: string,
): { status: string; exit_status: number; reason: string; result_status: string; path: string } | null {
  const resultPath = join(evidenceDir, "automation-result.json");
  if (!existsSync(resultPath)) return null;
  try {
    const payload = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const { case_id: recordedCase, run_id: recordedRun, status: resultStatus, reason } = payload;
    const idsMatch = recordedCase === caseId && recordedRun === runId;
    if (!idsMatch || typeof resultStatus !== "string") return null;
    const exitStatus = exitStatusFromResultStatus(resultStatus);
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason: typeof reason === "string" ? reason : "automation-result.json completed",
      result_status: resultStatus,
      path: resultPath,
    };
  } catch {
    // Unreadable or malformed result files are treated as "no result".
    return null;
  }
}
function runSetupAutomation(
ctx: CommandContext,
item: StructuredItem,
@@ -1224,6 +1259,30 @@ export function commandTestRun(ctx: CommandContext): number {
});
if (result.error) {
const fileExecution = executionFromAutomationResultFile(
run.automation.evidence_dir,
String(run.case.id),
run.run_id,
);
if (fileExecution) {
if (options.json !== true) {
console.error(`WARN: automation spawn reported an error, but ${fileExecution.path} completed: ${result.error.message}`);
}
if (options.json === true) {
console.log(JSON.stringify({
run,
setup_executions: setupExecutions,
automation_execution: {
...fileExecution,
spawn_error: result.error.message,
stdout: executionTail(result.stdout),
stderr: executionTail(result.stderr),
},
exit_status: fileExecution.exit_status,
}, null, 2));
}
return fileExecution.exit_status;
}
if (options.json !== true) console.error(`ERROR: failed to run automation: ${result.error.message}`);
if (options.json === true) {
console.log(JSON.stringify({
@@ -1247,7 +1306,7 @@ export function commandTestRun(ctx: CommandContext): number {
run,
setup_executions: setupExecutions,
automation_execution: {
status: status === 0 ? "ok" : "nonzero",
status: executionStatusFromExitStatus(status),
exit_status: status,
stdout: executionTail(result.stdout),
stderr: executionTail(result.stderr),
@@ -1311,6 +1370,7 @@ function renderMarkdownReport(report: TestReport): string {
const environment = report.environment;
const logGuard = report.log_guard;
const troubleshooting = report.troubleshooting;
const automation = report.automation_result;
const lines: string[] = [];
lines.push(`# Test Report: ${reportCase.id}`);
@@ -1323,20 +1383,41 @@ function renderMarkdownReport(report: TestReport): string {
lines.push(`Type: ${reportCase.type}`);
lines.push("");
lines.push("## Result");
lines.push(`- result: ${evidence.result}`);
for (const [key, value] of Object.entries(evidence)) {
if (key !== "result") lines.push(`- ${key}: ${value}`);
if (automation.status === "loaded" && automation.result) {
lines.push(`- result: ${automation.result}`);
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
if (automation.url) lines.push(`- target_tested: ${automation.url}`);
if (automation.path) lines.push(`- automation_result: ${automation.path}`);
if (automation.artifacts) lines.push(`- artifacts: ${JSON.stringify(automation.artifacts)}`);
} else {
lines.push(`- result: ${evidence.result}`);
for (const [key, value] of Object.entries(evidence)) {
if (key !== "result") lines.push(`- ${key}: ${value}`);
}
}
lines.push("");
lines.push("## Automation Result");
lines.push(`- status: ${report.automation_result.status}`);
if (report.automation_result.path) lines.push(`- path: ${report.automation_result.path}`);
if (report.automation_result.result) lines.push(`- result: ${report.automation_result.result}`);
if (report.automation_result.reason) lines.push(`- reason: ${report.automation_result.reason}`);
if (report.automation_result.started_at_local) lines.push(`- started_at_local: ${report.automation_result.started_at_local}`);
if (report.automation_result.finished_at_local) lines.push(`- finished_at_local: ${report.automation_result.finished_at_local}`);
if (report.automation_result.url) lines.push(`- url: ${report.automation_result.url}`);
if (report.automation_result.expected_text) lines.push(`- expected_text: ${report.automation_result.expected_text}`);
lines.push(`- status: ${automation.status}`);
if (automation.path) lines.push(`- path: ${automation.path}`);
if (automation.result) lines.push(`- result: ${automation.result}`);
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
if (automation.duration_ms !== undefined) lines.push(`- duration_ms: ${automation.duration_ms}`);
if (automation.started_at_local) lines.push(`- started_at_local: ${automation.started_at_local}`);
if (automation.finished_at_local) lines.push(`- finished_at_local: ${automation.finished_at_local}`);
if (automation.url) lines.push(`- url: ${automation.url}`);
if (automation.expected_text) lines.push(`- expected_text: ${automation.expected_text}`);
if (automation.metrics_summary) {
lines.push("- metrics_summary:");
lines.push(` ${JSON.stringify(automation.metrics_summary)}`);
}
if (automation.thresholds_summary) {
lines.push("- thresholds_summary:");
lines.push(` ${JSON.stringify(automation.thresholds_summary)}`);
}
if (automation.artifacts) {
lines.push("- artifacts:");
lines.push(` ${JSON.stringify(automation.artifacts)}`);
}
lines.push("");
lines.push("## Environment");
for (const [key, value] of Object.entries(environment)) lines.push(`- ${key}=${value}`);
+3
View File
@@ -126,6 +126,9 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set<st
...validateEnvKeyScalar(item, "automation_pipeline_url_env"),
...validateEnvKeyScalar(item, "automation_pipeline_name_env"),
...validateJsonScalar(item, "automation_filesystem_checks_json"),
...validateJsonScalar(item, "metrics_thresholds_json"),
...validateJsonScalar(item, "load_profile_json"),
...validateJsonScalar(item, "fault_model_json"),
...listValue(item.fields, "setup_automation").flatMap((entry) => (
validateSetupAutomationEntry(root, entry, caseIds).map((error) => `${item.path}: ${error}`)
)),
+27 -2
View File
@@ -9,7 +9,18 @@ export const requiredEnvKeys = [
];
export const caseModeValues = ["agent-browser", "probe"];
export const caseTypeValues = ["smoke", "regression", "feature", "provider", "exploratory"];
export const caseTypeValues = [
"smoke",
"regression",
"feature",
"provider",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security",
];
export const casePriorityValues = ["p0", "p1", "p2"];
export const caseRiskValues = ["low", "medium", "high"];
export const caseEvidenceValues = [
@@ -21,10 +32,24 @@ export const caseEvidenceValues = [
"frontend_log",
"api_diagnostic",
"filesystem",
"metrics",
"trace",
"profile",
"resource_log",
];
export const testResultStatusValues = ["pass", "fail", "blocked", "env_issue", "flaky"];
export const troubleshootingCategoryValues = ["product", "env_issue", "external_dependency", "blocked", "flaky"];
export const suiteTypeValues = ["smoke", "regression", "release_gate", "exploratory"];
export const suiteTypeValues = [
"smoke",
"regression",
"release_gate",
"exploratory",
"contract",
"performance",
"reliability",
"chaos",
"security",
];
export const suiteRequiredStrings = ["id", "title", "description", "type", "priority"];
export const suiteRequiredLists = ["tags", "cases"];
+20
View File
@@ -91,6 +91,7 @@ export type AutomationResultEvidence = {
path?: string;
result?: string;
reason?: string;
duration_ms?: number;
started_at?: string;
started_at_local?: string;
finished_at?: string;
@@ -98,6 +99,9 @@ export type AutomationResultEvidence = {
url?: string;
prompt?: string;
expected_text?: string;
metrics_summary?: Record<string, unknown>;
thresholds_summary?: Record<string, unknown>;
artifacts?: Record<string, unknown>;
};
type MutableScanState = {
@@ -594,6 +598,18 @@ function stringField(data: Record<string, unknown>, key: string): string | undef
return typeof value === "string" && value.trim() ? value : undefined;
}
/**
 * Read `data[key]` as a finite number.
 *
 * Returns `undefined` for non-number values as well as `NaN` and infinities.
 */
function numberField(data: Record<string, unknown>, key: string): number | undefined {
  const candidate = data[key];
  if (typeof candidate !== "number") return undefined;
  return Number.isFinite(candidate) ? candidate : undefined;
}
/**
 * Read `data[key]` as a plain object-like value.
 *
 * Returns `undefined` when the value is falsy, not of type "object", or an
 * array; otherwise the same object reference is returned.
 */
function objectField(data: Record<string, unknown>, key: string): Record<string, unknown> | undefined {
  const candidate = data[key];
  if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
    return undefined;
  }
  return candidate as Record<string, unknown>;
}
function evidenceDirFromOptions(options: Record<string, string | boolean>): string | undefined {
const explicit = typeof options["evidence-dir"] === "string" ? options["evidence-dir"] : undefined;
if (explicit) return resolve(explicit);
@@ -628,6 +644,7 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
path: resultPath,
result: stringField(result, "status"),
reason: stringField(result, "reason"),
duration_ms: numberField(result, "duration_ms"),
started_at: stringField(result, "started_at"),
started_at_local: stringField(result, "started_at_local"),
finished_at: stringField(result, "finished_at"),
@@ -635,6 +652,9 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
url: stringField(result, "url"),
prompt: redactSecrets(stringField(result, "prompt") ?? ""),
expected_text: stringField(result, "expected_text"),
metrics_summary: objectField(result, "metrics_summary"),
thresholds_summary: objectField(result, "thresholds_summary"),
artifacts: objectField(result, "artifacts"),
};
} catch (error) {
return { status: "invalid", path: resultPath, reason: String(error) };
+2
View File
@@ -114,6 +114,8 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro
["automation_expected_runner_id", "LANGBOT_E2E_EXPECTED_RUNNER_ID"],
["automation_reset_debug_chat", "LANGBOT_E2E_RESET_DEBUG_CHAT"],
["automation_debug_chat_session_type", "LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE"],
["automation_debug_chat_response_p95_ms", "LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS"],
["automation_debug_chat_max_error_rate", "LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE"],
["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"],
["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"],
["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],
+159 -1
View File
@@ -1,6 +1,6 @@
import assert from "node:assert/strict";
import { test } from "node:test";
import { appendFileSync, existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { appendFileSync, chmodSync, existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { spawnSync } from "node:child_process";
import { tmpdir } from "node:os";
import { join } from "node:path";
@@ -676,6 +676,82 @@ test("suite run JSON captures failed case output", () => {
}
});
// Verifies that `suite run` keeps an automation's self-classified `env_issue`
// result: the command exits with 2, the execution is labelled "classified",
// and the suite report carries status "env_issue" with execution_status "ok".
test("suite run preserves classified env_issue automation results", () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-suite-run-env-issue-"));
try {
// Minimal on-disk skill layout: one skill, one probe case, one suite, and
// the automation script the case points at.
const skillDir = join(tmp, "skills", "langbot-testing");
const casesDir = join(skillDir, "cases");
const suitesDir = join(skillDir, "suites");
const scriptsDir = join(tmp, "scripts");
mkdirSync(casesDir, { recursive: true });
mkdirSync(suitesDir, { recursive: true });
mkdirSync(scriptsDir, { recursive: true });
writeFileSync(join(skillDir, "SKILL.md"), "---\nname: langbot-testing\ndescription: Testing.\n---\n\n# Testing\n");
writeFileSync(join(tmp, "skills", ".env"), "");
writeFileSync(
join(casesDir, "env-case.yaml"),
[
"id: env-case",
"title: Env Case",
"mode: probe",
"area: qa",
"type: smoke",
"priority: p2",
"risk: low",
"ci_eligible: true",
"automation: scripts/env-issue.mjs",
"evidence_required:",
" - filesystem",
].join("\n"),
);
writeFileSync(
join(suitesDir, "mini.yaml"),
[
"id: mini",
"title: Mini",
"description: Mini suite.",
"type: smoke",
"priority: p2",
"tags:",
" - qa",
"cases:",
" - env-case",
].join("\n"),
);
// The fake automation writes both result.json and automation-result.json
// with status `env_issue`, then exits with code 2.
writeFileSync(
join(scriptsDir, "env-issue.mjs"),
[
"import { mkdirSync, writeFileSync } from 'node:fs';",
"import { join } from 'node:path';",
"mkdirSync(process.env.LBS_EVIDENCE_DIR, { recursive: true });",
"const result = {",
" case_id: process.env.LBS_CASE_ID,",
" run_id: process.env.LBS_RUN_ID,",
" status: 'env_issue',",
" reason: 'backend not reachable',",
" evidence_collected: ['filesystem']",
"};",
"writeFileSync(join(process.env.LBS_EVIDENCE_DIR, 'result.json'), JSON.stringify(result));",
"writeFileSync(join(process.env.LBS_EVIDENCE_DIR, 'automation-result.json'), JSON.stringify({ ...result, source: 'automation' }));",
"process.exit(2);",
].join("\n"),
);
const result = capture(() => commandSuiteRun({
root: tmp,
args: ["suite", "run", "mini", "--run-id", "mini-run", "--evidence-dir", join(tmp, "evidence"), "--json"],
}));
// Exit code 2 must surface as a classified result, not as a generic failure.
assert.equal(result.code, 2);
const payload = JSON.parse(result.output);
assert.equal(payload.executions[0].status, "classified");
assert.equal(payload.report.status, "env_issue");
assert.equal(payload.report.execution_status, "ok");
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("suite run failure cannot be masked by stale pass result", () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-suite-run-stale-pass-"));
try {
@@ -1369,6 +1445,56 @@ test("env doctor does not require proxy variables", async () => {
}
});
// Verifies that `env doctor` fails with a socksio hint when the fixture .env
// configures a SOCKS proxy and the venv python cannot import socksio.
test("env doctor reports missing socksio for active SOCKS proxy", async () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-env-doctor-socksio-"));
const originalAllProxy = process.env.ALL_PROXY;
const originalAllProxyLower = process.env.all_proxy;
try {
// Clear any ambient ALL_PROXY/all_proxy for the duration of the test; the
// originals are restored in the finally block.
delete process.env.ALL_PROXY;
delete process.env.all_proxy;
const skillsDir = join(tmp, "skills");
const repoDir = join(tmp, "LangBot");
const webDir = join(repoDir, "web");
const venvBin = join(repoDir, ".venv", "bin");
const browserProfile = join(tmp, "browser-profile");
const chromium = join(tmp, "chromium");
mkdirSync(skillsDir, { recursive: true });
mkdirSync(webDir, { recursive: true });
mkdirSync(venvBin, { recursive: true });
mkdirSync(browserProfile, { recursive: true });
writeFileSync(chromium, "");
// Stand-in for the venv interpreter: an executable shell script that always
// exits 1, so any `python -c ...` invocation looks like a failed import.
const python = join(venvBin, "python");
writeFileSync(python, "#!/bin/sh\nexit 1\n");
chmodSync(python, 0o755);
writeFileSync(
join(skillsDir, ".env"),
[
"LANGBOT_BACKEND_URL=http://127.0.0.1:59996",
"LANGBOT_FRONTEND_URL=http://127.0.0.1:59996",
"LANGBOT_DEV_FRONTEND_URL=http://127.0.0.1:59996",
`LANGBOT_REPO=${repoDir}`,
`LANGBOT_WEB_REPO=${webDir}`,
`LANGBOT_BROWSER_PROFILE=${browserProfile}`,
`LANGBOT_CHROMIUM_EXECUTABLE=${chromium}`,
"ALL_PROXY=socks5://127.0.0.1:7890",
].join("\n"),
);
const result = await captureAsync(() => commandEnvDoctor({ root: tmp, args: ["env", "doctor"] }));
assert.equal(result.code, 1);
assert.match(result.output, /FAIL: SOCKS proxy ALL_PROXY is configured/);
assert.match(result.output, /cannot import socksio/);
assert.match(result.output, /-m pip install socksio/);
} finally {
// Restore the caller's proxy environment exactly, including "unset".
if (originalAllProxy === undefined) delete process.env.ALL_PROXY;
else process.env.ALL_PROXY = originalAllProxy;
if (originalAllProxyLower === undefined) delete process.env.all_proxy;
else process.env.all_proxy = originalAllProxyLower;
rmSync(tmp, { recursive: true, force: true });
}
});
test("env show redacts secret-like values by default", () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-env-show-redact-"));
try {
@@ -2521,6 +2647,38 @@ test("test report renders a reusable evidence template", () => {
assert.match(result.output, /no log files provided/);
});
// Verifies that when automation-result.json is present in the evidence dir,
// the rendered report's "## Result" section shows the automation's result,
// reason, and url (as target_tested) rather than TODO placeholders.
test("test report promotes loaded automation evidence into result section", () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-report-automation-"));
try {
// Only automation-result.json is written; metrics.json is referenced by path
// in `artifacts` but never created.
writeFileSync(
join(tmp, "automation-result.json"),
JSON.stringify({
status: "pass",
reason: "latency thresholds passed",
url: "http://127.0.0.1:5300",
artifacts: { metrics_json: join(tmp, "metrics.json") },
}),
);
const result = capture(() => commandTestReport(ctx([
"test",
"report",
"langbot-live-backend-latency",
"--evidence-dir",
tmp,
"--no-auto-log",
])));
assert.equal(result.code, 0);
assert.match(result.output, /## Result\n- result: pass\n- reason: latency thresholds passed/);
assert.match(result.output, /- target_tested: http:\/\/127\.0\.0\.1:5300/);
assert.doesNotMatch(result.output, /target_tested: TODO/);
// The dedicated automation section must still be rendered as well.
assert.match(result.output, /## Automation Result/);
} finally {
rmSync(tmp, { recursive: true, force: true });
}
});
test("validate rejects dangling case references and missing automation scripts", () => {
const tmp = mkdtempSync(join(tmpdir(), "lbs-validate-strict-"));
try {