mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-25 06:54:19 +00:00
Add performance and reliability QA gates
This commit is contained in:
@@ -48,7 +48,18 @@
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["smoke", "regression", "feature", "provider", "exploratory"]
|
||||
"enum": [
|
||||
"smoke",
|
||||
"regression",
|
||||
"feature",
|
||||
"provider",
|
||||
"exploratory",
|
||||
"contract",
|
||||
"performance",
|
||||
"reliability",
|
||||
"chaos",
|
||||
"security"
|
||||
]
|
||||
},
|
||||
"priority": {
|
||||
"type": "string",
|
||||
@@ -102,7 +113,11 @@
|
||||
"backend_log",
|
||||
"frontend_log",
|
||||
"api_diagnostic",
|
||||
"filesystem"
|
||||
"filesystem",
|
||||
"metrics",
|
||||
"trace",
|
||||
"profile",
|
||||
"resource_log"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
@@ -188,9 +203,24 @@
|
||||
"type": "string",
|
||||
"enum": ["person", "group"]
|
||||
},
|
||||
"automation_debug_chat_response_p95_ms": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_max_error_rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_filesystem_checks_json": {
|
||||
"type": "string"
|
||||
},
|
||||
"metrics_thresholds_json": {
|
||||
"type": "string"
|
||||
},
|
||||
"load_profile_json": {
|
||||
"type": "string"
|
||||
},
|
||||
"fault_model_json": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_pipeline_url_env": {
|
||||
"type": "string",
|
||||
"pattern": "^[A-Z][A-Z0-9_]*$"
|
||||
|
||||
@@ -18,7 +18,17 @@
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["smoke", "regression", "release_gate", "exploratory"]
|
||||
"enum": [
|
||||
"smoke",
|
||||
"regression",
|
||||
"release_gate",
|
||||
"exploratory",
|
||||
"contract",
|
||||
"performance",
|
||||
"reliability",
|
||||
"chaos",
|
||||
"security"
|
||||
]
|
||||
},
|
||||
"priority": {
|
||||
"type": "string",
|
||||
|
||||
@@ -54,6 +54,7 @@ const debugChatSessionType = env.LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE || "person"
|
||||
const pipelineConfigDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-diagnostic.json");
|
||||
const debugChatResetDiagnosticPath = resolve(paths.evidenceDir, "debug-chat-reset-diagnostic.json");
|
||||
const pipelineConfigRestoreDiagnosticPath = resolve(paths.evidenceDir, "pipeline-config-restore-diagnostic.json");
|
||||
const metricsPath = resolve(paths.evidenceDir, "metrics.json");
|
||||
const startedAt = new Date();
|
||||
|
||||
let browser;
|
||||
@@ -80,10 +81,11 @@ let result = {
|
||||
console_log: paths.consoleLog,
|
||||
network_log: paths.networkLog,
|
||||
screenshot: paths.screenshot,
|
||||
metrics_json: metricsPath,
|
||||
automation_result_json: paths.automationResultJson,
|
||||
result_json: paths.resultJson,
|
||||
},
|
||||
evidence_collected: ["ui", "screenshot", "console", "network"],
|
||||
evidence_collected: ["ui", "screenshot", "console", "network", "metrics"],
|
||||
};
|
||||
|
||||
function boolFromEnv(value, defaultValue) {
|
||||
@@ -103,6 +105,29 @@ function parseJsonEnv(key, fallback) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read a non-negative numeric setting from the process environment.
 *
 * Despite the name, zero is accepted (the check is `>= 0`), which callers rely on
 * for budgets such as a maximum error rate of 0.
 *
 * @param {string} key - Name of the environment variable to read from `env`.
 * @param {number} fallback - Value returned when the variable is unset, blank,
 *   not numeric, not finite, or negative.
 * @returns {number} The parsed value, or `fallback`.
 */
function positiveNumberEnv(key, fallback) {
  const raw = env[key];
  // Number("") and Number("   ") both evaluate to 0, so an unset or blank variable
  // must be rejected before conversion; otherwise it would silently replace the
  // fallback with 0 instead of leaving the default in effect.
  if (typeof raw !== "string" || raw.trim() === "") return fallback;
  const value = Number(raw);
  return Number.isFinite(value) && value >= 0 ? value : fallback;
}
|
||||
|
||||
/**
 * Nearest-rank percentile of a list of numbers, rounded to three decimals.
 *
 * The input array is copied before sorting, so the caller's array is not mutated.
 *
 * @param {number[]} values - Samples to summarize.
 * @param {number} percentileValue - Percentile on a 0-100 scale; values outside
 *   that range resolve to the smallest or largest sample respectively.
 * @returns {number} The selected sample rounded to 3 decimals, or 0 for an empty list.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentileValue / 100) * sorted.length) - 1;
  // Clamp on both sides: a percentile of 0 (or below) would otherwise produce
  // index -1 and throw when reading `.toFixed` of undefined.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return Number(sorted[index].toFixed(3));
}
|
||||
|
||||
/**
 * Summarize a list of numbers as min / p50 / p95 / p99 / max, each rounded to
 * three decimals.
 *
 * @param {number[]} values - Samples to summarize.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   All fields are 0 when `values` is empty.
 */
function stats(values) {
  if (values.length === 0) return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  // Fold pairwise instead of spreading the array into Math.min/Math.max:
  // spreading passes every element as a call argument and can throw a
  // RangeError once the array is large enough to exceed the engine's limit.
  const smallest = values.reduce((acc, value) => Math.min(acc, value), Infinity);
  const largest = values.reduce((acc, value) => Math.max(acc, value), -Infinity);
  return {
    min: Number(smallest.toFixed(3)),
    p50: percentile(values, 50),
    p95: percentile(values, 95),
    p99: percentile(values, 99),
    max: Number(largest.toFixed(3)),
  };
}
|
||||
|
||||
function promptStepsFromEnv() {
|
||||
const rawSteps = parseJsonEnv("LANGBOT_E2E_PROMPTS_JSON", null);
|
||||
if (rawSteps === null) {
|
||||
@@ -658,6 +683,7 @@ try {
|
||||
} else {
|
||||
for (let index = 0; index < promptSteps.length; index += 1) {
|
||||
const step = promptSteps[index];
|
||||
const promptStartedAt = Date.now();
|
||||
const chatResult = await runDebugChatPrompt(page, {
|
||||
prompt: step.prompt,
|
||||
expectedText: step.expectedText,
|
||||
@@ -665,11 +691,13 @@ try {
|
||||
imagePath: index === 0 ? imagePath : "",
|
||||
failureSignals: failureSignals.length > 0 ? failureSignals : undefined,
|
||||
});
|
||||
const promptDurationMs = Date.now() - promptStartedAt;
|
||||
result.chat_results.push({
|
||||
index,
|
||||
expected_text: step.expectedText,
|
||||
status: chatResult.status,
|
||||
reason: chatResult.reason,
|
||||
response_duration_ms: promptDurationMs,
|
||||
min_expected_count: chatResult.min_expected_count,
|
||||
final_count: chatResult.final_count,
|
||||
before_assistant_expected_count: chatResult.before_assistant_expected_count,
|
||||
@@ -714,6 +742,56 @@ try {
|
||||
const finishedAt = new Date();
|
||||
result.finished_at = finishedAt.toISOString();
|
||||
result.finished_at_local = localIsoWithOffset(finishedAt);
|
||||
result.duration_ms = finishedAt.getTime() - startedAt.getTime();
|
||||
const responseDurations = result.chat_results
|
||||
.map((item) => item.response_duration_ms)
|
||||
.filter((value) => Number.isFinite(value));
|
||||
const passedPrompts = result.chat_results.filter((item) => item.status === "pass").length;
|
||||
const attemptedPrompts = result.chat_results.length;
|
||||
const errorRate = attemptedPrompts === 0 ? 1 : Number(((attemptedPrompts - passedPrompts) / attemptedPrompts).toFixed(4));
|
||||
const responseStats = stats(responseDurations);
|
||||
const responseP95BudgetMs = positiveNumberEnv(
|
||||
"LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS",
|
||||
positiveNumberEnv("LANGBOT_DEBUG_CHAT_RESPONSE_P95_MS", safeResponseTimeoutMs),
|
||||
);
|
||||
const maxErrorRate = positiveNumberEnv("LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE", 0);
|
||||
const metrics = {
|
||||
probe: caseId,
|
||||
url: result.url,
|
||||
prompt_count: result.prompt_count,
|
||||
attempted_prompt_count: attemptedPrompts,
|
||||
passed_prompt_count: passedPrompts,
|
||||
error_rate: errorRate,
|
||||
response_duration_ms: responseStats,
|
||||
total_duration_ms: result.duration_ms,
|
||||
chat_results: result.chat_results,
|
||||
};
|
||||
result.metrics_summary = {
|
||||
prompt_count: metrics.prompt_count,
|
||||
attempted_prompt_count: metrics.attempted_prompt_count,
|
||||
passed_prompt_count: metrics.passed_prompt_count,
|
||||
error_rate: metrics.error_rate,
|
||||
response_p50_ms: metrics.response_duration_ms.p50,
|
||||
response_p95_ms: metrics.response_duration_ms.p95,
|
||||
total_duration_ms: metrics.total_duration_ms,
|
||||
};
|
||||
result.thresholds_summary = {
|
||||
response_p95_ms: {
|
||||
actual: metrics.response_duration_ms.p95,
|
||||
max: responseP95BudgetMs,
|
||||
pass: attemptedPrompts > 0 && metrics.response_duration_ms.p95 <= responseP95BudgetMs,
|
||||
},
|
||||
error_rate: {
|
||||
actual: metrics.error_rate,
|
||||
max: maxErrorRate,
|
||||
pass: metrics.error_rate <= maxErrorRate,
|
||||
},
|
||||
};
|
||||
await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
|
||||
if (result.status === "pass" && !Object.values(result.thresholds_summary).every((item) => item.pass)) {
|
||||
result.status = "fail";
|
||||
result.reason = "Debug Chat performance breached response latency or error-rate thresholds.";
|
||||
}
|
||||
const existingEvidence = {};
|
||||
for (const [key, value] of Object.entries(result.evidence)) {
|
||||
if (typeof value !== "string") continue;
|
||||
|
||||
@@ -130,6 +130,7 @@
|
||||
"references/local-agent-runner.md",
|
||||
"references/mcp-stdio-testing.md",
|
||||
"references/model-provider-testing.md",
|
||||
"references/performance-reliability-testing.md",
|
||||
"references/pipeline-debug-chat.md",
|
||||
"references/plugin-e2e-smoke.md",
|
||||
"references/sandbox-skill-authoring.md",
|
||||
@@ -150,6 +151,11 @@
|
||||
"agent-runner-release-preflight",
|
||||
"agent-runner-runtime-chaos",
|
||||
"dify-agent-debug-chat",
|
||||
"langbot-fault-taxonomy-contract",
|
||||
"langbot-live-backend-latency",
|
||||
"langbot-live-backend-log-health",
|
||||
"langbot-live-control-plane-api",
|
||||
"langbot-overhead-accounting-contract",
|
||||
"langrag-kb-retrieve",
|
||||
"langrag-parser-golden-e2e",
|
||||
"langrag-sentinel-kb-discover",
|
||||
@@ -165,6 +171,7 @@
|
||||
"mcp-stdio-register",
|
||||
"mcp-stdio-tool-call",
|
||||
"pipeline-debug-chat",
|
||||
"pipeline-debug-chat-performance",
|
||||
"plugin-e2e-smoke",
|
||||
"provider-deepseek",
|
||||
"qa-plugin-smoke-live-install",
|
||||
@@ -486,6 +493,128 @@
|
||||
"backend_log"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-fault-taxonomy-contract",
|
||||
"title": "LangBot fault taxonomy and cleanup contract",
|
||||
"mode": "probe",
|
||||
"area": "reliability",
|
||||
"type": "chaos",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": true,
|
||||
"tags": [
|
||||
"reliability",
|
||||
"chaos",
|
||||
"contract",
|
||||
"synthetic"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-fault-taxonomy-contract.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-live-backend-latency",
|
||||
"title": "LangBot live backend basic latency probe",
|
||||
"mode": "probe",
|
||||
"area": "performance",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"performance",
|
||||
"live-backend",
|
||||
"latency",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-live-backend-latency.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"network",
|
||||
"api_diagnostic",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-live-backend-log-health",
|
||||
"title": "LangBot live backend log health probe",
|
||||
"mode": "probe",
|
||||
"area": "reliability",
|
||||
"type": "reliability",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"reliability",
|
||||
"live-backend",
|
||||
"backend-log",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-live-backend-log-health.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"backend_log",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-live-control-plane-api",
|
||||
"title": "LangBot live control-plane API probe",
|
||||
"mode": "probe",
|
||||
"area": "performance",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"performance",
|
||||
"reliability",
|
||||
"live-backend",
|
||||
"control-plane",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-live-control-plane-api.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"network",
|
||||
"api_diagnostic",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-overhead-accounting-contract",
|
||||
"title": "LangBot overhead accounting metrics contract",
|
||||
"mode": "probe",
|
||||
"area": "performance",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": true,
|
||||
"tags": [
|
||||
"performance",
|
||||
"metrics",
|
||||
"contract",
|
||||
"synthetic"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-overhead-accounting-contract.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"resource_log",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langrag-kb-retrieve",
|
||||
"title": "LangRAG knowledge base ingests and retrieves a sentinel document",
|
||||
@@ -911,6 +1040,33 @@
|
||||
"backend_log"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "pipeline-debug-chat-performance",
|
||||
"title": "Pipeline Debug Chat user-path performance probe",
|
||||
"mode": "agent-browser",
|
||||
"area": "pipeline",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"performance",
|
||||
"pipeline",
|
||||
"debug-chat",
|
||||
"user-path",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "scripts/e2e/pipeline-debug-chat.mjs",
|
||||
"setup_automation": [],
|
||||
"setup_provides_env": [],
|
||||
"evidence_required": [
|
||||
"ui",
|
||||
"screenshot",
|
||||
"console",
|
||||
"network",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "plugin-e2e-smoke",
|
||||
"title": "Plugin system installs a local plugin and exposes tool/page APIs",
|
||||
@@ -1059,6 +1215,10 @@
|
||||
"suites": [
|
||||
"agent-runner-release-gate",
|
||||
"core-smoke",
|
||||
"langbot-live-backend-gate",
|
||||
"langbot-performance-contract-gate",
|
||||
"langbot-performance-reliability-gate",
|
||||
"langbot-user-path-performance-gate",
|
||||
"local-agent-gate"
|
||||
],
|
||||
"suite_summaries": [
|
||||
@@ -1121,6 +1281,77 @@
|
||||
"local-agent-basic-debug-chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-live-backend-gate",
|
||||
"title": "LangBot live backend reliability gate",
|
||||
"description": "Live backend control-plane responsiveness and runtime log health checks for a locally running LangBot instance.",
|
||||
"type": "reliability",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
"performance",
|
||||
"reliability",
|
||||
"live-backend",
|
||||
"metrics"
|
||||
],
|
||||
"cases": [
|
||||
"langbot-live-backend-latency",
|
||||
"langbot-live-control-plane-api",
|
||||
"langbot-live-backend-log-health"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-performance-contract-gate",
|
||||
"title": "LangBot performance contract gate",
|
||||
"description": "Fast synthetic contract checks for performance metric accounting and non-destructive reliability fault taxonomy.",
|
||||
"type": "contract",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
"performance",
|
||||
"reliability",
|
||||
"contract",
|
||||
"metrics"
|
||||
],
|
||||
"cases": [
|
||||
"langbot-overhead-accounting-contract",
|
||||
"langbot-fault-taxonomy-contract"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-performance-reliability-gate",
|
||||
"title": "LangBot performance and reliability starter gate",
|
||||
"description": "Starter gate for LangBot performance accounting, live backend control-plane latency, and non-destructive fault taxonomy checks.",
|
||||
"type": "reliability",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
"performance",
|
||||
"reliability",
|
||||
"metrics",
|
||||
"chaos"
|
||||
],
|
||||
"cases": [
|
||||
"langbot-overhead-accounting-contract",
|
||||
"langbot-fault-taxonomy-contract",
|
||||
"langbot-live-backend-latency",
|
||||
"langbot-live-control-plane-api",
|
||||
"langbot-live-backend-log-health"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-user-path-performance-gate",
|
||||
"title": "LangBot user-path performance gate",
|
||||
"description": "Browser-visible performance checks for user-facing LangBot paths such as Pipeline Debug Chat.",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
"performance",
|
||||
"browser",
|
||||
"debug-chat",
|
||||
"user-path"
|
||||
],
|
||||
"cases": [
|
||||
"pipeline-debug-chat-performance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "local-agent-gate",
|
||||
"title": "Local Agent runner regression gate",
|
||||
|
||||
@@ -21,6 +21,7 @@ Use this skill when an agent needs to verify LangBot behavior through the WebUI
|
||||
- **Sandbox-backed skill authoring**: read `references/sandbox-skill-authoring.md`.
|
||||
- **LangRAG knowledge bases**: read `references/langrag-knowledge-base.md`.
|
||||
- **MCP stdio tool testing**: read `references/mcp-stdio-testing.md`.
|
||||
- **Performance, reliability, or chaos probes**: read `references/performance-reliability-testing.md`.
|
||||
- **Drive a live instance over MCP (not raw HTTP)**: use the `langbot-mcp-ops` skill — the instance exposes an MCP server at `http://<host>:5300/mcp` (reuses API keys). Useful for setting up bots/pipelines/models as test fixtures programmatically.
|
||||
- **Known failures and fixes**: read `references/troubleshooting.md`.
|
||||
- **Reusable test groups**: run `bin/lbs suite list` and `bin/lbs suite plan <suite-id>` before manually assembling a case set.
|
||||
@@ -36,6 +37,8 @@ Use this skill when an agent needs to verify LangBot behavior through the WebUI
|
||||
- Use an authenticated browser profile prepared by `langbot-env-setup`.
|
||||
- Do not expose API keys, OAuth secrets, tokens, or localStorage token values in output.
|
||||
- A WebUI test is not complete until the visible UI result is checked against backend logs or network behavior.
|
||||
- A performance result is not complete without `metrics` evidence and a clear split between LangBot overhead and external provider/tool/network time.
|
||||
- A chaos or reliability result is not complete until the fault scope, cleanup, and recovery checks are recorded.
|
||||
- For a suite, use `bin/lbs suite start <suite-id>` to create the suite evidence root, per-case directories, and `suite-start.json`/`suite-start.md` handoff files; use `bin/lbs test result <case-id>` to write final per-case `result.json`, then run `bin/lbs suite report <suite-id> --evidence-dir <dir>`.
|
||||
- Do not mark a case `pass` until `test result --evidence` covers every value in the case's `evidence_required`.
|
||||
- For runner-specific Debug Chat cases, use the case-specific pipeline env declared by `automation_pipeline_url_env` / `automation_pipeline_name_env`; do not silently reuse a generic `LANGBOT_PIPELINE_URL`.
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
id: langbot-fault-taxonomy-contract
|
||||
title: "LangBot fault taxonomy and cleanup contract"
|
||||
mode: probe
|
||||
area: reliability
|
||||
type: chaos
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: true
|
||||
tags:
|
||||
- reliability
|
||||
- chaos
|
||||
- contract
|
||||
- synthetic
|
||||
skills:
|
||||
- langbot-testing
|
||||
automation: skills/langbot-testing/probes/langbot-fault-taxonomy-contract.mjs
|
||||
fault_model_json: '{"kind":"taxonomy-contract","destructive":false,"scenarios":["provider-timeout","plugin-runtime-disconnect","mcp-stdio-server-exit","operator-missing-login","transient-marketplace-timeout"]}'
|
||||
steps:
|
||||
- "Run `rtk bin/lbs test run langbot-fault-taxonomy-contract --dry-run` first; remove `--dry-run` after checking the evidence directory."
|
||||
- "Automation validates that representative fault scenarios declare target, injected fault, expected status, recovery check, and cleanup."
|
||||
- "Review metrics.json, fault-model.json, and automation-result.json under LBS_EVIDENCE_DIR."
|
||||
checks:
|
||||
- "automation-result.json status is pass."
|
||||
- "Every scenario has an expected status in pass, fail, blocked, env_issue, or flaky."
|
||||
- "Every scenario declares a cleanup action and recovery check."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This is a non-destructive taxonomy contract probe; it does not inject real runtime faults."
|
||||
- "Use it as a gate before adding live chaos cases that kill runtimes, route traffic through a proxy, or disrupt a backend dependency."
|
||||
success_patterns:
|
||||
- "Fault taxonomy contract declares status"
|
||||
failure_patterns:
|
||||
- "missing required scenario fields"
|
||||
@@ -0,0 +1,42 @@
|
||||
id: langbot-live-backend-latency
|
||||
title: "LangBot live backend basic latency probe"
|
||||
mode: probe
|
||||
area: performance
|
||||
type: performance
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- performance
|
||||
- live-backend
|
||||
- latency
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
automation: skills/langbot-testing/probes/langbot-live-backend-latency.mjs
|
||||
metrics_thresholds_json: '{"backend_p95_ms":{"max":1000},"error_rate":{"max":0}}'
|
||||
load_profile_json: '{"requests":12,"concurrency":2,"endpoints":["/healthz"]}'
|
||||
steps:
|
||||
- "Confirm the selected LangBot backend is the intended test target."
|
||||
- "Run `rtk bin/lbs test run langbot-live-backend-latency --dry-run` first; remove `--dry-run` after checking LANGBOT_BACKEND_URL and evidence directory."
|
||||
- "Automation sends a small request batch to LANGBOT_BACKEND_URL/healthz and records latency, status counts, and network errors."
|
||||
checks:
|
||||
- "automation-result.json status is pass when the backend responds and p95/error-rate thresholds pass."
|
||||
- "automation-result.json status is env_issue when the backend is not reachable."
|
||||
- "metrics.json and network.log are written under LBS_EVIDENCE_DIR."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- network
|
||||
- api_diagnostic
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This probe measures backend health endpoint reachability latency only; it does not cover model/provider, browser, Debug Chat, RAG, or plugin runtime latency."
|
||||
success_patterns:
|
||||
- "Live backend latency probe passed"
|
||||
failure_patterns:
|
||||
- "Backend did not respond"
|
||||
- "breached latency or error-rate thresholds"
|
||||
troubleshooting:
|
||||
- socks-proxy-without-socksio
|
||||
@@ -0,0 +1,45 @@
|
||||
id: langbot-live-backend-log-health
|
||||
title: "LangBot live backend log health probe"
|
||||
mode: probe
|
||||
area: reliability
|
||||
type: reliability
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- reliability
|
||||
- live-backend
|
||||
- backend-log
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
automation: skills/langbot-testing/probes/langbot-live-backend-log-health.mjs
|
||||
metrics_thresholds_json: '{"fail_count":{"max":0}}'
|
||||
load_profile_json: '{"lookback_seconds":300,"log_source":"LANGBOT_BACKEND_LOG or latest LANGBOT_REPO/data/logs/langbot-*.log"}'
|
||||
steps:
|
||||
- "Confirm the selected LangBot backend log belongs to the intended test target."
|
||||
- "Run `rtk bin/lbs test run langbot-live-backend-log-health --dry-run` first; remove `--dry-run` after checking evidence directory and log source."
|
||||
- "Automation scans the recent backend log window for fail-severity runtime findings such as Traceback, ImportError, ERROR, unclosed sessions, and unawaited coroutines."
|
||||
checks:
|
||||
- "automation-result.json status is pass only when fail_count is 0."
|
||||
- "metrics_summary includes scanned_line_count, fail_count, warning_count, and finding_count."
|
||||
- "findings.json and scanned-backend.log are written under LBS_EVIDENCE_DIR."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- backend_log
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "Set LANGBOT_BACKEND_LOG to an explicit log path when the latest log file is not the run target."
|
||||
- "Set LANGBOT_BACKEND_LOG_SINCE or LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS to control the scan window."
|
||||
- "This probe measures runtime log health; it does not prove user-facing Debug Chat, plugin, model, or RAG behavior."
|
||||
success_patterns:
|
||||
- "Live backend log health passed"
|
||||
failure_patterns:
|
||||
- "Traceback"
|
||||
- "ImportError"
|
||||
- "ERROR"
|
||||
- "unclosed"
|
||||
troubleshooting:
|
||||
- socks-proxy-without-socksio
|
||||
@@ -0,0 +1,44 @@
|
||||
id: langbot-live-control-plane-api
|
||||
title: "LangBot live control-plane API probe"
|
||||
mode: probe
|
||||
area: performance
|
||||
type: performance
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- performance
|
||||
- reliability
|
||||
- live-backend
|
||||
- control-plane
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
automation: skills/langbot-testing/probes/langbot-live-control-plane-api.mjs
|
||||
metrics_thresholds_json: '{"error_rate":{"max":0},"response_shape_failures":{"max":0},"healthz_p95_ms":{"max":500},"system_info_p95_ms":{"max":1000}}'
|
||||
load_profile_json: '{"requests":20,"concurrency":4,"endpoints":["/healthz","/api/v1/system/info"],"auth_required":false}'
|
||||
steps:
|
||||
- "Confirm the selected LangBot backend is the intended test target."
|
||||
- "Run `rtk bin/lbs test run langbot-live-control-plane-api --dry-run` first; remove `--dry-run` after checking LANGBOT_BACKEND_URL and evidence directory."
|
||||
- "Automation sends a small request batch to /healthz and /api/v1/system/info, then validates status code, JSON shape, and latency budgets."
|
||||
checks:
|
||||
- "automation-result.json status is pass when every control-plane request returns HTTP 200, JSON code 0, and required response fields."
|
||||
- "metrics_summary includes per-endpoint p50/p95 latency, error rate, status counts, and response_shape_failures."
|
||||
- "thresholds_summary shows error_rate, response_shape_failures, healthz_p95_ms, and system_info_p95_ms all pass."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- network
|
||||
- api_diagnostic
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This probe measures unauthenticated backend control-plane readiness; it does not cover authenticated UI flows, Debug Chat, model calls, plugins, or RAG."
|
||||
- "A system_info shape failure usually means the API contract or startup state changed and should be investigated before treating latency as healthy."
|
||||
success_patterns:
|
||||
- "Live control-plane API probe passed"
|
||||
failure_patterns:
|
||||
- "Backend did not respond"
|
||||
- "breached shape, latency, or error-rate thresholds"
|
||||
troubleshooting:
|
||||
- socks-proxy-without-socksio
|
||||
@@ -0,0 +1,37 @@
|
||||
id: langbot-overhead-accounting-contract
|
||||
title: "LangBot overhead accounting metrics contract"
|
||||
mode: probe
|
||||
area: performance
|
||||
type: performance
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: true
|
||||
tags:
|
||||
- performance
|
||||
- metrics
|
||||
- contract
|
||||
- synthetic
|
||||
skills:
|
||||
- langbot-testing
|
||||
automation: skills/langbot-testing/probes/langbot-overhead-accounting-contract.mjs
|
||||
metrics_thresholds_json: '{"sample_count":{"min":50},"langbot_overhead_p95_ms":{"max":25},"accounting_gap_max_ms":{"max":0.001}}'
|
||||
load_profile_json: '{"kind":"synthetic-overhead-accounting","samples":80,"external_latency_segments":["provider","external_tool","network"]}'
|
||||
steps:
|
||||
- "Run `rtk bin/lbs test run langbot-overhead-accounting-contract --dry-run` first; remove `--dry-run` after checking the evidence directory."
|
||||
- "Automation generates deterministic message-path latency samples and separates LangBot overhead from provider/tool/network latency."
|
||||
- "Review metrics.json, thresholds.json, resource-log.json, and automation-result.json under LBS_EVIDENCE_DIR."
|
||||
checks:
|
||||
- "automation-result.json status is pass."
|
||||
- "metrics_summary includes sample_count, langbot_overhead_p95_ms, e2e_latency_p95_ms, external_latency_p95_ms, and accounting_gap_max_ms."
|
||||
- "thresholds_summary shows sample_count, langbot_overhead_p95_ms, and accounting_gap_max_ms all pass."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- resource_log
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This is a synthetic contract probe for the QA harness; it is not live product performance."
|
||||
- "Use it to verify that reports can carry overhead accounting metrics before running live backend or browser performance probes."
|
||||
success_patterns:
|
||||
- "Overhead accounting contract passed"
|
||||
failure_patterns:
|
||||
- "breached one or more thresholds"
|
||||
@@ -0,0 +1,75 @@
|
||||
id: pipeline-debug-chat-performance
|
||||
title: "Pipeline Debug Chat user-path performance probe"
|
||||
mode: agent-browser
|
||||
area: pipeline
|
||||
type: performance
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- performance
|
||||
- pipeline
|
||||
- debug-chat
|
||||
- user-path
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-env-setup
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_FRONTEND_URL
|
||||
- LANGBOT_BACKEND_URL
|
||||
env_any:
|
||||
- LANGBOT_PIPELINE_URL|LANGBOT_PIPELINE_NAME
|
||||
automation: scripts/e2e/pipeline-debug-chat.mjs
|
||||
automation_env:
|
||||
- LANGBOT_FRONTEND_URL
|
||||
- LANGBOT_BACKEND_URL
|
||||
- LANGBOT_BROWSER_PROFILE
|
||||
- LANGBOT_CHROMIUM_EXECUTABLE
|
||||
- LANGBOT_E2E_PROMPT
|
||||
- LANGBOT_E2E_EXPECTED_TEXT
|
||||
- LANGBOT_E2E_RESPONSE_TIMEOUT_MS
|
||||
automation_env_any:
|
||||
- LANGBOT_PIPELINE_URL|LANGBOT_PIPELINE_NAME
|
||||
automation_prompt: "请只回复 OK,用于性能测试。"
|
||||
automation_expected_text: "OK"
|
||||
automation_response_timeout_ms: "120000"
|
||||
automation_reset_debug_chat: "true"
|
||||
automation_debug_chat_response_p95_ms: "120000"
|
||||
automation_debug_chat_max_error_rate: "0"
|
||||
metrics_thresholds_json: '{"response_p95_ms":{"max":120000},"error_rate":{"max":0}}'
|
||||
load_profile_json: '{"prompts":1,"browser":true,"path":"Pipeline Debug Chat","metric":"send-to-visible-completion"}'
|
||||
preconditions:
|
||||
- "LANGBOT_PIPELINE_URL or LANGBOT_PIPELINE_NAME points to the pipeline intended for this Debug Chat performance run."
|
||||
- "The target pipeline is safe to reset Debug Chat history for this run."
|
||||
- "The target pipeline has a known-good runner/model; provider latency should be interpreted separately from LangBot overhead."
|
||||
steps:
|
||||
- "Open LANGBOT_FRONTEND_URL with the prepared browser profile."
|
||||
- "Open the target pipeline and select Debug Chat."
|
||||
- "Reset Debug Chat history through the backend API when configured."
|
||||
- "Send the deterministic prompt and wait for the expected assistant response."
|
||||
checks:
|
||||
- "automation-result.json status is pass when the expected assistant response appears."
|
||||
- "metrics_summary includes response_p50_ms, response_p95_ms, error_rate, and total_duration_ms."
|
||||
- "thresholds_summary shows response_p95_ms and error_rate pass."
|
||||
evidence_required:
|
||||
- ui
|
||||
- screenshot
|
||||
- console
|
||||
- network
|
||||
- metrics
|
||||
diagnostics:
|
||||
- "This case measures browser-visible send-to-completion latency; it does not split provider latency from LangBot overhead."
|
||||
- "Use backend logs and provider diagnostics to explain slow runs before calling them LangBot regressions."
|
||||
success_patterns:
|
||||
- "Processing request from person_websocket"
|
||||
- "Streaming completed"
|
||||
failure_patterns:
|
||||
- "Action invoke_llm_stream call timed out"
|
||||
- "Task exception was never retrieved"
|
||||
- "All models failed during streaming setup"
|
||||
troubleshooting:
|
||||
- debug-chat-history-contaminates-automation
|
||||
- local-agent-model-route-unavailable
|
||||
- plugin-runtime-timeout
|
||||
- proxy-env-mismatch
|
||||
@@ -1 +1,3 @@
|
||||
dist/
|
||||
dist/*
|
||||
!dist/
|
||||
!dist/qa-plugin-smoke-0.1.0.lbpkg
|
||||
|
||||
Vendored
BIN
Binary file not shown.
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join, resolve } from "node:path";
|
||||
import { env, exit } from "node:process";
|
||||
|
||||
/** Left-pads the stringified `value` with zeros up to `size` characters. */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}

/**
 * Formats `date` as local wall-clock time with an explicit UTC offset,
 * for example `2026-01-02T03:04:05.678+08:00`.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() reports minutes behind UTC, so the sign is flipped.
  const offsetMinutes = -date.getTimezoneOffset();
  const magnitude = Math.abs(offsetMinutes);
  const day = [date.getFullYear(), pad(date.getMonth() + 1), pad(date.getDate())].join("-");
  const clock = [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join(":");
  const millis = pad(date.getMilliseconds(), 3);
  const direction = offsetMinutes >= 0 ? "+" : "-";
  const offset = `${direction}${pad(Math.floor(magnitude / 60))}:${pad(magnitude % 60)}`;
  return `${day}T${clock}.${millis}${offset}`;
}

/**
 * Builds a path-safe UTC timestamp slug: milliseconds are dropped and every
 * run of non-alphanumeric characters becomes a single dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
|
||||
|
||||
// Fault taxonomy under contract. Each row is
// [id, target, injected_fault, expected_status, recovery_check, cleanup];
// the rows are expanded into objects with exactly that key order.
const scenarios = [
  [
    "provider-timeout",
    "provider",
    "fake provider request exceeds the configured timeout",
    "env_issue",
    "provider route is reachable or the case remains outside product pass/fail",
    "stop fake provider or reset proxy route",
  ],
  [
    "plugin-runtime-disconnect",
    "plugin-runtime",
    "runtime control channel disconnects during an action",
    "fail",
    "runtime reconnects and a deterministic plugin action succeeds",
    "restart the local plugin runtime process",
  ],
  [
    "mcp-stdio-server-exit",
    "mcp",
    "stdio server exits mid-call",
    "fail",
    "server can be registered again and exposes the expected tool",
    "remove temporary MCP server registration",
  ],
  [
    "operator-missing-login",
    "webui",
    "browser profile is not authenticated",
    "blocked",
    "authenticated profile can open the same WebUI origin",
    "no product cleanup; refresh local login state",
  ],
  [
    "transient-marketplace-timeout",
    "marketplace",
    "marketplace request times out once and then succeeds",
    "flaky",
    "rerun passes with the same product revision and no code change",
    "clear retry-only evidence and keep the run classified as flaky",
  ],
].map(([id, target, injected_fault, expected_status, recovery_check, cleanup]) => ({
  id,
  target,
  injected_fault,
  expected_status,
  recovery_check,
  cleanup,
}));
|
||||
|
||||
// Fields every fault scenario must declare as a non-blank string.
const REQUIRED_SCENARIO_FIELDS = ["id", "target", "injected_fault", "expected_status", "recovery_check", "cleanup"];

// Result statuses a scenario is allowed to expect.
const ALLOWED_EXPECTED_STATUSES = new Set(["pass", "fail", "blocked", "env_issue", "flaky"]);

/**
 * Checks one fault scenario against the taxonomy contract.
 *
 * A field counts as missing when it is absent, not a string, or only
 * whitespace. A null/undefined scenario is reported as missing every field
 * instead of throwing.
 *
 * @param {object} scenario - Scenario record to validate.
 * @returns {{id: *, pass: boolean, missing: string[], expected_status: *}}
 *   `pass` is true only when nothing is missing and `expected_status` is one
 *   of the allowed statuses.
 */
function validateScenario(scenario) {
  const candidate = scenario ?? {};
  const missing = REQUIRED_SCENARIO_FIELDS.filter((key) => {
    const value = candidate[key];
    return typeof value !== "string" || value.trim() === "";
  });
  return {
    id: candidate.id,
    pass: missing.length === 0 && ALLOWED_EXPECTED_STATUSES.has(candidate.expected_status),
    missing,
    expected_status: candidate.expected_status,
  };
}
|
||||
|
||||
/**
 * Runs the fault taxonomy contract probe.
 *
 * Validates every entry in `scenarios`, writes metrics.json,
 * fault-model.json, automation-result.json and result.json into the evidence
 * directory, prints the result as JSON on stdout, and exits with 0 on pass or
 * 1 on fail.
 *
 * Environment: LBS_ROOT (defaults to the working directory), LBS_RUN_ID and
 * LBS_EVIDENCE_DIR (both derived when unset).
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-fault-taxonomy-contract";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const startedAt = new Date();
  const validations = scenarios.map(validateScenario);
  const statusCounts = {};
  for (const { expected_status: expected } of scenarios) {
    statusCounts[expected] = (statusCounts[expected] || 0) + 1;
  }
  const metrics = {
    probe: caseId,
    scenario_count: scenarios.length,
    status_counts: statusCounts,
    scenarios,
    validations,
  };

  const invalidCount = validations.filter((item) => !item.pass).length;
  const cleanupCount = scenarios.filter((item) => item.cleanup).length;
  const thresholds = {
    scenario_count: { actual: scenarios.length, min: 5, pass: scenarios.length >= 5 },
    invalid_scenario_count: { actual: invalidCount, max: 0, pass: invalidCount === 0 },
    cleanup_declared_count: { actual: cleanupCount, min: scenarios.length, pass: cleanupCount === scenarios.length },
  };

  // Name the thresholds that actually failed so the reason matches the
  // evidence (too few scenarios, an invalid status, or a missing cleanup).
  const breached = Object.entries(thresholds)
    .filter(([, item]) => !item.pass)
    .map(([name]) => name);
  const status = breached.length === 0 ? "pass" : "fail";
  const reason = status === "pass"
    ? "Fault taxonomy contract declares status, recovery, and cleanup for every scenario."
    : `Fault taxonomy contract breached thresholds: ${breached.join(", ")}.`;

  const metricsPath = join(evidenceDir, "metrics.json");
  const faultModelPath = join(evidenceDir, "fault-model.json");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
  await writeFile(faultModelPath, `${JSON.stringify({ scenarios }, null, 2)}\n`, "utf8");

  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    metrics_summary: {
      scenario_count: metrics.scenario_count,
      status_counts: metrics.status_counts,
      invalid_scenario_count: thresholds.invalid_scenario_count.actual,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      fault_model_json: faultModelPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "filesystem"],
  };

  // The same payload is written under both file names.
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : 1);
}
|
||||
|
||||
await main();
|
||||
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join, resolve } from "node:path";
|
||||
import { env, exit } from "node:process";
|
||||
|
||||
/** Left-pads the stringified `value` with zeros up to `size` characters. */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}

/**
 * Formats `date` as local wall-clock time with an explicit UTC offset,
 * for example `2026-01-02T03:04:05.678+08:00`.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() reports minutes behind UTC, so the sign is flipped.
  const offsetMinutes = -date.getTimezoneOffset();
  const magnitude = Math.abs(offsetMinutes);
  const day = [date.getFullYear(), pad(date.getMonth() + 1), pad(date.getDate())].join("-");
  const clock = [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join(":");
  const millis = pad(date.getMilliseconds(), 3);
  const direction = offsetMinutes >= 0 ? "+" : "-";
  const offset = `${direction}${pad(Math.floor(magnitude / 60))}:${pad(magnitude % 60)}`;
  return `${day}T${clock}.${millis}${offset}`;
}

/**
 * Builds a path-safe UTC timestamp slug: milliseconds are dropped and every
 * run of non-alphanumeric characters becomes a single dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
|
||||
|
||||
/**
 * Nearest-rank percentile of `values`, rounded to three decimals.
 *
 * @param {number[]} values - Samples; not mutated.
 * @param {number} percentileValue - Requested percentile; values outside
 *   0..100 are clamped to the smallest/largest sample.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 * @throws {RangeError} When `percentileValue` is NaN.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  if (Number.isNaN(percentileValue)) {
    throw new RangeError("percentileValue must be a number");
  }
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentileValue / 100) * sorted.length) - 1;
  // Percentile 0 yields rank -1; clamp both ends so the index is always valid.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return Number(sorted[index].toFixed(3));
}
|
||||
|
||||
/**
 * Summarises latency samples as min / p50 / p95 / p99 / max, each rounded to
 * three decimals. Percentiles use the nearest-rank method.
 *
 * The samples are sorted once and min/max are read from the sorted copy, so
 * large sample sets do not hit the argument-count limit of
 * `Math.min(...values)`.
 *
 * @param {number[]} values - Samples; not mutated.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   All zeros when `values` is empty.
 */
function stats(values) {
  if (values.length === 0) return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  const sorted = [...values].sort((a, b) => a - b);
  const last = sorted.length - 1;
  const round = (value) => Number(value.toFixed(3));
  const nearestRank = (percent) => {
    const rank = Math.ceil((percent / 100) * sorted.length) - 1;
    return round(sorted[Math.min(last, Math.max(0, rank))]);
  };
  return {
    min: round(sorted[0]),
    p50: nearestRank(50),
    p95: nearestRank(95),
    p99: nearestRank(99),
    max: round(sorted[last]),
  };
}
|
||||
|
||||
/**
 * Parses a JSON-encoded list of strings, typically from an environment
 * variable.
 *
 * @param {string|undefined} value - Raw JSON text.
 * @param {string[]} fallback - Returned when `value` is empty, is not valid
 *   JSON, or is not a non-empty array of non-blank strings.
 * @returns {string[]} The parsed list or `fallback`.
 */
function parseJsonList(value, fallback) {
  if (!value) return fallback;
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    // Malformed configuration deliberately falls back to the default list.
    return fallback;
  }
  // An empty list would leave the probe with nothing to request, so it is
  // treated like any other unusable value.
  const usable = Array.isArray(parsed)
    && parsed.length > 0
    && parsed.every((item) => typeof item === "string" && item.trim() !== "");
  return usable ? parsed : fallback;
}
|
||||
|
||||
/**
 * Joins a base URL and a path with exactly one slash between them: trailing
 * slashes are stripped from `baseUrl` and a leading slash is added to `path`
 * when it has none.
 */
function joinUrl(baseUrl, path) {
  const trimmedBase = baseUrl.replace(/\/+$/, "");
  if (path.startsWith("/")) {
    return trimmedBase + path;
  }
  return `${trimmedBase}/${path}`;
}
|
||||
|
||||
/**
 * Issues one GET request and records its outcome as a latency sample.
 *
 * The request is aborted after `timeoutMs`. The body is fully read before the
 * latency is taken. Any HTTP status below 500 counts as `ok`; a thrown error
 * (including an abort) is reported with status 0 and the error message.
 *
 * @param {string} url - Absolute URL to request.
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<{url: string, ok: boolean, status: number, latency_ms: number, error: string}>}
 */
async function fetchOnce(url, timeoutMs) {
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), timeoutMs);
  const startedMs = performance.now();
  const elapsedMs = () => Number((performance.now() - startedMs).toFixed(3));
  try {
    const response = await fetch(url, { method: "GET", signal: aborter.signal });
    await response.arrayBuffer();
    const latency = elapsedMs();
    const { status } = response;
    return { url, ok: status < 500, status, latency_ms: latency, error: "" };
  } catch (failure) {
    const latency = elapsedMs();
    const message = failure instanceof Error ? failure.message : String(failure);
    return { url, ok: false, status: 0, latency_ms: latency, error: message };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Sends `totalRequests` GET requests in sequential batches, cycling through
 * `urls` in order. Each batch runs in parallel and must finish before the next
 * one starts.
 *
 * @param {string[]} urls - Target URLs, used round-robin.
 * @param {number} totalRequests - Number of requests to send; a non-finite or
 *   negative value sends none.
 * @param {number} concurrency - Requests per batch. Values below 1 and NaN
 *   are raised to 1; they previously produced empty batches and an endless
 *   loop.
 * @param {number} timeoutMs - Per-request timeout passed to `fetchOnce`.
 * @returns {Promise<object[]>} One sample per request, in request order.
 */
async function runBatches(urls, totalRequests, concurrency, timeoutMs) {
  const total = Number.isFinite(totalRequests) ? Math.max(0, Math.floor(totalRequests)) : 0;
  const batchSize = Number.isNaN(Number(concurrency)) ? 1 : Math.max(1, Math.floor(Number(concurrency)));
  const results = [];
  for (let start = 0; start < total; start += batchSize) {
    const size = Math.min(batchSize, total - start);
    const batch = Array.from({ length: size }, (_, offset) => urls[(start + offset) % urls.length]);
    const samples = await Promise.all(batch.map((url) => fetchOnce(url, timeoutMs)));
    results.push(...samples);
  }
  return results;
}
|
||||
|
||||
/**
 * Runs the live backend latency probe.
 *
 * Sends GET requests to the configured backend endpoints, writes metrics.json,
 * network.log, automation-result.json and result.json into the evidence
 * directory, prints the result as JSON on stdout, and exits with 0 (pass),
 * 2 (env_issue) or 1 (fail).
 *
 * Environment: LANGBOT_BACKEND_URL, LANGBOT_PERF_ENDPOINTS_JSON,
 * LANGBOT_PERF_REQUESTS, LANGBOT_PERF_CONCURRENCY, LANGBOT_PERF_TIMEOUT_MS,
 * LANGBOT_PERF_BACKEND_P95_MS, LANGBOT_PERF_MAX_ERROR_RATE, plus LBS_ROOT,
 * LBS_RUN_ID and LBS_EVIDENCE_DIR.
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-live-backend-latency";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const startedAt = new Date();
  const backendUrl = env.LANGBOT_BACKEND_URL || "";
  const endpoints = parseJsonList(env.LANGBOT_PERF_ENDPOINTS_JSON, ["/healthz"]);
  const totalRequests = Number(env.LANGBOT_PERF_REQUESTS || "12");
  const concurrency = Number(env.LANGBOT_PERF_CONCURRENCY || "2");
  const timeoutMs = Number(env.LANGBOT_PERF_TIMEOUT_MS || "5000");
  const p95BudgetMs = Number(env.LANGBOT_PERF_BACKEND_P95_MS || "1000");
  const maxErrorRate = Number(env.LANGBOT_PERF_MAX_ERROR_RATE || "0");
  const metricsPath = join(evidenceDir, "metrics.json");
  const networkLogPath = join(evidenceDir, "network.log");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  // Reject unusable numeric settings up front: a NaN or zero concurrency
  // would otherwise produce empty batches, and NaN budgets can never pass.
  const numericConfig = {
    LANGBOT_PERF_REQUESTS: totalRequests,
    LANGBOT_PERF_CONCURRENCY: concurrency,
    LANGBOT_PERF_TIMEOUT_MS: timeoutMs,
    LANGBOT_PERF_BACKEND_P95_MS: p95BudgetMs,
    LANGBOT_PERF_MAX_ERROR_RATE: maxErrorRate,
  };
  const invalidConfig = Object.keys(numericConfig)
    .filter((name) => !Number.isFinite(numericConfig[name]) || numericConfig[name] < 0);
  if (Number.isFinite(concurrency) && concurrency >= 0 && concurrency < 1) {
    invalidConfig.push("LANGBOT_PERF_CONCURRENCY");
  }

  const canRun = backendUrl !== "" && invalidConfig.length === 0;
  const results = canRun
    ? await runBatches(endpoints.map((path) => joinUrl(backendUrl, path)), totalRequests, concurrency, timeoutMs)
    : [];

  const statusCounts = {};
  for (const item of results) {
    const key = item.status === 0 ? "network_error" : String(item.status);
    statusCounts[key] = (statusCounts[key] || 0) + 1;
  }
  const okResults = results.filter((item) => item.ok);
  const errorCount = results.length - okResults.length;
  const metrics = {
    probe: caseId,
    backend_url: backendUrl,
    endpoints,
    total_requests: totalRequests,
    concurrency,
    timeout_ms: timeoutMs,
    ok_count: okResults.length,
    error_count: errorCount,
    error_rate: results.length === 0 ? 1 : Number((errorCount / results.length).toFixed(4)),
    latency_ms: stats(okResults.map((item) => item.latency_ms)),
    status_counts: statusCounts,
  };
  const thresholds = {
    backend_p95_ms: { actual: metrics.latency_ms.p95, max: p95BudgetMs, pass: metrics.latency_ms.p95 <= p95BudgetMs },
    error_rate: { actual: metrics.error_rate, max: maxErrorRate, pass: metrics.error_rate <= maxErrorRate },
  };

  // The verdict is derived from the same threshold records that are written
  // to the result, so status and thresholds_summary cannot disagree.
  let status = "fail";
  let reason = "Live backend latency probe breached latency or error-rate thresholds.";
  if (!backendUrl) {
    status = "env_issue";
    reason = "LANGBOT_BACKEND_URL is not configured.";
  } else if (invalidConfig.length > 0) {
    status = "env_issue";
    reason = `Invalid numeric configuration: ${invalidConfig.join(", ")}.`;
  } else if (results.length > 0 && results.every((item) => item.status === 0)) {
    status = "env_issue";
    reason = `Backend did not respond at ${backendUrl}.`;
  } else if (Object.values(thresholds).every((item) => item.pass)) {
    status = "pass";
    reason = "Live backend latency probe passed all thresholds.";
  }

  await writeFile(metricsPath, `${JSON.stringify({ ...metrics, samples: results }, null, 2)}\n`, "utf8");
  const networkLines = results.map((item) => JSON.stringify(item)).join("\n");
  await writeFile(networkLogPath, networkLines + (results.length > 0 ? "\n" : ""), "utf8");

  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: backendUrl,
    metrics_summary: {
      requests: metrics.total_requests,
      concurrency: metrics.concurrency,
      ok_count: metrics.ok_count,
      error_rate: metrics.error_rate,
      latency_p50_ms: metrics.latency_ms.p50,
      latency_p95_ms: metrics.latency_ms.p95,
      status_counts: metrics.status_counts,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      network_log: networkLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "network", "api_diagnostic", "filesystem"],
  };

  // The same payload is written under both file names.
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : status === "env_issue" ? 2 : 1);
}
|
||||
|
||||
await main();
|
||||
@@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { existsSync, readdirSync, statSync } from "node:fs";
|
||||
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
||||
import { join, resolve } from "node:path";
|
||||
import { env, exit } from "node:process";
|
||||
|
||||
/** Left-pads the stringified `value` with zeros up to `size` characters. */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}

/**
 * Formats `date` as local wall-clock time with an explicit UTC offset,
 * for example `2026-01-02T03:04:05.678+08:00`.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() reports minutes behind UTC, so the sign is flipped.
  const offsetMinutes = -date.getTimezoneOffset();
  const magnitude = Math.abs(offsetMinutes);
  const day = [date.getFullYear(), pad(date.getMonth() + 1), pad(date.getDate())].join("-");
  const clock = [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join(":");
  const millis = pad(date.getMilliseconds(), 3);
  const direction = offsetMinutes >= 0 ? "+" : "-";
  const offset = `${direction}${pad(Math.floor(magnitude / 60))}:${pad(magnitude % 60)}`;
  return `${day}T${clock}.${millis}${offset}`;
}

/**
 * Builds a path-safe UTC timestamp slug: milliseconds are dropped and every
 * run of non-alphanumeric characters becomes a single dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
|
||||
|
||||
/**
 * Resolves the repository directory: LANGBOT_REPO when it is set, otherwise
 * the parent directory of `root`.
 */
function repoRootFromEnv(root) {
  const configured = env.LANGBOT_REPO;
  if (configured) {
    return resolve(configured);
  }
  return resolve(root, "..");
}
|
||||
|
||||
/**
 * Locates the backend log file to scan.
 *
 * LANGBOT_BACKEND_LOG wins when set. Otherwise the most recently modified
 * regular file named `langbot-*.log` under `<repo>/data/logs` is chosen.
 *
 * Each candidate is stat'ed exactly once and stat failures are skipped, so a
 * log file rotated away during the scan cannot crash the probe.
 *
 * @param {string} root - Root directory passed on to `repoRootFromEnv`.
 * @returns {string} Path of the log file, or "" when none is found.
 */
function latestBackendLog(root) {
  const explicit = env.LANGBOT_BACKEND_LOG;
  if (explicit) return resolve(explicit);

  const logsDir = join(repoRootFromEnv(root), "data", "logs");
  let names;
  try {
    names = readdirSync(logsDir);
  } catch {
    // Missing or unreadable log directory: report "no log found".
    return "";
  }

  let newestPath = "";
  let newestMtimeMs = -Infinity;
  for (const name of names) {
    if (!/^langbot-.*\.log$/.test(name)) continue;
    const candidate = join(logsDir, name);
    let info;
    try {
      info = statSync(candidate);
    } catch {
      continue; // Removed between readdir and stat.
    }
    if (info.isFile() && info.mtimeMs > newestMtimeMs) {
      newestPath = candidate;
      newestMtimeMs = info.mtimeMs;
    }
  }
  return newestPath;
}
|
||||
|
||||
/**
 * Computes the start of the log window to scan.
 *
 * LANGBOT_BACKEND_LOG_SINCE is used when it parses as a date. Otherwise the
 * window starts LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS (default 300) before
 * `startedAt`. Unparseable values fall back with a warning on stderr instead
 * of producing an Invalid Date.
 *
 * @param {Date} startedAt - Probe start time.
 * @returns {Date} A valid start-of-window date.
 */
function parseSince(startedAt) {
  const defaultLookbackSeconds = 300;
  const explicit = env.LANGBOT_BACKEND_LOG_SINCE;
  if (explicit) {
    const parsed = new Date(explicit);
    if (!Number.isNaN(parsed.getTime())) return parsed;
    console.warn(`Ignoring unparseable LANGBOT_BACKEND_LOG_SINCE=${JSON.stringify(explicit)}; using the lookback window instead.`);
  }

  const rawLookback = env.LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS || String(defaultLookbackSeconds);
  let lookbackSeconds = Number(rawLookback);
  if (!Number.isFinite(lookbackSeconds) || lookbackSeconds < 0) {
    console.warn(`Ignoring invalid LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS=${JSON.stringify(rawLookback)}; using ${defaultLookbackSeconds}.`);
    lookbackSeconds = defaultLookbackSeconds;
  }
  return new Date(startedAt.getTime() - lookbackSeconds * 1000);
}
|
||||
|
||||
/**
 * Extracts the leading timestamp of a backend log line.
 *
 * Two prefixes are recognised:
 *   `[MM-DD HH:MM:SS.mmm]`        - no year and no zone; `year` and
 *                                   `utcOffset` supply them.
 *   `[YYYY-MM-DD HH:MM:SS +ZZZZ]` - access-log style with its own zone.
 *
 * @param {string} line - One log line.
 * @param {number|string} year - Year assumed for year-less timestamps.
 * @param {string} [utcOffset] - Offset (`+HH:MM` / `-HH:MM`) assumed for
 *   zone-less timestamps. Defaults to LANGBOT_BACKEND_LOG_UTC_OFFSET, then to
 *   "+08:00", which was previously hard-coded. A malformed offset also falls
 *   back to "+08:00".
 * @returns {Date|null} The parsed date (possibly an Invalid Date when the
 *   digits are out of range), or null when the line has no timestamp prefix.
 */
function parseTimestamp(line, year, utcOffset = env.LANGBOT_BACKEND_LOG_UTC_OFFSET || "+08:00") {
  // NOTE(review): year-less entries always take the supplied year, so a log
  // spanning New Year may be dated one year late - confirm whether that matters.
  const localMatch = line.match(/^\[(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\.(\d{3})\]/);
  if (localMatch) {
    const [, month, day, hour, minute, second, millisecond] = localMatch;
    const offset = /^[+-]\d{2}:\d{2}$/.test(utcOffset) ? utcOffset : "+08:00";
    return new Date(`${year}-${month}-${day}T${hour}:${minute}:${second}.${millisecond}${offset}`);
  }

  const accessMatch = line.match(/^\[(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) ([+-]\d{4})\]/);
  if (accessMatch) {
    const [, fullYear, month, day, hour, minute, second, offset] = accessMatch;
    // "+0800" becomes "+08:00", the form the ISO date-time format requires.
    const normalizedOffset = `${offset.slice(0, 3)}:${offset.slice(3)}`;
    return new Date(`${fullYear}-${month}-${day}T${hour}:${minute}:${second}${normalizedOffset}`);
  }

  return null;
}
|
||||
|
||||
// Ordered rule table: the first rule whose pattern matches decides the
// finding. Built once at module load instead of once per log line. None of
// the patterns use the `g` or `y` flag, so sharing them across calls is safe.
const BACKEND_LOG_FINDING_RULES = Object.freeze([
  { severity: "fail", kind: "python_traceback", pattern: /\bTraceback(?: \(most recent call last\))?/i },
  { severity: "fail", kind: "unretrieved_task_exception", pattern: /Task exception was never retrieved/i },
  { severity: "fail", kind: "unawaited_coroutine", pattern: /RuntimeWarning:\s+coroutine .* was never awaited/i },
  { severity: "fail", kind: "unclosed_client_session", pattern: /Unclosed client session/i },
  { severity: "fail", kind: "unclosed_connector", pattern: /Unclosed connector/i },
  { severity: "fail", kind: "import_error", pattern: /\bImportError\b/i },
  { severity: "fail", kind: "error_log", pattern: /\b(?:ERROR|CRITICAL)\b/ },
  { severity: "warning", kind: "warning_log", pattern: /\bWARNING\b/ },
]);

/**
 * Classifies one log line.
 *
 * @param {string} line - Log line text.
 * @param {number} number - 1-based line number, copied into the finding.
 * @returns {{severity: string, kind: string, line: number, excerpt: string}|null}
 *   The finding for the first matching rule, or null when no rule matches.
 */
function findingForLine(line, number) {
  const rule = BACKEND_LOG_FINDING_RULES.find((candidate) => candidate.pattern.test(line));
  if (!rule) return null;
  return {
    severity: rule.severity,
    kind: rule.kind,
    line: number,
    excerpt: line,
  };
}
|
||||
|
||||
/**
 * Scans log text and collects findings from the time window starting at
 * `since`.
 *
 * A line with a timestamp opens or closes the window depending on whether the
 * timestamp is at or after `since`. Lines without a timestamp inherit the
 * state of the last timestamped line, and are skipped if none came before.
 *
 * @param {string} text - Whole log file content.
 * @param {Date} since - Start of the window (inclusive).
 * @param {number} year - Year passed to `parseTimestamp` for year-less lines.
 * @returns {{findings: object[], scanned: {number: number, text: string}[], total_lines: number}}
 */
function scanLines(text, since, year) {
  const lines = text.split(/\r?\n/);
  // A trailing newline (or empty text) leaves one empty final element; it is
  // not a log line, so drop it to keep the scanned lines and counts exact.
  if (lines.length > 0 && lines[lines.length - 1] === "") lines.pop();

  const findings = [];
  const scanned = [];
  let insideWindow = false;
  lines.forEach((line, index) => {
    const number = index + 1;
    const timestamp = parseTimestamp(line, year);
    if (timestamp) insideWindow = timestamp >= since;
    if (!insideWindow) return;
    scanned.push({ number, text: line });
    const finding = findingForLine(line, number);
    if (finding) findings.push(finding);
  });
  return { findings, scanned, total_lines: lines.length };
}
|
||||
|
||||
/**
 * Runs the live backend log health probe.
 *
 * Scans the selected backend log for findings inside the configured time
 * window, writes metrics.json, findings.json, scanned-backend.log,
 * automation-result.json and result.json into the evidence directory, prints
 * the result as JSON on stdout, and exits with 0 (pass), 2 (env_issue) or
 * 1 (fail).
 *
 * A missing or unreadable log file and an invalid time window are reported as
 * env_issue results rather than crashing before evidence is written.
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-live-backend-log-health";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const startedAt = new Date();
  const since = parseSince(startedAt);
  const sinceValid = !Number.isNaN(since.getTime());
  const logPath = latestBackendLog(root);
  const metricsPath = join(evidenceDir, "metrics.json");
  const findingsPath = join(evidenceDir, "findings.json");
  const scannedLogPath = join(evidenceDir, "scanned-backend.log");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  let envIssue = "";
  let scan = { findings: [], scanned: [], total_lines: 0 };
  if (!sinceValid) {
    envIssue = "LANGBOT_BACKEND_LOG_SINCE or LANGBOT_BACKEND_LOG_LOOKBACK_SECONDS does not describe a valid time window.";
  } else if (!logPath || !existsSync(logPath)) {
    envIssue = "No LangBot backend log file was found. Set LANGBOT_BACKEND_LOG or LANGBOT_REPO.";
  } else {
    try {
      const text = await readFile(logPath, "utf8");
      scan = scanLines(text, since, startedAt.getFullYear());
    } catch (readError) {
      // For example a permission error, or the file vanished after the check.
      const detail = readError instanceof Error ? readError.message : String(readError);
      envIssue = `Backend log ${logPath} could not be read: ${detail}`;
    }
  }

  const warningCount = scan.findings.filter((item) => item.severity === "warning").length;
  const failCount = scan.findings.filter((item) => item.severity === "fail").length;

  let status;
  let reason;
  if (envIssue) {
    status = "env_issue";
    reason = envIssue;
  } else if (failCount === 0) {
    status = "pass";
    reason = "Live backend log health passed; no fail-severity findings in the scanned window.";
  } else {
    status = "fail";
    reason = "Live backend log health found fail-severity backend log findings.";
  }

  const metrics = {
    probe: caseId,
    backend_log: logPath,
    since: sinceValid ? since.toISOString() : "",
    scanned_line_count: scan.scanned.length,
    total_line_count: scan.total_lines,
    fail_count: failCount,
    warning_count: warningCount,
    finding_count: scan.findings.length,
  };
  const thresholds = {
    fail_count: { actual: failCount, max: 0, pass: failCount === 0 },
  };

  await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
  await writeFile(findingsPath, `${JSON.stringify(scan.findings, null, 2)}\n`, "utf8");
  const scannedText = scan.scanned.map((item) => `${item.number}: ${item.text}`).join("\n");
  await writeFile(scannedLogPath, scannedText + (scan.scanned.length > 0 ? "\n" : ""), "utf8");

  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: logPath,
    metrics_summary: {
      scanned_line_count: metrics.scanned_line_count,
      fail_count: metrics.fail_count,
      warning_count: metrics.warning_count,
      finding_count: metrics.finding_count,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      findings_json: findingsPath,
      scanned_backend_log: scannedLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "backend_log", "filesystem"],
  };

  // The same payload is written under both file names.
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : status === "env_issue" ? 2 : 1);
}
|
||||
|
||||
await main();
|
||||
@@ -0,0 +1,311 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join, resolve } from "node:path";
|
||||
import { env, exit } from "node:process";
|
||||
|
||||
/** Left-pads the stringified `value` with zeros up to `size` characters. */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}

/**
 * Formats `date` as local wall-clock time with an explicit UTC offset,
 * for example `2026-01-02T03:04:05.678+08:00`.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() reports minutes behind UTC, so the sign is flipped.
  const offsetMinutes = -date.getTimezoneOffset();
  const magnitude = Math.abs(offsetMinutes);
  const day = [date.getFullYear(), pad(date.getMonth() + 1), pad(date.getDate())].join("-");
  const clock = [pad(date.getHours()), pad(date.getMinutes()), pad(date.getSeconds())].join(":");
  const millis = pad(date.getMilliseconds(), 3);
  const direction = offsetMinutes >= 0 ? "+" : "-";
  const offset = `${direction}${pad(Math.floor(magnitude / 60))}:${pad(magnitude % 60)}`;
  return `${day}T${clock}.${millis}${offset}`;
}

/**
 * Builds a path-safe UTC timestamp slug: milliseconds are dropped and every
 * run of non-alphanumeric characters becomes a single dash.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
|
||||
|
||||
/**
 * Nearest-rank percentile of `values`, rounded to three decimals.
 *
 * @param {number[]} values - Samples; not mutated.
 * @param {number} percentileValue - Requested percentile; values outside
 *   0..100 are clamped to the smallest/largest sample.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 * @throws {RangeError} When `percentileValue` is NaN.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  if (Number.isNaN(percentileValue)) {
    throw new RangeError("percentileValue must be a number");
  }
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentileValue / 100) * sorted.length) - 1;
  // Percentile 0 yields rank -1; clamp both ends so the index is always valid.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return Number(sorted[index].toFixed(3));
}
|
||||
|
||||
/**
 * Summarises latency samples as min / p50 / p95 / p99 / max, each rounded to
 * three decimals. Percentiles use the nearest-rank method.
 *
 * The samples are sorted once and min/max are read from the sorted copy, so
 * large sample sets do not hit the argument-count limit of
 * `Math.min(...values)`.
 *
 * @param {number[]} values - Samples; not mutated.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   All zeros when `values` is empty.
 */
function stats(values) {
  if (values.length === 0) return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  const sorted = [...values].sort((a, b) => a - b);
  const last = sorted.length - 1;
  const round = (value) => Number(value.toFixed(3));
  const nearestRank = (percent) => {
    const rank = Math.ceil((percent / 100) * sorted.length) - 1;
    return round(sorted[Math.min(last, Math.max(0, rank))]);
  };
  return {
    min: round(sorted[0]),
    p50: nearestRank(50),
    p95: nearestRank(95),
    p99: nearestRank(99),
    max: round(sorted[last]),
  };
}
|
||||
|
||||
/**
 * Joins a base URL and a path with exactly one slash between them: trailing
 * slashes are stripped from `baseUrl` and a leading slash is added to `path`
 * when it has none.
 */
function joinUrl(baseUrl, path) {
  const trimmedBase = baseUrl.replace(/\/+$/, "");
  if (path.startsWith("/")) {
    return trimmedBase + path;
  }
  return `${trimmedBase}/${path}`;
}
|
||||
|
||||
/**
 * Parses JSON text that must be a plain object (not an array, not null, not a
 * primitive). Returns `fallback` for empty input, malformed JSON, or any other
 * JSON value.
 */
function parseJsonObject(value, fallback) {
  if (!value) return fallback;
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return fallback;
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    return fallback;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Describes the control-plane endpoints under test together with their
 * expectations.
 *
 * The p95 budgets come from LANGBOT_PERF_HEALTHZ_P95_MS (default 500) and
 * LANGBOT_PERF_SYSTEM_INFO_P95_MS (default 1000). A value that is not a
 * positive finite number falls back to the default, because a NaN or zero
 * budget could never be met.
 *
 * @returns {Array<{id: string, path: string, expected_status: number,
 *   expected_code: number, p95_budget_ms: number, required_data_fields: string[]}>}
 *   A fresh array on every call.
 */
function controlPlaneEndpoints() {
  const budgetFromEnv = (name, defaultMs) => {
    const raw = env[name];
    if (!raw) return defaultMs;
    const parsed = Number(raw);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : defaultMs;
  };

  return [
    {
      id: "healthz",
      path: "/healthz",
      expected_status: 200,
      expected_code: 0,
      p95_budget_ms: budgetFromEnv("LANGBOT_PERF_HEALTHZ_P95_MS", 500),
      required_data_fields: [],
    },
    {
      id: "system_info",
      path: "/api/v1/system/info",
      expected_status: 200,
      expected_code: 0,
      p95_budget_ms: budgetFromEnv("LANGBOT_PERF_SYSTEM_INFO_P95_MS", 1000),
      required_data_fields: ["version", "edition", "enable_marketplace"],
    },
  ];
}
|
||||
|
||||
/**
 * Requests one control-plane endpoint and grades the response.
 *
 * A sample is `ok` when the HTTP status equals `endpoint.expected_status`,
 * the JSON body's `code` equals `endpoint.expected_code`, and every name in
 * `endpoint.required_data_fields` is present in the body's `data` object.
 * The request is aborted after `timeoutMs`; any thrown error (including an
 * abort) yields a status-0 sample carrying the error message.
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {object} endpoint - Endpoint descriptor (id, path, expectations).
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<object>} The graded sample.
 */
async function fetchEndpoint(backendUrl, endpoint, timeoutMs) {
  const url = joinUrl(backendUrl, endpoint.path);
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), timeoutMs);
  const startedMs = performance.now();
  const elapsedMs = () => Number((performance.now() - startedMs).toFixed(3));
  const identity = { endpoint_id: endpoint.id, path: endpoint.path, url };

  try {
    const response = await fetch(url, {
      method: "GET",
      headers: { "accept": "application/json" },
      signal: aborter.signal,
    });
    const bodyText = await response.text();

    // An empty body or a parse failure leaves the payload null (invalid JSON).
    let payload = null;
    let parseFailure = "";
    try {
      if (bodyText) payload = JSON.parse(bodyText);
    } catch (parseError) {
      parseFailure = parseError instanceof Error ? parseError.message : String(parseError);
    }

    const jsonValid = payload !== null;
    const isObjectPayload = Boolean(payload) && typeof payload === "object";
    const hasDataObject = isObjectPayload && payload.data && typeof payload.data === "object";
    const data = hasDataObject ? payload.data : {};
    const missingFields = endpoint.required_data_fields.filter((field) => !(field in data));
    const statusOk = response.status === endpoint.expected_status;
    const codeOk = isObjectPayload ? payload.code === endpoint.expected_code : false;
    const shapeOk = jsonValid && missingFields.length === 0;

    return {
      ...identity,
      status: response.status,
      ok: statusOk && codeOk && shapeOk,
      status_ok: statusOk,
      code_ok: codeOk,
      json_valid: jsonValid,
      missing_fields: missingFields,
      response_code: isObjectPayload ? payload.code : null,
      latency_ms: elapsedMs(),
      error: parseFailure,
    };
  } catch (fetchError) {
    return {
      ...identity,
      status: 0,
      ok: false,
      status_ok: false,
      code_ok: false,
      json_valid: false,
      missing_fields: endpoint.required_data_fields,
      response_code: null,
      latency_ms: elapsedMs(),
      error: fetchError instanceof Error ? fetchError.message : String(fetchError),
    };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Sends `totalRequests` endpoint probes in sequential batches, cycling
 * through `endpoints` in order. Each batch runs in parallel and must finish
 * before the next one starts.
 *
 * @param {string} backendUrl - Backend base URL passed to `fetchEndpoint`.
 * @param {object[]} endpoints - Endpoint descriptors, used round-robin.
 * @param {number} totalRequests - Number of requests to send; a non-finite or
 *   negative value sends none.
 * @param {number} concurrency - Requests per batch. Values below 1 and NaN
 *   are raised to 1; they previously produced empty batches and an endless
 *   loop.
 * @param {number} timeoutMs - Per-request timeout passed to `fetchEndpoint`.
 * @returns {Promise<object[]>} One sample per request, in request order.
 */
async function runBatches(backendUrl, endpoints, totalRequests, concurrency, timeoutMs) {
  const total = Number.isFinite(totalRequests) ? Math.max(0, Math.floor(totalRequests)) : 0;
  const batchSize = Number.isNaN(Number(concurrency)) ? 1 : Math.max(1, Math.floor(Number(concurrency)));
  const results = [];
  for (let start = 0; start < total; start += batchSize) {
    const size = Math.min(batchSize, total - start);
    const batch = Array.from({ length: size }, (_, offset) => endpoints[(start + offset) % endpoints.length]);
    const samples = await Promise.all(batch.map((endpoint) => fetchEndpoint(backendUrl, endpoint, timeoutMs)));
    results.push(...samples);
  }
  return results;
}
|
||||
|
||||
/**
 * Aggregates samples per endpoint id.
 *
 * For every endpoint the summary holds its path, request count, ok count,
 * error rate (four decimals; 1 when the endpoint received no requests),
 * latency statistics over the ok samples only, and the endpoint's p95 budget.
 *
 * @param {object[]} endpoints - Endpoint descriptors.
 * @param {object[]} results - Samples tagged with `endpoint_id`.
 * @returns {Object<string, object>} Summary keyed by endpoint id.
 */
function endpointMetrics(endpoints, results) {
  const entries = [];
  for (const endpoint of endpoints) {
    const samples = results.filter((item) => item.endpoint_id === endpoint.id);
    const okSamples = samples.filter((item) => item.ok);
    const failedCount = samples.length - okSamples.length;
    const errorRate = samples.length === 0 ? 1 : Number((failedCount / samples.length).toFixed(4));
    const okLatencies = okSamples.map((item) => item.latency_ms);
    entries.push([
      endpoint.id,
      {
        path: endpoint.path,
        requests: samples.length,
        ok_count: okSamples.length,
        error_rate: errorRate,
        latency_ms: stats(okLatencies),
        p95_budget_ms: endpoint.p95_budget_ms,
      },
    ]);
  }
  return Object.fromEntries(entries);
}
|
||||
|
||||
/**
 * Run the live control-plane API probe and write its evidence bundle.
 *
 * Reads its configuration from environment variables, sends the probe
 * requests through runBatches(), derives error-rate, response-shape and
 * per-endpoint p95 thresholds, writes metrics.json, endpoints.json,
 * network.log, automation-result.json and result.json into the evidence
 * directory, prints the result as JSON, and terminates the process:
 * exit code 0 for "pass", 2 for "env_issue", 1 otherwise.
 */
async function main() {
  const caseId = "langbot-live-control-plane-api";
  const root = resolve(env.LBS_ROOT || process.cwd());
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const startedAt = new Date();
  const backendUrl = env.LANGBOT_BACKEND_URL || "";

  // Only finite numeric entries of the budgets JSON replace an endpoint's p95 budget.
  const endpoints = controlPlaneEndpoints();
  const budgetOverrides = parseJsonObject(env.LANGBOT_CONTROL_PLANE_P95_BUDGETS_JSON, {});
  for (const endpoint of endpoints) {
    const override = budgetOverrides[endpoint.id];
    const usable = typeof override === "number" && Number.isFinite(override);
    if (usable) endpoint.p95_budget_ms = override;
  }

  const numericSetting = (name, fallback) => Number(env[name] || fallback);
  const totalRequests = numericSetting("LANGBOT_CONTROL_PLANE_REQUESTS", "20");
  const concurrency = numericSetting("LANGBOT_CONTROL_PLANE_CONCURRENCY", "4");
  const timeoutMs = numericSetting("LANGBOT_CONTROL_PLANE_TIMEOUT_MS", "5000");
  const maxErrorRate = numericSetting("LANGBOT_CONTROL_PLANE_MAX_ERROR_RATE", "0");

  const artifact = (fileName) => join(evidenceDir, fileName);
  const metricsPath = artifact("metrics.json");
  const endpointsPath = artifact("endpoints.json");
  const networkLogPath = artifact("network.log");
  const automationResultPath = artifact("automation-result.json");
  const resultPath = artifact("result.json");

  let status = "fail";
  let reason = "";
  let results = [];
  if (backendUrl) {
    results = await runBatches(backendUrl, endpoints, totalRequests, concurrency, timeoutMs);
    // Samples with status 0 are counted as network errors below; if every
    // sample is one, the run is classified as an environment issue.
    const backendUnreachable = results.length > 0 && results.every((item) => item.status === 0);
    if (backendUnreachable) {
      status = "env_issue";
      reason = `Backend did not respond at ${backendUrl}.`;
    }
  } else {
    status = "env_issue";
    reason = "LANGBOT_BACKEND_URL is not configured.";
  }

  const okResults = results.filter((item) => item.ok);
  const failedCount = results.length - okResults.length;

  const statusCounts = {};
  for (const { status: httpStatus } of results) {
    const bucket = httpStatus === 0 ? "network_error" : String(httpStatus);
    statusCounts[bucket] = (statusCounts[bucket] || 0) + 1;
  }

  const perEndpoint = endpointMetrics(endpoints, results);

  const hasValidShape = (item) => item.json_valid && !(item.missing_fields.length > 0) && item.code_ok;
  const responseShapeFailures = results.filter((item) => !hasValidShape(item)).length;

  // An empty result set is reported as a 100% error rate.
  const errorRate = results.length === 0 ? 1 : Number((failedCount / results.length).toFixed(4));

  const thresholds = {
    error_rate: { actual: errorRate, max: maxErrorRate, pass: errorRate <= maxErrorRate },
    response_shape_failures: { actual: responseShapeFailures, max: 0, pass: responseShapeFailures === 0 },
  };
  for (const endpoint of endpoints) {
    const observedP95 = perEndpoint[endpoint.id].latency_ms.p95;
    thresholds[`${endpoint.id}_p95_ms`] = {
      actual: observedP95,
      max: endpoint.p95_budget_ms,
      pass: observedP95 <= endpoint.p95_budget_ms,
    };
  }

  // An env_issue classification is final; otherwise the thresholds decide.
  if (status !== "env_issue") {
    if (Object.values(thresholds).every((check) => check.pass)) {
      status = "pass";
      reason = "Live control-plane API probe passed all thresholds.";
    } else {
      status = "fail";
      reason = "Live control-plane API probe breached shape, latency, or error-rate thresholds.";
    }
  }

  const metrics = {
    probe: caseId,
    backend_url: backendUrl,
    total_requests: totalRequests,
    concurrency,
    timeout_ms: timeoutMs,
    ok_count: okResults.length,
    error_count: failedCount,
    error_rate: errorRate,
    status_counts: statusCounts,
    response_shape_failures: responseShapeFailures,
    endpoints: perEndpoint,
  };

  const toJsonText = (value) => `${JSON.stringify(value, null, 2)}\n`;
  const networkLines = results.map((item) => JSON.stringify(item));
  const networkLogText = networkLines.join("\n") + (networkLines.length > 0 ? "\n" : "");

  await writeFile(metricsPath, toJsonText({ ...metrics, samples: results }), "utf8");
  await writeFile(endpointsPath, toJsonText(endpoints), "utf8");
  await writeFile(networkLogPath, networkLogText, "utf8");

  const finishedAt = new Date();
  const endpointSummaries = Object.fromEntries(
    Object.entries(perEndpoint).map(([id, value]) => [
      id,
      {
        path: value.path,
        ok_count: value.ok_count,
        error_rate: value.error_rate,
        latency_p50_ms: value.latency_ms.p50,
        latency_p95_ms: value.latency_ms.p95,
      },
    ]),
  );
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason,
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    url: backendUrl,
    metrics_summary: {
      requests: totalRequests,
      concurrency,
      ok_count: okResults.length,
      error_rate: errorRate,
      response_shape_failures: responseShapeFailures,
      endpoints: endpointSummaries,
      status_counts: statusCounts,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      endpoints_json: endpointsPath,
      network_log: networkLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "network", "api_diagnostic", "filesystem"],
  };

  // The same payload is written under both file names.
  const resultText = toJsonText(result);
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));

  let exitCode = 1;
  if (status === "pass") exitCode = 0;
  else if (status === "env_issue") exitCode = 2;
  exit(exitCode);
}
|
||||
|
||||
// Script entry point: run the probe. main() ends the process itself through exit().
await main();
|
||||
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join, resolve } from "node:path";
|
||||
import { env, exit } from "node:process";
|
||||
|
||||
/**
 * Convert a value to a string and left-pad it with "0" up to `size` characters.
 *
 * @param {*} value - Value to stringify.
 * @param {number} [size=2] - Minimum length of the returned string.
 * @returns {string} The zero-padded string.
 */
function pad(value, size = 2) {
  const text = String(value);
  return text.padStart(size, "0");
}
|
||||
|
||||
/**
 * Format a date in local time as `YYYY-MM-DDTHH:mm:ss.SSS±HH:MM`.
 *
 * @param {Date} [date=new Date()] - Date to format.
 * @returns {string} Local date-time string with the UTC offset appended.
 */
function localIsoWithOffset(date = new Date()) {
  // getTimezoneOffset() is UTC minus local time in minutes, hence the negation.
  const offsetMinutes = -date.getTimezoneOffset();
  const absolute = Math.abs(offsetMinutes);
  const sign = offsetMinutes >= 0 ? "+" : "-";

  const datePart = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const clockPart = `${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`;
  const millisPart = pad(date.getMilliseconds(), 3);
  const zonePart = `${sign}${pad(Math.floor(absolute / 60))}:${pad(absolute % 60)}`;

  return `${datePart}T${clockPart}.${millisPart}${zonePart}`;
}
|
||||
|
||||
/**
 * Build a slug from a date's UTC ISO timestamp.
 *
 * Milliseconds are dropped, every run of non-alphanumeric characters becomes
 * a single "-", and a leading or trailing "-" is removed.
 *
 * @param {Date} [date=new Date()] - Date to convert.
 * @returns {string} Slug such as `2024-01-02T03-04-05Z`.
 */
function timestampSlug(date = new Date()) {
  const withoutMillis = date.toISOString().replace(/\.\d{3}Z$/, "Z");
  const dashed = withoutMillis.replace(/[^0-9A-Za-z]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
|
||||
|
||||
/**
 * Nearest-rank percentile of a list of numbers, rounded to 3 decimals.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @param {number} percentileValue - Percentile to read, e.g. 50, 95, 99.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
function percentile(values, percentileValue) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((percentileValue / 100) * sorted.length) - 1;
  // Clamp on both sides: a percentile of 0 would otherwise give rank -1 and
  // read past the start of the array; values above 100 are capped at the max.
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return Number(sorted[index].toFixed(3));
}
|
||||
|
||||
/**
 * Summarize a list of numbers as min, p50, p95, p99 and max.
 *
 * @param {number[]} values - Samples to summarize.
 * @returns {{min: number, p50: number, p95: number, p99: number, max: number}}
 *   Summary with min and max rounded to 3 decimals; all zeros for empty input.
 */
function stats(values) {
  // Math.min() / Math.max() with no arguments return Infinity / -Infinity,
  // which JSON.stringify writes as null. Report zeros instead, matching the
  // value percentile() returns for an empty list.
  if (values.length === 0) {
    return { min: 0, p50: 0, p95: 0, p99: 0, max: 0 };
  }
  return {
    min: Number(Math.min(...values).toFixed(3)),
    p50: percentile(values, 50),
    p95: percentile(values, 95),
    p99: percentile(values, 99),
    max: Number(Math.max(...values).toFixed(3)),
  };
}
|
||||
|
||||
/**
 * Build a threshold record comparing an observed value against a limit.
 *
 * @param {number} actual - Observed value.
 * @param {number} limit - Limit to compare against.
 * @param {string} operator - "<=" checks an upper bound and stores the limit
 *   under `max`; any other value checks a lower bound and stores it under `min`.
 * @returns {{actual: number, max?: number, min?: number, pass: boolean}}
 */
function threshold(actual, limit, operator) {
  if (operator === "<=") {
    return { actual, max: limit, pass: actual <= limit };
  }
  return { actual, min: limit, pass: actual >= limit };
}
|
||||
|
||||
/**
 * Produce one deterministic synthetic latency sample for the given index.
 *
 * Each segment is a fixed base plus a step driven by `index` modulo a small
 * number, so the same index always yields the same sample.
 *
 * @param {number} index - Sample index.
 * @returns {object} Sample with per-segment timings (`segments_ms`), the
 *   LangBot overhead and external totals, the end-to-end latency, and the
 *   difference between the end-to-end latency and the sum of the two totals.
 */
function makeSample(index) {
  const segments = {
    ingress: 1 + (index % 5) * 0.22,
    pipeline: 2.8 + (index % 7) * 0.31,
    persistence: 1.1 + (index % 4) * 0.2,
    plugin_ipc: 1.9 + (index % 6) * 0.27,
    rag: index % 3 === 0 ? 4.4 : 0.8 + (index % 5) * 0.18,
    streaming: 1.5 + (index % 8) * 0.24,
    provider: 80 + (index % 13) * 11,
    external_tool: index % 4 === 0 ? 25 + (index % 9) * 3 : 0,
    network: 8 + (index % 10) * 1.7,
  };

  // Summation order is kept fixed so floating-point results stay reproducible.
  const overhead =
    segments.ingress +
    segments.pipeline +
    segments.persistence +
    segments.plugin_ipc +
    segments.rag +
    segments.streaming;
  const external = segments.provider + segments.external_tool + segments.network;
  const total = overhead + external;

  const rounded = (value, digits) => Number(value.toFixed(digits));

  return {
    index,
    segments_ms: segments,
    langbot_overhead_ms: rounded(overhead, 3),
    external_latency_ms: rounded(external, 3),
    e2e_latency_ms: rounded(total, 3),
    accounting_gap_ms: rounded(total - external - overhead, 6),
  };
}
|
||||
|
||||
/**
 * Run the synthetic overhead-accounting contract probe and write its evidence.
 *
 * Generates deterministic samples with makeSample(), summarizes them,
 * evaluates the sample-count, overhead-p95 and accounting-gap thresholds,
 * writes metrics.json, thresholds.json, resource-log.json,
 * automation-result.json and result.json into the evidence directory, prints
 * the result as JSON, and terminates the process with exit code 0 on "pass"
 * and 1 otherwise.
 *
 * Environment variables read: LBS_ROOT, LBS_RUN_ID, LBS_EVIDENCE_DIR,
 * LANGBOT_PERF_CONTRACT_SAMPLES (default "80") and
 * LANGBOT_PERF_OVERHEAD_P95_MS (default "25").
 */
async function main() {
  const root = resolve(env.LBS_ROOT || process.cwd());
  const caseId = "langbot-overhead-accounting-contract";
  const runId = env.LBS_RUN_ID || `${timestampSlug()}-${caseId}`;
  const evidenceDir = resolve(env.LBS_EVIDENCE_DIR || join(root, "reports", "evidence", runId));
  await mkdir(evidenceDir, { recursive: true });

  const startedAt = new Date();
  const sampleCount = Number(env.LANGBOT_PERF_CONTRACT_SAMPLES || "80");
  const overheadP95BudgetMs = Number(env.LANGBOT_PERF_OVERHEAD_P95_MS || "25");
  const samples = Array.from({ length: sampleCount }, (_, index) => makeSample(index));
  const overheads = samples.map((sample) => sample.langbot_overhead_ms);
  const e2e = samples.map((sample) => sample.e2e_latency_ms);
  const external = samples.map((sample) => sample.external_latency_ms);
  const gaps = samples.map((sample) => Math.abs(sample.accounting_gap_ms));
  const memory = process.memoryUsage();

  // Math.max() with no arguments is -Infinity, which JSON.stringify would
  // write as null; report a gap of 0 when there are no samples.
  const accountingGapMax = gaps.length === 0 ? 0 : Math.max(...gaps);

  const metrics = {
    probe: caseId,
    sample_count: sampleCount,
    langbot_overhead_ms: stats(overheads),
    e2e_latency_ms: stats(e2e),
    external_latency_ms: stats(external),
    accounting_gap_max_ms: Number(accountingGapMax.toFixed(6)),
    samples,
  };
  const thresholds = {
    sample_count: threshold(sampleCount, 50, ">="),
    langbot_overhead_p95_ms: threshold(metrics.langbot_overhead_ms.p95, overheadP95BudgetMs, "<="),
    accounting_gap_max_ms: threshold(metrics.accounting_gap_max_ms, 0.001, "<="),
  };
  const status = Object.values(thresholds).every((item) => item.pass) ? "pass" : "fail";

  const metricsPath = join(evidenceDir, "metrics.json");
  const thresholdsPath = join(evidenceDir, "thresholds.json");
  const resourceLogPath = join(evidenceDir, "resource-log.json");
  const automationResultPath = join(evidenceDir, "automation-result.json");
  const resultPath = join(evidenceDir, "result.json");

  await writeFile(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`, "utf8");
  await writeFile(thresholdsPath, `${JSON.stringify(thresholds, null, 2)}\n`, "utf8");
  await writeFile(resourceLogPath, `${JSON.stringify({ memory, pid: process.pid }, null, 2)}\n`, "utf8");

  const finishedAt = new Date();
  const result = {
    source: "automation",
    case_id: caseId,
    run_id: runId,
    status,
    reason: status === "pass"
      ? "Overhead accounting contract passed all thresholds."
      : "Overhead accounting contract breached one or more thresholds.",
    started_at: startedAt.toISOString(),
    started_at_local: localIsoWithOffset(startedAt),
    finished_at: finishedAt.toISOString(),
    finished_at_local: localIsoWithOffset(finishedAt),
    duration_ms: finishedAt.getTime() - startedAt.getTime(),
    metrics_summary: {
      sample_count: metrics.sample_count,
      langbot_overhead_p95_ms: metrics.langbot_overhead_ms.p95,
      e2e_latency_p95_ms: metrics.e2e_latency_ms.p95,
      external_latency_p95_ms: metrics.external_latency_ms.p95,
      accounting_gap_max_ms: metrics.accounting_gap_max_ms,
    },
    thresholds_summary: thresholds,
    artifacts: {
      metrics_json: metricsPath,
      thresholds_json: thresholdsPath,
      resource_log_json: resourceLogPath,
      automation_result_json: automationResultPath,
      result_json: resultPath,
    },
    evidence_collected: ["metrics", "resource_log", "filesystem"],
  };

  // The same payload is written under both file names.
  const resultText = `${JSON.stringify(result, null, 2)}\n`;
  await writeFile(automationResultPath, resultText, "utf8");
  await writeFile(resultPath, resultText, "utf8");
  console.log(JSON.stringify(result, null, 2));
  exit(status === "pass" ? 0 : 1);
}
|
||||
|
||||
// Script entry point: run the probe. main() ends the process itself through exit().
await main();
|
||||
@@ -0,0 +1,173 @@
|
||||
# Performance And Reliability Testing
|
||||
|
||||
Use this reference when a QA request asks whether LangBot is fast enough,
|
||||
stable under load, or resilient to controlled faults.
|
||||
|
||||
## Scope
|
||||
|
||||
Treat `skills/` as the QA control plane:
|
||||
|
||||
- Cases define intent, readiness, thresholds, and required evidence.
|
||||
- Probe scripts collect metrics, traces, resource logs, and artifacts.
|
||||
- Reports classify each run as `pass`, `fail`, `blocked`,
|
||||
`env_issue`, or `flaky`.
|
||||
|
||||
Do not turn `skills/` into a load generator or chaos engine. Call a focused
|
||||
tool from a `mode: probe` case when the test needs one, for example k6,
|
||||
Locust, pytest-benchmark, Playwright trace collection, Toxiproxy, Docker, or a
|
||||
Kubernetes disruption tool.
|
||||
|
||||
## LangBot Performance Model
|
||||
|
||||
For LangBot, performance is the cost LangBot adds around external systems:
|
||||
|
||||
```text
|
||||
LangBot overhead = end-to-end latency - provider latency - external tool latency - network/fault injection latency
|
||||
```
|
||||
|
||||
Measure user experience and internal composition separately:
|
||||
|
||||
- WebUI load and interaction latency.
|
||||
- Debug Chat send-to-first-visible-token and send-to-completion latency.
|
||||
- Pipeline, RAG, plugin runtime, MCP, AgentRunner, and persistence segment
|
||||
latency.
|
||||
- Queue wait time, concurrency, throughput, timeout rate, and p95/p99 latency.
|
||||
- Startup, plugin install, knowledge-base ingestion, migration, and recovery
|
||||
time.
|
||||
|
||||
Do not report a single message round-trip time as "LangBot performance" unless
|
||||
the report also breaks out the external provider, tool, and network time.
|
||||
|
||||
## Evidence Contract
|
||||
|
||||
Performance and reliability cases should declare the evidence they need:
|
||||
|
||||
- `metrics`: machine-readable latency, throughput, error-rate, or recovery
|
||||
metrics, usually `metrics.json`.
|
||||
- `resource_log`: CPU, memory, process, connection, queue, or file descriptor
|
||||
samples.
|
||||
- `trace`: browser, HTTP, database, or runtime trace artifacts.
|
||||
- `profile`: CPU, memory, or flamegraph profile artifacts.
|
||||
- `backend_log`, `network`, `api_diagnostic`, and `filesystem` as supporting
|
||||
evidence when relevant.
|
||||
|
||||
Automation should write `automation-result.json` with these fields when
|
||||
available:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "pass",
|
||||
"reason": "Probe passed all thresholds.",
|
||||
"metrics_summary": {
|
||||
"langbot_overhead_p95_ms": 12.4,
|
||||
"error_rate": 0
|
||||
},
|
||||
"thresholds_summary": {
|
||||
"langbot_overhead_p95_ms": { "actual": 12.4, "max": 50, "pass": true }
|
||||
},
|
||||
"artifacts": {
|
||||
"metrics_json": "/path/to/metrics.json"
|
||||
},
|
||||
"evidence_collected": ["metrics", "filesystem"]
|
||||
}
|
||||
```
|
||||
|
||||
Synthetic contract probes are useful for checking the QA harness, but they are
|
||||
not live product performance results. Label them as contract probes in the case
|
||||
title, checks, and report.
|
||||
|
||||
## Chaos And Reliability Rules
|
||||
|
||||
Chaos tests must be narrow and reversible:
|
||||
|
||||
- Declare the fault model in `fault_model_json`.
|
||||
- Record blast radius, target component, injection method, duration, and abort
|
||||
conditions.
|
||||
- Capture recovery checks and cleanup steps in the case.
|
||||
- Classify unavailable dependencies as `env_issue` unless the target behavior
|
||||
is LangBot's handling of that dependency failure.
|
||||
- Do not run destructive fault injection against a shared or production-like
|
||||
instance without explicit operator approval.
|
||||
|
||||
Recommended first fault models:
|
||||
|
||||
- Provider timeout or HTTP 429 from a fake provider endpoint.
|
||||
- Plugin runtime disconnect/reconnect in a local instance.
|
||||
- MCP stdio server exits mid-call.
|
||||
- RAG parser fixture fails once and recovers on retry.
|
||||
- Backend API endpoint returns 5xx from a controlled local proxy.
|
||||
|
||||
## Starter Live Probes
|
||||
|
||||
The starter gate separates QA-harness contracts from live product checks:
|
||||
|
||||
- `langbot-overhead-accounting-contract` verifies that reports can carry
|
||||
overhead accounting metrics. It uses deterministic synthetic samples and is
|
||||
not live product performance.
|
||||
- `langbot-fault-taxonomy-contract` verifies that fault scenarios declare
|
||||
expected status, recovery, and cleanup before destructive chaos tests are
|
||||
added.
|
||||
- `langbot-live-backend-latency` checks the unauthenticated `/healthz`
|
||||
endpoint for basic backend responsiveness.
|
||||
- `langbot-live-control-plane-api` checks `/healthz` and
|
||||
`/api/v1/system/info` for HTTP 200, JSON `code: 0`, response shape, and
|
||||
per-endpoint p95 latency.
|
||||
- `langbot-live-backend-log-health` scans the recent backend log window for
|
||||
fail-severity runtime findings. It is the reliability guard that should fail
|
||||
the gate when HTTP probes pass but backend logs contain Traceback, ImportError,
|
||||
ERROR, unclosed sessions, or unawaited coroutine signals.
|
||||
|
||||
Do not treat these starter live probes as Debug Chat or model-provider
|
||||
performance. They are control-plane readiness checks; user-facing performance
|
||||
needs browser/WebSocket/message-path measurements.
|
||||
|
||||
## Gate Layers
|
||||
|
||||
Use the smallest gate that answers the quality question:
|
||||
|
||||
- `langbot-performance-contract-gate`: fast synthetic checks for report shape,
|
||||
threshold accounting, and fault taxonomy. Good for PR feedback when no live
|
||||
service is running.
|
||||
- `langbot-live-backend-gate`: live backend `/healthz`,
|
||||
`/api/v1/system/info`, and backend log health. Good after starting a local
|
||||
LangBot backend.
|
||||
- `langbot-user-path-performance-gate`: browser-visible user path performance,
|
||||
starting with Pipeline Debug Chat send-to-visible-completion latency. Run it
|
||||
only when the browser profile and target pipeline are ready.
|
||||
- `langbot-performance-reliability-gate`: combined starter gate for synthetic
|
||||
contracts plus live backend checks.
|
||||
|
||||
Keep environment diagnostics separate from product regressions. For example, a
|
||||
SOCKS proxy without Python `socksio` support should be fixed or clearly
|
||||
classified by `bin/lbs env doctor`; do not hide the resulting backend
|
||||
Traceback in reports.
|
||||
|
||||
## Debug Chat Performance
|
||||
|
||||
`pipeline-debug-chat-performance` reuses the browser Debug Chat automation and
|
||||
adds `metrics.json`, `metrics_summary`, and `thresholds_summary` to
|
||||
`automation-result.json`.
|
||||
|
||||
Current metric:
|
||||
|
||||
```text
|
||||
response_duration_ms = prompt send -> expected assistant response visible and stable
|
||||
```
|
||||
|
||||
This is a user-path metric, not pure LangBot overhead. If it regresses, inspect
|
||||
provider latency, model route health, plugin/runtime logs, WebSocket behavior,
|
||||
and browser console/network evidence before attributing the whole duration to
|
||||
LangBot.
|
||||
|
||||
## Running The First Gate
|
||||
|
||||
Start with the reusable suite:
|
||||
|
||||
```bash
|
||||
rtk bin/lbs suite plan langbot-performance-reliability-gate
|
||||
rtk bin/lbs suite start langbot-performance-reliability-gate --run-id langbot-perf-rel-local
|
||||
```
|
||||
|
||||
Run synthetic contract probes first. Run live probes only after the selected
|
||||
backend/frontend instance is reachable and the run owner accepts any fault
|
||||
scope.
|
||||
@@ -0,0 +1,14 @@
|
||||
id: langbot-live-backend-gate
|
||||
title: "LangBot live backend reliability gate"
|
||||
description: "Live backend control-plane responsiveness and runtime log health checks for a locally running LangBot instance."
|
||||
type: reliability
|
||||
priority: p1
|
||||
tags:
|
||||
- performance
|
||||
- reliability
|
||||
- live-backend
|
||||
- metrics
|
||||
cases:
|
||||
- langbot-live-backend-latency
|
||||
- langbot-live-control-plane-api
|
||||
- langbot-live-backend-log-health
|
||||
@@ -0,0 +1,13 @@
|
||||
id: langbot-performance-contract-gate
|
||||
title: "LangBot performance contract gate"
|
||||
description: "Fast synthetic contract checks for performance metric accounting and non-destructive reliability fault taxonomy."
|
||||
type: contract
|
||||
priority: p1
|
||||
tags:
|
||||
- performance
|
||||
- reliability
|
||||
- contract
|
||||
- metrics
|
||||
cases:
|
||||
- langbot-overhead-accounting-contract
|
||||
- langbot-fault-taxonomy-contract
|
||||
@@ -0,0 +1,16 @@
|
||||
id: langbot-performance-reliability-gate
|
||||
title: "LangBot performance and reliability starter gate"
|
||||
description: "Starter gate for LangBot performance accounting, live backend control-plane latency, and non-destructive fault taxonomy checks."
|
||||
type: reliability
|
||||
priority: p1
|
||||
tags:
|
||||
- performance
|
||||
- reliability
|
||||
- metrics
|
||||
- chaos
|
||||
cases:
|
||||
- langbot-overhead-accounting-contract
|
||||
- langbot-fault-taxonomy-contract
|
||||
- langbot-live-backend-latency
|
||||
- langbot-live-control-plane-api
|
||||
- langbot-live-backend-log-health
|
||||
@@ -0,0 +1,12 @@
|
||||
id: langbot-user-path-performance-gate
|
||||
title: "LangBot user-path performance gate"
|
||||
description: "Browser-visible performance checks for user-facing LangBot paths such as Pipeline Debug Chat."
|
||||
type: performance
|
||||
priority: p1
|
||||
tags:
|
||||
- performance
|
||||
- browser
|
||||
- debug-chat
|
||||
- user-path
|
||||
cases:
|
||||
- pipeline-debug-chat-performance
|
||||
@@ -1,5 +1,7 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { Socket } from "node:net";
|
||||
import { join } from "node:path";
|
||||
import type { CommandContext } from "../types.ts";
|
||||
import { parseOptions } from "../cli.ts";
|
||||
import { loadEnv } from "../fs.ts";
|
||||
@@ -88,6 +90,37 @@ function compareProxyPair(env: Record<string, string>, upper: string, lower: str
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Look up a setting, preferring the live process environment over the loaded env map.
 *
 * @param env - Key/value map to fall back to.
 * @param key - Variable name to look up.
 * @returns The value from `process.env`, else from `env`, else an empty string.
 */
function envValue(env: Record<string, string>, key: string): string {
  const fromProcess = process.env[key];
  if (fromProcess !== undefined && fromProcess !== null) return fromProcess;
  const fromMap = env[key];
  return fromMap ?? "";
}
|
||||
|
||||
/**
 * Find the first proxy variable whose value starts with "socks" (case-insensitive).
 *
 * Variables are checked in this order: ALL_PROXY, all_proxy, HTTPS_PROXY,
 * https_proxy, HTTP_PROXY, http_proxy.
 *
 * @param env - Env map handed to envValue() for each lookup.
 * @returns The matching variable name and value, or null when none matches.
 */
function activeSocksProxy(env: Record<string, string>): { key: string; value: string } | null {
  const candidates = ["ALL_PROXY", "all_proxy", "HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"];
  const key = candidates.find((name) => /^socks/i.test(envValue(env, name)));
  if (key === undefined) return null;
  return { key, value: envValue(env, key) };
}
|
||||
|
||||
/**
 * Check that the LangBot venv can import `socksio` when a SOCKS proxy is configured.
 *
 * @param env - Loaded env map; `LANGBOT_REPO` locates the venv python at
 *   `<repo>/.venv/bin/python`.
 * @returns null when no SOCKS proxy is active or the import succeeds;
 *   otherwise a failure message for the doctor report.
 */
function checkSocksio(env: Record<string, string>): string | null {
  const proxy = activeSocksProxy(env);
  if (!proxy) return null;

  // Built lazily so the proxy value is only redacted when a message is produced.
  const proxyLabel = (): string =>
    `SOCKS proxy ${proxy.key} is configured (${redactEnvValue(proxy.key, proxy.value)})`;
  const venvMissing = (): string =>
    `${proxyLabel()}, but LangBot venv python was not found; after creating the venv, verify it can import socksio.`;

  const repo = env.LANGBOT_REPO;
  if (!repo) return venvMissing();

  const python = join(repo, ".venv", "bin", "python");
  if (!existsSync(python)) return venvMissing();

  // Any outcome other than exit status 0 is reported as a failed import.
  const probe = spawnSync(python, ["-c", "import socksio"], {
    encoding: "utf8",
    timeout: 5000,
  });
  if (probe.status === 0) return null;

  return `${proxyLabel()}, but ${python} cannot import socksio; run \`${python} -m pip install socksio\` or start LangBot without SOCKS proxy env.`;
}
|
||||
|
||||
export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
|
||||
const env = loadEnv(ctx.root);
|
||||
const failures: string[] = [];
|
||||
@@ -117,6 +150,8 @@ export async function commandEnvDoctor(ctx: CommandContext): Promise<number> {
|
||||
]) {
|
||||
if (mismatch) failures.push(mismatch);
|
||||
}
|
||||
const socksioFailure = checkSocksio(env);
|
||||
if (socksioFailure) failures.push(socksioFailure);
|
||||
|
||||
for (const [label, result] of await Promise.all([
|
||||
checkUrl("LANGBOT_BACKEND_URL", env.LANGBOT_BACKEND_URL).then((result) => ["LANGBOT_BACKEND_URL", result] as const),
|
||||
|
||||
@@ -465,6 +465,41 @@ function outputTail(value: string | Buffer | null | undefined): string {
|
||||
return String(value ?? "").trim().slice(-4000);
|
||||
}
|
||||
|
||||
/**
 * Map a result status string to a process exit status.
 *
 * @param status - Result status string.
 * @returns 0 for "pass"; 2 for "blocked", "env_issue" or "flaky"; 1 for anything else.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
|
||||
|
||||
/**
 * Map a process exit status to an execution status label.
 *
 * @param status - Exit status.
 * @returns "ok" for 0, "classified" for 2, "nonzero" for anything else.
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
|
||||
|
||||
/**
 * Derive an execution record from a case's `result.json`, if one exists.
 *
 * The file is only used when its `case_id` and `run_id` equal the case
 * item's `id` and `run_id` and its `status` is a string.
 *
 * @param caseItem - Case entry providing `evidence_dir`, `id` and `run_id`.
 * @returns The execution record, or null when the file is missing,
 *   unreadable, not valid JSON, or does not match this case and run.
 */
function executionFromCaseResultFile(caseItem: Record<string, unknown>): Record<string, unknown> | null {
  const resultPath = join(String(caseItem.evidence_dir), "result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const matchesCase = parsed.case_id === caseItem.id && parsed.run_id === caseItem.run_id;
    if (!matchesCase) return null;

    const resultStatus = parsed.status;
    if (typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    const reason = typeof parsed.reason === "string" ? parsed.reason : "result.json completed";
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason,
      result_status: resultStatus,
      result_json: resultPath,
    };
  } catch {
    // Read or parse failures are treated the same as having no result file.
    return null;
  }
}
|
||||
|
||||
function executionProblemStatus(executions: Array<Record<string, unknown>>): string {
|
||||
const statuses = executions.map((item) => String(item.status));
|
||||
if (statuses.includes("nonzero")) return "fail";
|
||||
@@ -523,12 +558,18 @@ export function commandSuiteRun(ctx: CommandContext): number {
|
||||
encoding: "utf8",
|
||||
stdio: options.json === true ? "pipe" : "inherit",
|
||||
});
|
||||
const status = result.error ? 1 : result.status ?? 1;
|
||||
const fileExecution = result.error ? executionFromCaseResultFile(caseItem) : null;
|
||||
const status = typeof fileExecution?.exit_status === "number"
|
||||
? fileExecution.exit_status
|
||||
: result.error ? 1 : result.status ?? 1;
|
||||
executions.push({
|
||||
id: caseItem.id,
|
||||
status: status === 0 ? "ok" : "nonzero",
|
||||
status: fileExecution?.status ?? executionStatusFromExitStatus(status),
|
||||
exit_status: status,
|
||||
reason: result.error?.message || "",
|
||||
reason: fileExecution?.reason ?? result.error?.message ?? "",
|
||||
result_status: fileExecution?.result_status,
|
||||
result_json: fileExecution?.result_json,
|
||||
spawn_error: fileExecution && result.error ? result.error.message : undefined,
|
||||
stdout: outputTail(result.stdout),
|
||||
stderr: outputTail(result.stderr),
|
||||
});
|
||||
|
||||
+95
-14
@@ -271,7 +271,7 @@ function reportTemplate(mode: string): Record<string, string> {
|
||||
target_tested: "Probe target, endpoint, file, command, or service actually checked",
|
||||
execution_path: "automation script | shell command | direct API | other",
|
||||
probe_result: "What the probe observed",
|
||||
logs_or_artifacts: "Log, filesystem, API, or other artifact paths collected",
|
||||
metrics_or_artifacts: "Metrics, logs, filesystem artifacts, traces, or profiles collected",
|
||||
diagnostics: "Extra diagnostics used, if any",
|
||||
matched_troubleshooting: "Troubleshooting ids matched, if any",
|
||||
assets_to_update: "New case/reference/troubleshooting entries to add",
|
||||
@@ -320,7 +320,7 @@ function manualEvidenceTemplate(mode: string): ManualEvidenceTemplate {
|
||||
target_tested: "TODO: probe target, endpoint, file, command, or service actually checked",
|
||||
execution_path: "TODO: automation script | shell command | direct API | other",
|
||||
probe_result: "TODO: observed probe result",
|
||||
logs_or_artifacts: "TODO: evidence paths or skipped reason",
|
||||
metrics_or_artifacts: "TODO: metrics, logs, filesystem artifacts, traces, or profiles collected",
|
||||
diagnostics: "TODO: additional diagnostics used, if any",
|
||||
matched_troubleshooting: "TODO: troubleshooting ids matched, if any",
|
||||
assets_to_update: "TODO: case/reference/troubleshooting updates to make",
|
||||
@@ -1099,6 +1099,41 @@ function executionTail(value: string | Buffer | null | undefined): string {
|
||||
return String(value ?? "").trim().slice(-4000);
|
||||
}
|
||||
|
||||
/**
 * Map a result status string to a process exit status.
 *
 * @param status - Result status string.
 * @returns 0 for "pass"; 2 for "blocked", "env_issue" or "flaky"; 1 for anything else.
 */
function exitStatusFromResultStatus(status: string): number {
  switch (status) {
    case "pass":
      return 0;
    case "blocked":
    case "env_issue":
    case "flaky":
      return 2;
    default:
      return 1;
  }
}
|
||||
|
||||
/**
 * Map a process exit status to an execution status label.
 *
 * @param status - Exit status.
 * @returns "ok" for 0, "classified" for 2, "nonzero" for anything else.
 */
function executionStatusFromExitStatus(status: number): string {
  switch (status) {
    case 0:
      return "ok";
    case 2:
      return "classified";
    default:
      return "nonzero";
  }
}
|
||||
|
||||
/**
 * Derive an execution record from `automation-result.json` in an evidence directory.
 *
 * The file is only used when its `case_id` and `run_id` equal the given ids
 * and its `status` is a string.
 *
 * @param evidenceDir - Directory expected to contain `automation-result.json`.
 * @param caseId - Case id the file must declare.
 * @param runId - Run id the file must declare.
 * @returns The execution record, or null when the file is missing,
 *   unreadable, not valid JSON, or does not match this case and run.
 */
function executionFromAutomationResultFile(
  evidenceDir: string,
  caseId: string,
  runId: string,
): { status: string; exit_status: number; reason: string; result_status: string; path: string } | null {
  const resultPath = join(evidenceDir, "automation-result.json");
  if (!existsSync(resultPath)) return null;

  try {
    const parsed = JSON.parse(readFileSync(resultPath, "utf8")) as Record<string, unknown>;
    const matchesRun = parsed.case_id === caseId && parsed.run_id === runId;
    if (!matchesRun) return null;

    const resultStatus = parsed.status;
    if (typeof resultStatus !== "string") return null;

    const exitStatus = exitStatusFromResultStatus(resultStatus);
    const reason = typeof parsed.reason === "string" ? parsed.reason : "automation-result.json completed";
    return {
      status: executionStatusFromExitStatus(exitStatus),
      exit_status: exitStatus,
      reason,
      result_status: resultStatus,
      path: resultPath,
    };
  } catch {
    // Read or parse failures are treated the same as having no result file.
    return null;
  }
}
|
||||
|
||||
function runSetupAutomation(
|
||||
ctx: CommandContext,
|
||||
item: StructuredItem,
|
||||
@@ -1224,6 +1259,30 @@ export function commandTestRun(ctx: CommandContext): number {
|
||||
});
|
||||
|
||||
if (result.error) {
|
||||
const fileExecution = executionFromAutomationResultFile(
|
||||
run.automation.evidence_dir,
|
||||
String(run.case.id),
|
||||
run.run_id,
|
||||
);
|
||||
if (fileExecution) {
|
||||
if (options.json !== true) {
|
||||
console.error(`WARN: automation spawn reported an error, but ${fileExecution.path} completed: ${result.error.message}`);
|
||||
}
|
||||
if (options.json === true) {
|
||||
console.log(JSON.stringify({
|
||||
run,
|
||||
setup_executions: setupExecutions,
|
||||
automation_execution: {
|
||||
...fileExecution,
|
||||
spawn_error: result.error.message,
|
||||
stdout: executionTail(result.stdout),
|
||||
stderr: executionTail(result.stderr),
|
||||
},
|
||||
exit_status: fileExecution.exit_status,
|
||||
}, null, 2));
|
||||
}
|
||||
return fileExecution.exit_status;
|
||||
}
|
||||
if (options.json !== true) console.error(`ERROR: failed to run automation: ${result.error.message}`);
|
||||
if (options.json === true) {
|
||||
console.log(JSON.stringify({
|
||||
@@ -1247,7 +1306,7 @@ export function commandTestRun(ctx: CommandContext): number {
|
||||
run,
|
||||
setup_executions: setupExecutions,
|
||||
automation_execution: {
|
||||
status: status === 0 ? "ok" : "nonzero",
|
||||
status: executionStatusFromExitStatus(status),
|
||||
exit_status: status,
|
||||
stdout: executionTail(result.stdout),
|
||||
stderr: executionTail(result.stderr),
|
||||
@@ -1311,6 +1370,7 @@ function renderMarkdownReport(report: TestReport): string {
|
||||
const environment = report.environment;
|
||||
const logGuard = report.log_guard;
|
||||
const troubleshooting = report.troubleshooting;
|
||||
const automation = report.automation_result;
|
||||
const lines: string[] = [];
|
||||
|
||||
lines.push(`# Test Report: ${reportCase.id}`);
|
||||
@@ -1323,20 +1383,41 @@ function renderMarkdownReport(report: TestReport): string {
|
||||
lines.push(`Type: ${reportCase.type}`);
|
||||
lines.push("");
|
||||
lines.push("## Result");
|
||||
lines.push(`- result: ${evidence.result}`);
|
||||
for (const [key, value] of Object.entries(evidence)) {
|
||||
if (key !== "result") lines.push(`- ${key}: ${value}`);
|
||||
if (automation.status === "loaded" && automation.result) {
|
||||
lines.push(`- result: ${automation.result}`);
|
||||
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
|
||||
if (automation.url) lines.push(`- target_tested: ${automation.url}`);
|
||||
if (automation.path) lines.push(`- automation_result: ${automation.path}`);
|
||||
if (automation.artifacts) lines.push(`- artifacts: ${JSON.stringify(automation.artifacts)}`);
|
||||
} else {
|
||||
lines.push(`- result: ${evidence.result}`);
|
||||
for (const [key, value] of Object.entries(evidence)) {
|
||||
if (key !== "result") lines.push(`- ${key}: ${value}`);
|
||||
}
|
||||
}
|
||||
lines.push("");
|
||||
lines.push("## Automation Result");
|
||||
lines.push(`- status: ${report.automation_result.status}`);
|
||||
if (report.automation_result.path) lines.push(`- path: ${report.automation_result.path}`);
|
||||
if (report.automation_result.result) lines.push(`- result: ${report.automation_result.result}`);
|
||||
if (report.automation_result.reason) lines.push(`- reason: ${report.automation_result.reason}`);
|
||||
if (report.automation_result.started_at_local) lines.push(`- started_at_local: ${report.automation_result.started_at_local}`);
|
||||
if (report.automation_result.finished_at_local) lines.push(`- finished_at_local: ${report.automation_result.finished_at_local}`);
|
||||
if (report.automation_result.url) lines.push(`- url: ${report.automation_result.url}`);
|
||||
if (report.automation_result.expected_text) lines.push(`- expected_text: ${report.automation_result.expected_text}`);
|
||||
lines.push(`- status: ${automation.status}`);
|
||||
if (automation.path) lines.push(`- path: ${automation.path}`);
|
||||
if (automation.result) lines.push(`- result: ${automation.result}`);
|
||||
if (automation.reason) lines.push(`- reason: ${automation.reason}`);
|
||||
if (automation.duration_ms !== undefined) lines.push(`- duration_ms: ${automation.duration_ms}`);
|
||||
if (automation.started_at_local) lines.push(`- started_at_local: ${automation.started_at_local}`);
|
||||
if (automation.finished_at_local) lines.push(`- finished_at_local: ${automation.finished_at_local}`);
|
||||
if (automation.url) lines.push(`- url: ${automation.url}`);
|
||||
if (automation.expected_text) lines.push(`- expected_text: ${automation.expected_text}`);
|
||||
if (automation.metrics_summary) {
|
||||
lines.push("- metrics_summary:");
|
||||
lines.push(` ${JSON.stringify(automation.metrics_summary)}`);
|
||||
}
|
||||
if (automation.thresholds_summary) {
|
||||
lines.push("- thresholds_summary:");
|
||||
lines.push(` ${JSON.stringify(automation.thresholds_summary)}`);
|
||||
}
|
||||
if (automation.artifacts) {
|
||||
lines.push("- artifacts:");
|
||||
lines.push(` ${JSON.stringify(automation.artifacts)}`);
|
||||
}
|
||||
lines.push("");
|
||||
lines.push("## Environment");
|
||||
for (const [key, value] of Object.entries(environment)) lines.push(`- ${key}=${value}`);
|
||||
|
||||
@@ -126,6 +126,9 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set<st
|
||||
...validateEnvKeyScalar(item, "automation_pipeline_url_env"),
|
||||
...validateEnvKeyScalar(item, "automation_pipeline_name_env"),
|
||||
...validateJsonScalar(item, "automation_filesystem_checks_json"),
|
||||
...validateJsonScalar(item, "metrics_thresholds_json"),
|
||||
...validateJsonScalar(item, "load_profile_json"),
|
||||
...validateJsonScalar(item, "fault_model_json"),
|
||||
...listValue(item.fields, "setup_automation").flatMap((entry) => (
|
||||
validateSetupAutomationEntry(root, entry, caseIds).map((error) => `${item.path}: ${error}`)
|
||||
)),
|
||||
|
||||
+27
-2
@@ -9,7 +9,18 @@ export const requiredEnvKeys = [
|
||||
];
|
||||
|
||||
/** Values a case may declare in its `mode` field. */
export const caseModeValues = ["agent-browser", "probe"];

/**
 * Values a case may declare in its `type` field.
 *
 * Declared exactly once: a second `export const caseTypeValues` in the same
 * module is a redeclaration error, so only the full list (the original five
 * types plus the contract/performance/reliability/chaos/security gates) is kept.
 */
export const caseTypeValues = [
  "smoke",
  "regression",
  "feature",
  "provider",
  "exploratory",
  "contract",
  "performance",
  "reliability",
  "chaos",
  "security",
];

/** Values a case may declare in its `priority` field. */
export const casePriorityValues = ["p0", "p1", "p2"];

/** Values a case may declare in its `risk` field. */
export const caseRiskValues = ["low", "medium", "high"];
|
||||
export const caseEvidenceValues = [
|
||||
@@ -21,10 +32,24 @@ export const caseEvidenceValues = [
|
||||
"frontend_log",
|
||||
"api_diagnostic",
|
||||
"filesystem",
|
||||
"metrics",
|
||||
"trace",
|
||||
"profile",
|
||||
"resource_log",
|
||||
];
|
||||
/** Status values a test result may carry. */
export const testResultStatusValues = ["pass", "fail", "blocked", "env_issue", "flaky"];

/** Categories available when classifying a troubleshooting entry. */
export const troubleshootingCategoryValues = ["product", "env_issue", "external_dependency", "blocked", "flaky"];

/**
 * Values a suite may declare in its `type` field.
 *
 * Declared exactly once: a second `export const suiteTypeValues` in the same
 * module is a redeclaration error, so only the full list (the original four
 * types plus the contract/performance/reliability/chaos/security gates) is kept.
 */
export const suiteTypeValues = [
  "smoke",
  "regression",
  "release_gate",
  "exploratory",
  "contract",
  "performance",
  "reliability",
  "chaos",
  "security",
];

/** Scalar string fields every suite definition must provide. */
export const suiteRequiredStrings = ["id", "title", "description", "type", "priority"];

/** List-valued fields every suite definition must provide. */
export const suiteRequiredLists = ["tags", "cases"];
|
||||
|
||||
|
||||
@@ -91,6 +91,7 @@ export type AutomationResultEvidence = {
|
||||
path?: string;
|
||||
result?: string;
|
||||
reason?: string;
|
||||
duration_ms?: number;
|
||||
started_at?: string;
|
||||
started_at_local?: string;
|
||||
finished_at?: string;
|
||||
@@ -98,6 +99,9 @@ export type AutomationResultEvidence = {
|
||||
url?: string;
|
||||
prompt?: string;
|
||||
expected_text?: string;
|
||||
metrics_summary?: Record<string, unknown>;
|
||||
thresholds_summary?: Record<string, unknown>;
|
||||
artifacts?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
type MutableScanState = {
|
||||
@@ -594,6 +598,18 @@ function stringField(data: Record<string, unknown>, key: string): string | undef
|
||||
return typeof value === "string" && value.trim() ? value : undefined;
|
||||
}
|
||||
|
||||
/**
 * Reads `key` from `data` and returns it only when it is a finite number.
 *
 * @param data - Parsed record to read from.
 * @param key - Property name to look up.
 * @returns The stored number, or `undefined` when the property is absent,
 *   is not of type `number`, or is `NaN` / `Infinity` / `-Infinity`.
 */
function numberField(data: Record<string, unknown>, key: string): number | undefined {
  const candidate = data[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  // Non-finite numbers are treated the same as a missing value.
  return Number.isFinite(candidate) ? candidate : undefined;
}
|
||||
|
||||
/**
 * Reads `key` from `data` and returns it only when it is a non-array object.
 *
 * @param data - Parsed record to read from.
 * @param key - Property name to look up.
 * @returns The stored object, or `undefined` when the property is absent,
 *   `null`, a primitive, or an array.
 */
function objectField(data: Record<string, unknown>, key: string): Record<string, unknown> | undefined {
  const candidate = data[key];
  // `typeof null` is "object", so null has to be excluded explicitly.
  if (candidate === null || typeof candidate !== "object" || Array.isArray(candidate)) {
    return undefined;
  }
  return candidate as Record<string, unknown>;
}
|
||||
|
||||
function evidenceDirFromOptions(options: Record<string, string | boolean>): string | undefined {
|
||||
const explicit = typeof options["evidence-dir"] === "string" ? options["evidence-dir"] : undefined;
|
||||
if (explicit) return resolve(explicit);
|
||||
@@ -628,6 +644,7 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
|
||||
path: resultPath,
|
||||
result: stringField(result, "status"),
|
||||
reason: stringField(result, "reason"),
|
||||
duration_ms: numberField(result, "duration_ms"),
|
||||
started_at: stringField(result, "started_at"),
|
||||
started_at_local: stringField(result, "started_at_local"),
|
||||
finished_at: stringField(result, "finished_at"),
|
||||
@@ -635,6 +652,9 @@ export function readAutomationResultEvidence(options: Record<string, string | bo
|
||||
url: stringField(result, "url"),
|
||||
prompt: redactSecrets(stringField(result, "prompt") ?? ""),
|
||||
expected_text: stringField(result, "expected_text"),
|
||||
metrics_summary: objectField(result, "metrics_summary"),
|
||||
thresholds_summary: objectField(result, "thresholds_summary"),
|
||||
artifacts: objectField(result, "artifacts"),
|
||||
};
|
||||
} catch (error) {
|
||||
return { status: "invalid", path: resultPath, reason: String(error) };
|
||||
|
||||
@@ -114,6 +114,8 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro
|
||||
["automation_expected_runner_id", "LANGBOT_E2E_EXPECTED_RUNNER_ID"],
|
||||
["automation_reset_debug_chat", "LANGBOT_E2E_RESET_DEBUG_CHAT"],
|
||||
["automation_debug_chat_session_type", "LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE"],
|
||||
["automation_debug_chat_response_p95_ms", "LANGBOT_E2E_DEBUG_CHAT_RESPONSE_P95_MS"],
|
||||
["automation_debug_chat_max_error_rate", "LANGBOT_E2E_DEBUG_CHAT_MAX_ERROR_RATE"],
|
||||
["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"],
|
||||
["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"],
|
||||
["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],
|
||||
|
||||
+159
-1
@@ -1,6 +1,6 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { test } from "node:test";
|
||||
import { appendFileSync, existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { appendFileSync, chmodSync, existsSync, mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
@@ -676,6 +676,82 @@ test("suite run JSON captures failed case output", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Builds a throwaway skills tree with one case, one suite and an automation
// script that records an `env_issue` result and exits with status 2, then
// checks what `suite run --json` reports for that run.
test("suite run preserves classified env_issue automation results", () => {
  const sandbox = mkdtempSync(join(tmpdir(), "lbs-suite-run-env-issue-"));
  try {
    const skillRoot = join(sandbox, "skills", "langbot-testing");
    const caseRoot = join(skillRoot, "cases");
    const suiteRoot = join(skillRoot, "suites");
    const scriptRoot = join(sandbox, "scripts");
    for (const dir of [caseRoot, suiteRoot, scriptRoot]) {
      mkdirSync(dir, { recursive: true });
    }

    const caseYaml = [
      "id: env-case",
      "title: Env Case",
      "mode: probe",
      "area: qa",
      "type: smoke",
      "priority: p2",
      "risk: low",
      "ci_eligible: true",
      "automation: scripts/env-issue.mjs",
      "evidence_required:",
      "  - filesystem",
    ].join("\n");

    const suiteYaml = [
      "id: mini",
      "title: Mini",
      "description: Mini suite.",
      "type: smoke",
      "priority: p2",
      "tags:",
      "  - qa",
      "cases:",
      "  - env-case",
    ].join("\n");

    // The automation writes both result files itself before exiting non-zero.
    const automationScript = [
      "import { mkdirSync, writeFileSync } from 'node:fs';",
      "import { join } from 'node:path';",
      "mkdirSync(process.env.LBS_EVIDENCE_DIR, { recursive: true });",
      "const result = {",
      "  case_id: process.env.LBS_CASE_ID,",
      "  run_id: process.env.LBS_RUN_ID,",
      "  status: 'env_issue',",
      "  reason: 'backend not reachable',",
      "  evidence_collected: ['filesystem']",
      "};",
      "writeFileSync(join(process.env.LBS_EVIDENCE_DIR, 'result.json'), JSON.stringify(result));",
      "writeFileSync(join(process.env.LBS_EVIDENCE_DIR, 'automation-result.json'), JSON.stringify({ ...result, source: 'automation' }));",
      "process.exit(2);",
    ].join("\n");

    const fixtures: Array<[string, string]> = [
      [join(skillRoot, "SKILL.md"), "---\nname: langbot-testing\ndescription: Testing.\n---\n\n# Testing\n"],
      [join(sandbox, "skills", ".env"), ""],
      [join(caseRoot, "env-case.yaml"), caseYaml],
      [join(suiteRoot, "mini.yaml"), suiteYaml],
      [join(scriptRoot, "env-issue.mjs"), automationScript],
    ];
    for (const [file, content] of fixtures) {
      writeFileSync(file, content);
    }

    const captured = capture(() => commandSuiteRun({
      root: sandbox,
      args: ["suite", "run", "mini", "--run-id", "mini-run", "--evidence-dir", join(sandbox, "evidence"), "--json"],
    }));

    // The exit code mirrors the script's, while the report keeps the
    // classification that the script recorded.
    assert.equal(captured.code, 2);
    const report = JSON.parse(captured.output);
    assert.equal(report.executions[0].status, "classified");
    assert.equal(report.report.status, "env_issue");
    assert.equal(report.report.execution_status, "ok");
  } finally {
    rmSync(sandbox, { recursive: true, force: true });
  }
});
|
||||
|
||||
test("suite run failure cannot be masked by stale pass result", () => {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "lbs-suite-run-stale-pass-"));
|
||||
try {
|
||||
@@ -1369,6 +1445,56 @@ test("env doctor does not require proxy variables", async () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Points `env doctor` at a fake LangBot checkout whose venv `python` always
// exits 1, with a SOCKS proxy configured in the skills `.env`, and expects a
// failing diagnosis that mentions socksio.
test("env doctor reports missing socksio for active SOCKS proxy", async () => {
  const sandbox = mkdtempSync(join(tmpdir(), "lbs-env-doctor-socksio-"));
  // Snapshot the ambient proxy variables so they can be restored afterwards.
  const savedAllProxy = process.env.ALL_PROXY;
  const savedAllProxyLower = process.env.all_proxy;
  try {
    delete process.env.ALL_PROXY;
    delete process.env.all_proxy;

    const skillsRoot = join(sandbox, "skills");
    const repoRoot = join(sandbox, "LangBot");
    const webRoot = join(repoRoot, "web");
    const venvBinDir = join(repoRoot, ".venv", "bin");
    const profileDir = join(sandbox, "browser-profile");
    const chromiumPath = join(sandbox, "chromium");
    for (const dir of [skillsRoot, webRoot, venvBinDir, profileDir]) {
      mkdirSync(dir, { recursive: true });
    }
    writeFileSync(chromiumPath, "");

    // Stub interpreter: an executable shell script that fails unconditionally.
    const fakePython = join(venvBinDir, "python");
    writeFileSync(fakePython, "#!/bin/sh\nexit 1\n");
    chmodSync(fakePython, 0o755);

    const envFile = [
      "LANGBOT_BACKEND_URL=http://127.0.0.1:59996",
      "LANGBOT_FRONTEND_URL=http://127.0.0.1:59996",
      "LANGBOT_DEV_FRONTEND_URL=http://127.0.0.1:59996",
      `LANGBOT_REPO=${repoRoot}`,
      `LANGBOT_WEB_REPO=${webRoot}`,
      `LANGBOT_BROWSER_PROFILE=${profileDir}`,
      `LANGBOT_CHROMIUM_EXECUTABLE=${chromiumPath}`,
      "ALL_PROXY=socks5://127.0.0.1:7890",
    ].join("\n");
    writeFileSync(join(skillsRoot, ".env"), envFile);

    const captured = await captureAsync(() => commandEnvDoctor({ root: sandbox, args: ["env", "doctor"] }));

    assert.equal(captured.code, 1);
    const expectedMessages = [
      /FAIL: SOCKS proxy ALL_PROXY is configured/,
      /cannot import socksio/,
      /-m pip install socksio/,
    ];
    for (const pattern of expectedMessages) {
      assert.match(captured.output, pattern);
    }
  } finally {
    if (savedAllProxy !== undefined) {
      process.env.ALL_PROXY = savedAllProxy;
    } else {
      delete process.env.ALL_PROXY;
    }
    if (savedAllProxyLower !== undefined) {
      process.env.all_proxy = savedAllProxyLower;
    } else {
      delete process.env.all_proxy;
    }
    rmSync(sandbox, { recursive: true, force: true });
  }
});
|
||||
|
||||
test("env show redacts secret-like values by default", () => {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "lbs-env-show-redact-"));
|
||||
try {
|
||||
@@ -2521,6 +2647,38 @@ test("test report renders a reusable evidence template", () => {
|
||||
assert.match(result.output, /no log files provided/);
|
||||
});
|
||||
|
||||
// Drops an `automation-result.json` into an evidence directory and checks that
// `test report` surfaces its status, reason and URL under "## Result" instead
// of the TODO placeholder, while still emitting "## Automation Result".
test("test report promotes loaded automation evidence into result section", () => {
  const evidenceDir = mkdtempSync(join(tmpdir(), "lbs-report-automation-"));
  try {
    const automationResult = {
      status: "pass",
      reason: "latency thresholds passed",
      url: "http://127.0.0.1:5300",
      artifacts: { metrics_json: join(evidenceDir, "metrics.json") },
    };
    writeFileSync(join(evidenceDir, "automation-result.json"), JSON.stringify(automationResult));

    const cliArgs = [
      "test",
      "report",
      "langbot-live-backend-latency",
      "--evidence-dir",
      evidenceDir,
      "--no-auto-log",
    ];
    const captured = capture(() => commandTestReport(ctx(cliArgs)));

    assert.equal(captured.code, 0);
    const report = captured.output;
    assert.match(report, /## Result\n- result: pass\n- reason: latency thresholds passed/);
    assert.match(report, /- target_tested: http:\/\/127\.0\.0\.1:5300/);
    assert.doesNotMatch(report, /target_tested: TODO/);
    assert.match(report, /## Automation Result/);
  } finally {
    rmSync(evidenceDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
test("validate rejects dangling case references and missing automation scripts", () => {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "lbs-validate-strict-"));
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user