diff --git a/skills/docs/user-guide.md b/skills/docs/user-guide.md index 846b7f5c4..007d28d94 100644 --- a/skills/docs/user-guide.md +++ b/skills/docs/user-guide.md @@ -122,12 +122,18 @@ Controlled Debug Chat message-path load gate: ```bash bin/lbs suite plan langbot-debug-chat-load-gate bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local +bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local +bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local ``` Start with `langbot-fake-provider-debug-chat-load`. It launches a local OpenAI-compatible fake provider, creates the matching provider/model/pipeline, then sends concurrent WebSocket Debug Chat messages through the real backend. +Use `langbot-fake-provider-debug-chat-slow-load` to measure the same path under +deterministic streaming latency. Use +`langbot-fake-provider-debug-chat-fault-recovery` to inject bounded provider +HTTP failures and confirm later Debug Chat requests recover. Use `langbot-space-debug-chat-concurrency-smoke` only as a low-volume live provider smoke; it includes Space/model/network latency and should be compared against the fake-provider baseline before attributing failures to LangBot. 
diff --git a/skills/schemas/case.schema.json b/skills/schemas/case.schema.json index 9f4da284a..46601142a 100644 --- a/skills/schemas/case.schema.json +++ b/skills/schemas/case.schema.json @@ -227,6 +227,18 @@ "automation_debug_chat_load_max_error_rate": { "type": "string" }, + "automation_debug_chat_load_min_error_rate": { + "type": "string" + }, + "automation_debug_chat_load_min_error_count": { + "type": "string" + }, + "automation_debug_chat_load_min_ok_count": { + "type": "string" + }, + "automation_debug_chat_load_min_provider_fault_count": { + "type": "string" + }, "automation_debug_chat_load_expected_prefix": { "type": "string" }, @@ -241,6 +253,39 @@ "type": "string", "enum": ["0", "1", "false", "true"] }, + "automation_debug_chat_load_fail_on_final_mismatch": { + "type": "string", + "enum": ["0", "1", "false", "true"] + }, + "automation_fake_provider_response_text": { + "type": "string" + }, + "automation_fake_provider_first_token_delay_ms": { + "type": "string" + }, + "automation_fake_provider_chunk_delay_ms": { + "type": "string" + }, + "automation_fake_provider_chunk_count": { + "type": "string" + }, + "automation_fake_provider_fail_first_n": { + "type": "string" + }, + "automation_fake_provider_fail_every_n": { + "type": "string" + }, + "automation_fake_provider_fault_status": { + "type": "string" + }, + "automation_fake_provider_fail_after_first_chunk": { + "type": "string", + "enum": ["0", "1", "false", "true"] + }, + "automation_fake_provider_dynamic_response": { + "type": "string", + "enum": ["0", "1", "false", "true"] + }, "automation_filesystem_checks_json": { "type": "string" }, diff --git a/skills/scripts/e2e/ensure-fake-provider-pipeline.mjs b/skills/scripts/e2e/ensure-fake-provider-pipeline.mjs index 94a17c08a..9b709be0a 100644 --- a/skills/scripts/e2e/ensure-fake-provider-pipeline.mjs +++ b/skills/scripts/e2e/ensure-fake-provider-pipeline.mjs @@ -54,6 +54,7 @@ const result = { base_url: "", pid: null, reused: false, + config: {}, state_file: 
fakeStatePath, stdout_log: fakeStdoutPath, stderr_log: fakeStderrPath, @@ -99,9 +100,11 @@ try { } const fakeProvider = await ensureFakeProvider(); + const setupConfig = await configureFakeProvider(fakeProvider.url, healthyFakeProviderConfig(), true); result.fake_provider = { ...result.fake_provider, ...fakeProvider, + config: setupConfig.config || healthyFakeProviderConfig(), }; const user = env.LANGBOT_E2E_LOGIN_USER || ""; @@ -144,6 +147,9 @@ try { Object.assign(result, pipeline); result.pipeline_url = `${frontendUrl.replace(/\/$/, "")}/home/pipelines?id=${encodeURIComponent(pipeline.pipeline_id)}`; + const runConfig = await configureFakeProvider(fakeProvider.url, targetFakeProviderConfig(), true); + result.fake_provider.config = runConfig.config || targetFakeProviderConfig(); + if (writeEnv) { await upsertEnvLocal(envLocalPath, { LANGBOT_E2E_LOGIN_USER: user, @@ -172,7 +178,7 @@ process.exit(result.status === "pass" ? 0 : result.status === "env_issue" ? 2 : async function ensureFakeProvider() { const envUrl = normalizeProviderRootUrl(env.LANGBOT_FAKE_PROVIDER_URL || ""); - if (envUrl && await fakeProviderHealthy(envUrl)) { + if (envUrl && await fakeProviderHealthy(envUrl) && await fakeProviderConfigurable(envUrl)) { return { url: envUrl, base_url: `${envUrl}/v1`, @@ -184,12 +190,15 @@ async function ensureFakeProvider() { const state = await readState(fakeStatePath); const stateUrl = normalizeProviderRootUrl(state.url || ""); if (stateUrl && await fakeProviderHealthy(stateUrl)) { - return { - url: stateUrl, - base_url: state.base_url || `${stateUrl}/v1`, - pid: Number.isInteger(state.pid) ? state.pid : null, - reused: true, - }; + if (await fakeProviderConfigurable(stateUrl)) { + return { + url: stateUrl, + base_url: state.base_url || `${stateUrl}/v1`, + pid: Number.isInteger(state.pid) ? 
state.pid : null, + reused: true, + }; + } + if (Number.isInteger(state.pid)) await stopProcess(state.pid); } await mkdir(fakeStateDir, { recursive: true }); @@ -218,7 +227,7 @@ async function ensureFakeProvider() { await stderr.close(); const started = await waitForFakeProviderState(fakeStatePath, child.pid, 10_000); - if (!started.url || !await fakeProviderHealthy(started.url)) { + if (!started.url || !await fakeProviderHealthy(started.url) || !await fakeProviderConfigurable(started.url)) { throw new Error(`Fake provider did not become healthy. See ${fakeStderrPath}`); } @@ -230,6 +239,23 @@ async function ensureFakeProvider() { }; } +async function configureFakeProvider(rootUrl, config, resetRequestCount) { + const response = await fetch(`${normalizeProviderRootUrl(rootUrl)}/__qa/config`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + config, + reset_request_count: resetRequestCount, + }), + signal: AbortSignal.timeout(3000), + }); + const json = await response.json().catch(() => ({})); + if (!response.ok || json.ok !== true) { + throw new Error(`Fake provider config failed with HTTP ${response.status}.`); + } + return json; +} + async function fakeProviderHealthy(rootUrl) { try { const response = await fetch(`${rootUrl.replace(/\/$/, "")}/healthz`, { @@ -243,6 +269,28 @@ async function fakeProviderHealthy(rootUrl) { } } +async function fakeProviderConfigurable(rootUrl) { + try { + const response = await fetch(`${rootUrl.replace(/\/$/, "")}/__qa/config`, { + signal: AbortSignal.timeout(2000), + }); + if (!response.ok) return false; + const json = await response.json().catch(() => ({})); + return json.ok === true && json.config && typeof json.config === "object"; + } catch { + return false; + } +} + +async function stopProcess(pid) { + try { + process.kill(pid, "SIGTERM"); + } catch { + return; + } + await sleep(500); +} + async function waitForFakeProviderState(path, expectedPid, timeoutMs) { const startedAt = 
Date.now(); let lastState = {}; @@ -268,6 +316,34 @@ function normalizeProviderRootUrl(value) { return trimmed.endsWith("/v1") ? trimmed.slice(0, -3) : trimmed; } +function healthyFakeProviderConfig() { + return { + response_text: "OK", + first_token_delay_ms: 25, + chunk_delay_ms: 10, + chunk_count: 0, + fault_status: 500, + fail_first_n: 0, + fail_every_n: 0, + fail_after_first_chunk: false, + dynamic_response: true, + }; +} + +function targetFakeProviderConfig() { + return { + response_text: env.LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT || "OK", + first_token_delay_ms: nonNegativeInteger(env.LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS, 25), + chunk_delay_ms: nonNegativeInteger(env.LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS, 10), + chunk_count: nonNegativeInteger(env.LANGBOT_FAKE_PROVIDER_CHUNK_COUNT, 0), + fault_status: httpFaultStatus(env.LANGBOT_FAKE_PROVIDER_FAULT_STATUS, 500), + fail_first_n: nonNegativeInteger(env.LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N, 0), + fail_every_n: nonNegativeInteger(env.LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N, 0), + fail_after_first_chunk: envBool(env.LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK, false), + dynamic_response: envBool(env.LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE, true), + }; +} + async function skipWizard({ backendUrl, token }) { const response = await apiJson(backendUrl, "/api/v1/system/wizard/completed", { method: "POST", @@ -505,6 +581,23 @@ function positiveInteger(value, fallback) { return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback; } +function nonNegativeInteger(value, fallback) { + const parsed = value == null || String(value).trim() === "" ? Number.NaN : Number(value); /* unset or blank env value -> fallback; Number("") and Number(null) would otherwise coerce to 0 and pass the >= 0 check */ + return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback; +} + +function httpFaultStatus(value, fallback) { + const parsed = Number(value); + return Number.isInteger(parsed) && parsed >= 400 && parsed <= 599 ?
parsed : fallback; +} + +function envBool(value, fallback) { + if (value === undefined || value === "") return fallback; + if (/^(1|true|yes|on)$/i.test(String(value))) return true; + if (/^(0|false|no|off)$/i.test(String(value))) return false; + return fallback; +} + function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } diff --git a/skills/scripts/e2e/fake-openai-provider.mjs b/skills/scripts/e2e/fake-openai-provider.mjs index f75d86ffe..7a4853f42 100644 --- a/skills/scripts/e2e/fake-openai-provider.mjs +++ b/skills/scripts/e2e/fake-openai-provider.mjs @@ -10,14 +10,18 @@ const host = args.host || env.LANGBOT_FAKE_PROVIDER_HOST || "127.0.0.1"; const port = integer(args.port ?? env.LANGBOT_FAKE_PROVIDER_PORT, 0); const stateFile = args["state-file"] || env.LANGBOT_FAKE_PROVIDER_STATE_FILE || ""; const modelName = env.LANGBOT_FAKE_PROVIDER_MODEL_NAME || "gpt-4o-mini"; -const responseText = env.LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT || "OK"; -const firstTokenDelayMs = integer(env.LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS, 25); -const chunkDelayMs = integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS, 10); -const faultStatus = integer(env.LANGBOT_FAKE_PROVIDER_FAULT_STATUS, 500); -const failFirstN = integer(env.LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N, 0); -const failEveryN = integer(env.LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N, 0); -const failAfterFirstChunk = bool(env.LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK, false); -const requestLogLimit = integer(env.LANGBOT_FAKE_PROVIDER_REQUEST_LOG_LIMIT, 500); +const config = { + response_text: env.LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT || "OK", + first_token_delay_ms: integer(env.LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS, 25), + chunk_delay_ms: integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS, 10), + chunk_count: integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_COUNT, 0), + fault_status: integer(env.LANGBOT_FAKE_PROVIDER_FAULT_STATUS, 500), + fail_first_n: integer(env.LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N, 0), + fail_every_n: 
integer(env.LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N, 0), + fail_after_first_chunk: bool(env.LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK, false), + dynamic_response: !/^(0|false|no|off)$/i.test(env.LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE || ""), + request_log_limit: integer(env.LANGBOT_FAKE_PROVIDER_REQUEST_LOG_LIMIT, 500), +}; let requestCount = 0; const recentRequests = []; @@ -30,12 +34,48 @@ const server = createServer(async (request, response) => { sendJson(response, 200, { ok: true, model: modelName, + config, request_count: requestCount, recent_request_count: recentRequests.length, }); return; } + if (request.method === "GET" && url.pathname === "/__qa/config") { + sendJson(response, 200, { + ok: true, + model: modelName, + config, + request_count: requestCount, + recent_requests: recentRequests, + }); + return; + } + + if (request.method === "POST" && url.pathname === "/__qa/config") { + const body = await readJson(request); + applyConfig(body.config && typeof body.config === "object" ? 
body.config : body); + if (body.reset_request_count !== false) resetRequestState(); + sendJson(response, 200, { + ok: true, + model: modelName, + config, + request_count: requestCount, + }); + return; + } + + if (request.method === "POST" && url.pathname === "/__qa/reset") { + resetRequestState(); + sendJson(response, 200, { + ok: true, + model: modelName, + config, + request_count: requestCount, + }); + return; + } + if (request.method === "GET" && ["/models", "/v1/models"].includes(url.pathname)) { sendJson(response, 200, { object: "list", @@ -56,7 +96,8 @@ const server = createServer(async (request, response) => { requestCount += 1; const body = await readJson(request); const requestId = `chatcmpl-langbot-fake-${requestCount}`; - const shouldFail = requestCount <= failFirstN || (failEveryN > 0 && requestCount % failEveryN === 0); + const shouldFail = requestCount <= config.fail_first_n + || (config.fail_every_n > 0 && requestCount % config.fail_every_n === 0); recordRequest({ id: requestId, path: url.pathname, @@ -67,10 +108,10 @@ const server = createServer(async (request, response) => { }); if (shouldFail) { - await sleep(firstTokenDelayMs); - sendJson(response, faultStatus, { + await sleep(config.first_token_delay_ms); + sendJson(response, config.fault_status, { error: { - message: `LangBot fake provider injected HTTP ${faultStatus}`, + message: `LangBot fake provider injected HTTP ${config.fault_status}`, type: "fake_provider_fault", code: "fake_provider_fault", }, @@ -85,10 +126,10 @@ const server = createServer(async (request, response) => { requestId, model: body.model || modelName, content: replyText, - failAfterFirstChunk, + failAfterFirstChunk: config.fail_after_first_chunk, }); } else { - await sleep(firstTokenDelayMs + chunkDelayMs); + await sleep(config.first_token_delay_ms + config.chunk_delay_ms); sendJson(response, 200, completionPayload({ requestId, model: body.model || modelName, @@ -230,7 +271,7 @@ async function streamCompletion(response, { 
requestId, model, content, failAfter "connection": "keep-alive", }); - await sleep(firstTokenDelayMs); + await sleep(config.first_token_delay_ms); writeSse(response, { id: requestId, object: "chat.completion.chunk", @@ -241,7 +282,7 @@ async function streamCompletion(response, { requestId, model, content, failAfter const chunks = splitContent(content); for (let index = 0; index < chunks.length; index += 1) { - await sleep(chunkDelayMs); + await sleep(config.chunk_delay_ms); writeSse(response, { id: requestId, object: "chat.completion.chunk", @@ -255,7 +296,7 @@ async function streamCompletion(response, { requestId, model, content, failAfter } } - await sleep(chunkDelayMs); + await sleep(config.chunk_delay_ms); const completionTokens = tokenEstimate(content); writeSse(response, { id: requestId, @@ -279,7 +320,7 @@ function writeSse(response, payload) { function splitContent(content) { const text = String(content); - const requested = integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_COUNT, 0); + const requested = config.chunk_count; if (requested <= 1 || text.length <= 1) return [text]; const chunkSize = Math.max(1, Math.ceil(text.length / requested)); const chunks = []; @@ -294,8 +335,8 @@ function tokenEstimate(content) { } function responseTextForBody(body) { - if (/^(0|false|no|off)$/i.test(env.LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE || "")) { - return responseText; + if (!config.dynamic_response) { + return config.response_text; } const messages = Array.isArray(body.messages) ? 
body.messages : []; const lastUser = [...messages].reverse().find((message) => message?.role === "user"); @@ -306,7 +347,7 @@ function responseTextForBody(body) { if (exact?.[1]) return exact[1].trim().replace(/[。.!?]+$/, ""); const only = text.match(/只回复\s*([A-Za-z0-9_.:@-]{1,80})/); if (only?.[1]) return only[1].trim().replace(/[。.!?]+$/, ""); - return responseText; + return config.response_text; } function flattenContent(content) { @@ -328,5 +369,42 @@ function recordRequest(entry) { ...entry, at: new Date().toISOString(), }); - while (recentRequests.length > requestLogLimit) recentRequests.shift(); + while (recentRequests.length > config.request_log_limit) recentRequests.shift(); +} + +function resetRequestState() { + requestCount = 0; + recentRequests.length = 0; +} + +function applyConfig(updates) { + if (!updates || typeof updates !== "object") return; + assignString(updates, "response_text"); + assignNonNegativeInteger(updates, "first_token_delay_ms"); + assignNonNegativeInteger(updates, "chunk_delay_ms"); + assignNonNegativeInteger(updates, "chunk_count"); + assignNonNegativeInteger(updates, "fail_first_n"); + assignNonNegativeInteger(updates, "fail_every_n"); + assignNonNegativeInteger(updates, "request_log_limit"); + if (updates.fault_status !== undefined) { + const parsed = Number.parseInt(String(updates.fault_status), 10); + if (Number.isInteger(parsed) && parsed >= 400 && parsed <= 599) config.fault_status = parsed; + } + assignBoolean(updates, "fail_after_first_chunk"); + assignBoolean(updates, "dynamic_response"); +} + +function assignString(updates, key) { + if (updates[key] !== undefined) config[key] = String(updates[key]); +} + +function assignNonNegativeInteger(updates, key) { + if (updates[key] === undefined) return; + const parsed = Number.parseInt(String(updates[key]), 10); + if (Number.isInteger(parsed) && parsed >= 0) config[key] = parsed; +} + +function assignBoolean(updates, key) { + if (updates[key] === undefined) return; + config[key] = 
bool(updates[key], config[key]); } diff --git a/skills/skills.index.json b/skills/skills.index.json index 5bc988ed6..f1b14ecf8 100644 --- a/skills/skills.index.json +++ b/skills/skills.index.json @@ -151,7 +151,9 @@ "agent-runner-release-preflight", "agent-runner-runtime-chaos", "dify-agent-debug-chat", + "langbot-fake-provider-debug-chat-fault-recovery", "langbot-fake-provider-debug-chat-load", + "langbot-fake-provider-debug-chat-slow-load", "langbot-fault-taxonomy-contract", "langbot-live-backend-latency", "langbot-live-backend-log-health", @@ -495,6 +497,44 @@ "backend_log" ] }, + { + "id": "langbot-fake-provider-debug-chat-fault-recovery", + "title": "LangBot Debug Chat fake-provider fault recovery probe", + "mode": "probe", + "area": "reliability", + "type": "chaos", + "priority": "p1", + "risk": "high", + "ci_eligible": false, + "tags": [ + "reliability", + "chaos", + "debug-chat", + "websocket", + "fake-provider", + "fault-injection", + "metrics" + ], + "automation": "skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs", + "setup_automation": [ + "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env" + ], + "setup_provides_env": [ + "LANGBOT_FAKE_PROVIDER_URL", + "LANGBOT_FAKE_PROVIDER_BASE_URL", + "LANGBOT_FAKE_PROVIDER_PID", + "LANGBOT_FAKE_PROVIDER_PROVIDER_UUID", + "LANGBOT_FAKE_PROVIDER_MODEL_UUID", + "LANGBOT_FAKE_PROVIDER_PIPELINE_URL", + "LANGBOT_FAKE_PROVIDER_PIPELINE_NAME" + ], + "evidence_required": [ + "metrics", + "network", + "api_diagnostic", + "filesystem" + ] + }, { "id": "langbot-fake-provider-debug-chat-load", "title": "LangBot Debug Chat controlled fake-provider load probe", @@ -532,6 +572,44 @@ "filesystem" ] }, + { + "id": "langbot-fake-provider-debug-chat-slow-load", + "title": "LangBot Debug Chat slow fake-provider load probe", + "mode": "probe", + "area": "performance", + "type": "performance", + "priority": "p1", + "risk": "medium", + "ci_eligible": false, + "tags": [ + "performance", + "debug-chat", + 
"websocket", + "fake-provider", + "slow-provider", + "load", + "metrics" + ], + "automation": "skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs", + "setup_automation": [ + "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env" + ], + "setup_provides_env": [ + "LANGBOT_FAKE_PROVIDER_URL", + "LANGBOT_FAKE_PROVIDER_BASE_URL", + "LANGBOT_FAKE_PROVIDER_PID", + "LANGBOT_FAKE_PROVIDER_PROVIDER_UUID", + "LANGBOT_FAKE_PROVIDER_MODEL_UUID", + "LANGBOT_FAKE_PROVIDER_PIPELINE_URL", + "LANGBOT_FAKE_PROVIDER_PIPELINE_NAME" + ], + "evidence_required": [ + "metrics", + "network", + "api_diagnostic", + "filesystem" + ] + }, { "id": "langbot-fault-taxonomy-contract", "title": "LangBot fault taxonomy and cleanup contract", @@ -1366,7 +1444,7 @@ { "id": "langbot-debug-chat-load-gate", "title": "LangBot Debug Chat load gate", - "description": "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline plus optional real Space-provider smoke.", + "description": "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke.", "type": "performance", "priority": "p1", "tags": [ @@ -1377,6 +1455,8 @@ ], "cases": [ "langbot-fake-provider-debug-chat-load", + "langbot-fake-provider-debug-chat-slow-load", + "langbot-fake-provider-debug-chat-fault-recovery", "langbot-space-debug-chat-concurrency-smoke" ] }, diff --git a/skills/skills/.env.example b/skills/skills/.env.example index d236c199a..888c5721d 100644 --- a/skills/skills/.env.example +++ b/skills/skills/.env.example @@ -36,9 +36,12 @@ LANGBOT_FAKE_PROVIDER_MODEL_NAME=gpt-4o-mini LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT=OK LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS=25 LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS=10 +LANGBOT_FAKE_PROVIDER_CHUNK_COUNT=0 LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N=0 LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N=0 LANGBOT_FAKE_PROVIDER_FAULT_STATUS=500 
+LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK=false +LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE=true # Optional case-specific runner targets. Prefer these for runner-specific cases # so the automation cannot silently test the wrong runner. diff --git a/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-fault-recovery.yaml b/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-fault-recovery.yaml new file mode 100644 index 000000000..7dfa45c91 --- /dev/null +++ b/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-fault-recovery.yaml @@ -0,0 +1,95 @@ +id: langbot-fake-provider-debug-chat-fault-recovery +title: "LangBot Debug Chat fake-provider fault recovery probe" +mode: probe +area: reliability +type: chaos +priority: p1 +risk: high +ci_eligible: false +tags: + - reliability + - chaos + - debug-chat + - websocket + - fake-provider + - fault-injection + - metrics +skills: + - langbot-env-setup + - langbot-testing +env: + - LANGBOT_BACKEND_URL + - LANGBOT_FRONTEND_URL + - LANGBOT_E2E_LOGIN_USER +automation: skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs +automation_env: + - LANGBOT_BACKEND_URL + - LANGBOT_E2E_LOGIN_USER + - LANGBOT_FAKE_PROVIDER_PIPELINE_URL + - LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +automation_pipeline_url_env: LANGBOT_FAKE_PROVIDER_PIPELINE_URL +automation_pipeline_name_env: LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +automation_debug_chat_load_requests: "6" +automation_debug_chat_load_concurrency: "1" +automation_debug_chat_load_timeout_ms: "15000" +automation_debug_chat_load_response_p95_ms: "5000" +automation_debug_chat_load_max_error_rate: "0" +automation_debug_chat_load_min_ok_count: "6" +automation_debug_chat_load_min_provider_fault_count: "2" +automation_debug_chat_load_expected_prefix: "FAULTQA" +automation_debug_chat_load_prompt_template: '请只回复 "{expected}",不要解释,不要添加其他字符。' +automation_debug_chat_load_stream: "true" +automation_debug_chat_load_reset: "true" 
+automation_debug_chat_load_fail_on_final_mismatch: "true" +automation_fake_provider_first_token_delay_ms: "25" +automation_fake_provider_chunk_delay_ms: "10" +automation_fake_provider_chunk_count: "0" +automation_fake_provider_fail_first_n: "2" +automation_fake_provider_fail_every_n: "0" +automation_fake_provider_fault_status: "503" +metrics_thresholds_json: '{"response_p95_ms":{"max":5000},"error_rate":{"max":0},"ok_count_min":{"min":6},"fake_provider_fault_count_min":{"min":2}}' +fault_model_json: '{"provider_fault":"HTTP 503 for first 2 fake-provider chat completions after reset","expected_behavior":"LangBot retries or otherwise recovers from bounded provider failures so every Debug Chat request receives its expected response without backend crash."}' +load_profile_json: '{"requests":6,"concurrency":1,"path":"Pipeline Debug Chat WebSocket","provider":"controlled fake OpenAI-compatible provider","classification":"fault-recovery-not-throughput-benchmark"}' +setup_automation: + - "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env" +setup_provides_env: + - LANGBOT_FAKE_PROVIDER_URL + - LANGBOT_FAKE_PROVIDER_BASE_URL + - LANGBOT_FAKE_PROVIDER_PID + - LANGBOT_FAKE_PROVIDER_PROVIDER_UUID + - LANGBOT_FAKE_PROVIDER_MODEL_UUID + - LANGBOT_FAKE_PROVIDER_PIPELINE_URL + - LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +steps: + - "Configure the local fake provider to return HTTP 503 for the first two chat completions after reset." + - "Create or update the LangBot provider, model, and local-agent pipeline that points at the fake provider." + - "Reset the target Debug Chat session and fake-provider request counter." + - "Send a sequential Debug Chat batch and verify later requests recover after the injected provider faults." +checks: + - "automation-result.json status is pass when the fake provider records at least two injected faults, every Debug Chat request succeeds, and total user-visible error rate stays at zero." 
+ - "metrics_summary includes fake_provider_fault_count and status_counts for the same run window." + - "backend logs show request handling for the same run window without unexpected Traceback or task-leak findings." +evidence_required: + - metrics + - network + - api_diagnostic + - filesystem +diagnostics: + - "This is a fault-recovery probe, not a throughput benchmark." + - "Provider faults may be retried inside the provider/requester path; judge this case by fake_provider_fault_count plus user-visible success/error metrics." + - "The profile uses concurrency 1 because Debug Chat broadcasts assistant responses to every connection in a session, and failed responses do not carry the unique success token needed for concurrent attribution." +success_patterns: + - "Debug Chat WebSocket concurrency probe passed" + - "Streaming completed" +failure_patterns: + - "fake_provider_fault" + - "HTTP 503" + - "Timed out after" + - "All models failed during streaming setup" +expected_failures: + - "fake_provider_fault" + - "HTTP 503" +troubleshooting: + - backend-not-listening + - debug-chat-history-contaminates-automation + - local-agent-model-route-unavailable diff --git a/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-slow-load.yaml b/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-slow-load.yaml new file mode 100644 index 000000000..afa7de154 --- /dev/null +++ b/skills/skills/langbot-testing/cases/langbot-fake-provider-debug-chat-slow-load.yaml @@ -0,0 +1,88 @@ +id: langbot-fake-provider-debug-chat-slow-load +title: "LangBot Debug Chat slow fake-provider load probe" +mode: probe +area: performance +type: performance +priority: p1 +risk: medium +ci_eligible: false +tags: + - performance + - debug-chat + - websocket + - fake-provider + - slow-provider + - load + - metrics +skills: + - langbot-env-setup + - langbot-testing +env: + - LANGBOT_BACKEND_URL + - LANGBOT_FRONTEND_URL + - LANGBOT_E2E_LOGIN_USER +automation: 
skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs +automation_env: + - LANGBOT_BACKEND_URL + - LANGBOT_E2E_LOGIN_USER + - LANGBOT_FAKE_PROVIDER_PIPELINE_URL + - LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +automation_pipeline_url_env: LANGBOT_FAKE_PROVIDER_PIPELINE_URL +automation_pipeline_name_env: LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +automation_debug_chat_load_requests: "8" +automation_debug_chat_load_concurrency: "4" +automation_debug_chat_load_timeout_ms: "45000" +automation_debug_chat_load_response_p95_ms: "10000" +automation_debug_chat_load_first_response_p95_ms: "7000" +automation_debug_chat_load_max_error_rate: "0" +automation_debug_chat_load_expected_prefix: "SLOWQA" +automation_debug_chat_load_prompt_template: '请只回复 "{expected}",不要解释,不要添加其他字符。' +automation_debug_chat_load_stream: "true" +automation_debug_chat_load_reset: "true" +automation_fake_provider_first_token_delay_ms: "1000" +automation_fake_provider_chunk_delay_ms: "250" +automation_fake_provider_chunk_count: "4" +automation_fake_provider_fail_first_n: "0" +automation_fake_provider_fail_every_n: "0" +automation_fake_provider_fault_status: "500" +metrics_thresholds_json: '{"response_p95_ms":{"max":10000},"first_response_p95_ms":{"max":7000},"error_rate":{"max":0}}' +load_profile_json: '{"requests":8,"concurrency":4,"path":"Pipeline Debug Chat WebSocket","provider":"controlled slow fake OpenAI-compatible provider","metric":"send-to-final-assistant-response","provider_profile":{"first_token_delay_ms":1000,"chunk_delay_ms":250,"chunk_count":4}}' +setup_automation: + - "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env" +setup_provides_env: + - LANGBOT_FAKE_PROVIDER_URL + - LANGBOT_FAKE_PROVIDER_BASE_URL + - LANGBOT_FAKE_PROVIDER_PID + - LANGBOT_FAKE_PROVIDER_PROVIDER_UUID + - LANGBOT_FAKE_PROVIDER_MODEL_UUID + - LANGBOT_FAKE_PROVIDER_PIPELINE_URL + - LANGBOT_FAKE_PROVIDER_PIPELINE_NAME +steps: + - "Configure the local fake provider with deterministic slow streaming latency." 
+ - "Create or update the LangBot provider, model, and local-agent pipeline that points at the fake provider." + - "Reset the target Debug Chat session." + - "Open concurrent WebSocket Debug Chat connections and send unique deterministic prompts through the real backend pipeline." +checks: + - "automation-result.json status is pass when every request receives its own expected assistant response." + - "metrics_summary shows zero errors under the slow-provider profile." + - "thresholds_summary shows response_p95_ms, first_response_p95_ms, and error_rate pass." +evidence_required: + - metrics + - network + - api_diagnostic + - filesystem +diagnostics: + - "This probe keeps the model deterministic while injecting provider latency, so it catches backend timeout, streaming, and WebSocket backpressure issues without Space variability." + - "Compare with langbot-fake-provider-debug-chat-load to separate fixed LangBot overhead from provider-latency amplification." +success_patterns: + - "Debug Chat WebSocket concurrency probe passed" + - "Streaming completed" +failure_patterns: + - "WebSocket connection error" + - "Timed out after" + - "Final assistant response did not include" + - "All models failed during streaming setup" +troubleshooting: + - backend-not-listening + - debug-chat-history-contaminates-automation + - local-agent-model-route-unavailable diff --git a/skills/skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs b/skills/skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs index af40698a1..707fab6b0 100644 --- a/skills/skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs +++ b/skills/skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs @@ -28,8 +28,10 @@ await ensureEvidence(paths); const startedAt = new Date(); const metricsPath = resolve(paths.evidenceDir, "metrics.json"); const samplesPath = resolve(paths.evidenceDir, "samples.json"); +const fakeProviderStatePath = resolve(paths.evidenceDir, 
"fake-provider-state.json"); const resetDiagnosticPath = resolve(paths.evidenceDir, "debug-chat-reset-diagnostic.json"); const backendUrl = env.LANGBOT_BACKEND_URL || ""; +const fakeProviderUrl = env.LANGBOT_FAKE_PROVIDER_URL || ""; const pipelineUrl = env.LANGBOT_E2E_PIPELINE_URL || env.LANGBOT_PIPELINE_URL || ""; const pipelineName = env.LANGBOT_E2E_PIPELINE_NAME || env.LANGBOT_PIPELINE_NAME || ""; const sessionType = env.LANGBOT_DEBUG_CHAT_LOAD_SESSION_TYPE || env.LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE || "person"; @@ -44,6 +46,12 @@ const resetBeforeRun = bool(env.LANGBOT_DEBUG_CHAT_LOAD_RESET, true); const responseP95BudgetMs = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_RESPONSE_P95_MS, defaultP95Budget(caseId)); const firstResponseP95BudgetMs = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_FIRST_RESPONSE_P95_MS, 0); const maxErrorRate = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_MAX_ERROR_RATE, 0); +const minErrorRate = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_RATE, 0); +const minErrorCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_COUNT, 0); +const minOkCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_OK_COUNT, 0); +const minProviderFaultCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_PROVIDER_FAULT_COUNT, 0); +const failOnFinalMismatch = bool(env.LANGBOT_DEBUG_CHAT_LOAD_FAIL_ON_FINAL_MISMATCH, false); +const failureSignals = textList(env.LANGBOT_E2E_FAILURE_SIGNALS || env.LANGBOT_DEBUG_CHAT_LOAD_FAILURE_SIGNALS || ""); const result = { source: "automation", @@ -67,11 +75,13 @@ const result = { timeout_ms: timeoutMs, stream, reset_before_run: resetBeforeRun, + fail_on_final_mismatch: failOnFinalMismatch, }, evidence: { network_log: paths.networkLog, metrics_json: metricsPath, samples_json: samplesPath, + fake_provider_state_json: fakeProviderStatePath, debug_chat_reset_diagnostic_json: resetDiagnosticPath, automation_result_json: paths.automationResultJson, result_json: paths.resultJson, @@ -135,8 +145,14 @@ try { 
promptTemplate, expectedPrefix, stream, + failOnFinalMismatch, + failureSignals, }); const loadDurationMs = performance.now() - loadStartedAt; + const fakeProviderState = await readFakeProviderState(fakeProviderUrl); + if (fakeProviderState) { + await writeFile(fakeProviderStatePath, `${JSON.stringify(fakeProviderState, null, 2)}\n`, "utf8"); + } const metrics = buildMetrics({ samples, totalRequests, @@ -146,6 +162,7 @@ try { backendUrl, pipelineId: pipeline.id, sessionType, + fakeProviderState, }); const thresholds = buildThresholds(metrics); const passed = Object.values(thresholds).every((item) => item.pass); @@ -165,11 +182,14 @@ try { first_response_p95_ms: metrics.first_response_ms.p95, throughput_rps: metrics.throughput_rps, status_counts: metrics.status_counts, + fake_provider_request_count: metrics.fake_provider?.request_count ?? null, + fake_provider_fault_count: metrics.fake_provider?.fault_count ?? null, }; result.thresholds_summary = thresholds; result.artifacts = { metrics_json: metricsPath, samples_json: samplesPath, + fake_provider_state_json: fakeProviderState ? fakeProviderStatePath : "", network_log: paths.networkLog, automation_result_json: paths.automationResultJson, result_json: paths.resultJson, @@ -215,6 +235,11 @@ function positiveInteger(value, fallback) { return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback; } +function nonNegativeInteger(value, fallback) { + const parsed = Number.parseInt(String(value ?? ""), 10); + return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback; +} + function positiveNumber(value, fallback) { const parsed = Number(value || ""); return Number.isFinite(parsed) && parsed >= 0 ? 
parsed : fallback; @@ -227,6 +252,13 @@ function bool(value, fallback) { return fallback; } +function textList(value) { + return String(value || "") + .split(/\r?\n|,/) + .map((item) => item.trim()) + .filter(Boolean); +} + async function backendReachable(baseUrl) { try { const response = await fetch(`${baseUrl.replace(/\/$/, "")}/healthz`, { @@ -238,6 +270,38 @@ async function backendReachable(baseUrl) { } } +async function readFakeProviderState(rootUrl) { + if (!rootUrl) return null; + try { + const response = await fetch(`${normalizeProviderRootUrl(rootUrl)}/__qa/config`, { + signal: AbortSignal.timeout(3000), + }); + const json = await response.json().catch(() => ({})); + return { + status: response.ok && json.ok === true ? "loaded" : "unavailable", + url: normalizeProviderRootUrl(rootUrl), + http_status: response.status, + model: json.model || "", + config: json.config || {}, + request_count: Number.isFinite(json.request_count) ? json.request_count : null, + recent_requests: Array.isArray(json.recent_requests) ? json.recent_requests : [], + }; + } catch (error) { + return { + status: "unavailable", + url: normalizeProviderRootUrl(rootUrl), + reason: safeReason(error.message), + request_count: null, + recent_requests: [], + }; + } +} + +function normalizeProviderRootUrl(value) { + const trimmed = String(value || "").trim().replace(/\/$/, ""); + return trimmed.endsWith("/v1") ? 
trimmed.slice(0, -3) : trimmed; +} + function pipelineIdFromUrl(url) { if (!url) return ""; try { @@ -314,6 +378,8 @@ function runSingleRequest({ promptTemplate, expectedPrefix, stream, + failOnFinalMismatch, + failureSignals, }) { return new Promise((resolve) => { const expected = expectedForIndex(expectedPrefix, index); @@ -384,18 +450,22 @@ function runSingleRequest({ const content = String(data.data.content || ""); if (content) sample.response_text = content; - if (data.data.is_final === true) { - const ok = sample.response_text.includes(expected); - if (ok) { - if (sample.first_response_ms === null && sentAt > 0) { - sample.first_response_ms = rounded(performance.now() - sentAt); + if (data.data.is_final === true) { + const ok = sample.response_text.includes(expected); + if (ok) { + if (sample.first_response_ms === null && sentAt > 0) { + sample.first_response_ms = rounded(performance.now() - sentAt); + } + finish("pass", ""); + } else if (matchesFailureSignal(sample.response_text, failureSignals)) { + finish("app_error", `Assistant final response matched a failure signal: ${sample.response_text}`); + } else if (failOnFinalMismatch && !containsLoadToken(sample.response_text, expectedPrefix)) { + finish("mismatch", `Final assistant response did not include ${expected}: ${sample.response_text}`); + } else { + sample.foreign_response_count += 1; + sample.last_foreign_response_text = sample.response_text; } - finish("pass", ""); - } else { - sample.foreign_response_count += 1; - sample.last_foreign_response_text = sample.response_text; } - } }, onError(error) { finish("connection_error", `WebSocket connection error: ${error.message}`); @@ -428,6 +498,16 @@ function runSingleRequest({ }); } +function containsLoadToken(text, prefix) { + const escaped = String(prefix).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return new RegExp(`${escaped}-\\d{4}`).test(String(text || "")); +} + +function matchesFailureSignal(text, signals) { + const lower = String(text || 
"").toLowerCase(); + return signals.some((signal) => lower.includes(signal.toLowerCase())); +} + function openRawWebSocket(wsUrl, handlers) { const parsed = new URL(wsUrl); const secure = parsed.protocol === "wss:"; @@ -605,7 +685,7 @@ function stats(values) { }; } -function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDurationMs, backendUrl, pipelineId, sessionType }) { +function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDurationMs, backendUrl, pipelineId, sessionType, fakeProviderState }) { const okSamples = samples.filter((sample) => sample.ok); const statusCounts = {}; for (const sample of samples) { @@ -631,10 +711,25 @@ function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDura connected_ms: stats(samples.map((sample) => sample.connected_ms).filter(Number.isFinite)), first_response_ms: stats(okSamples.map((sample) => sample.first_response_ms).filter(Number.isFinite)), response_duration_ms: stats(okSamples.map((sample) => sample.response_duration_ms).filter(Number.isFinite)), + fake_provider: summarizeFakeProviderState(fakeProviderState), samples, }; } +function summarizeFakeProviderState(state) { + if (!state) return null; + const recentRequests = Array.isArray(state.recent_requests) ? state.recent_requests : []; + return { + status: state.status || "unknown", + url: state.url || "", + request_count: Number.isFinite(state.request_count) ? 
state.request_count : recentRequests.length, + recent_request_count: recentRequests.length, + fault_count: recentRequests.filter((request) => request?.should_fail === true).length, + streamed_request_count: recentRequests.filter((request) => request?.stream === true).length, + config: state.config || {}, + }; +} + function buildThresholds(metrics) { const thresholds = { error_rate: { actual: metrics.error_rate, max: maxErrorRate, pass: metrics.error_rate <= maxErrorRate }, @@ -644,6 +739,35 @@ function buildThresholds(metrics) { pass: metrics.ok_count > 0 && metrics.response_duration_ms.p95 <= responseP95BudgetMs, }, }; + if (minErrorRate > 0) { + thresholds.error_rate_min = { + actual: metrics.error_rate, + min: minErrorRate, + pass: metrics.error_rate >= minErrorRate, + }; + } + if (minErrorCount > 0) { + thresholds.error_count_min = { + actual: metrics.error_count, + min: minErrorCount, + pass: metrics.error_count >= minErrorCount, + }; + } + if (minOkCount > 0) { + thresholds.ok_count_min = { + actual: metrics.ok_count, + min: minOkCount, + pass: metrics.ok_count >= minOkCount, + }; + } + if (minProviderFaultCount > 0) { + const actual = metrics.fake_provider?.fault_count ?? 
0; + thresholds.fake_provider_fault_count_min = { + actual, + min: minProviderFaultCount, + pass: actual >= minProviderFaultCount, + }; + } if (firstResponseP95BudgetMs > 0) { thresholds.first_response_p95_ms = { actual: metrics.first_response_ms.p95, diff --git a/skills/skills/langbot-testing/references/performance-reliability-testing.md b/skills/skills/langbot-testing/references/performance-reliability-testing.md index 357cf0a74..54592f6f7 100644 --- a/skills/skills/langbot-testing/references/performance-reliability-testing.md +++ b/skills/skills/langbot-testing/references/performance-reliability-testing.md @@ -144,6 +144,14 @@ request because Debug Chat broadcasts messages to every connection in the same session; unique tokens prevent one connection from counting another connection's response as its own. +After the baseline passes, run `langbot-fake-provider-debug-chat-slow-load` to +keep the same live backend path while injecting deterministic streaming latency. +Run `langbot-fake-provider-debug-chat-fault-recovery` to inject bounded HTTP +provider failures and require both observed failures and later successful +requests. The fault-recovery case is deliberately sequential because failed +Debug Chat responses do not carry a unique success token that can be attributed +to one concurrent connection. + Use `langbot-space-debug-chat-concurrency-smoke` after the fake-provider baseline. It runs a deliberately small real Space-provider batch and reports user-visible latency, not pure LangBot overhead. 
Space/model/network failures @@ -156,6 +164,8 @@ Useful commands: ```bash rtk bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local +rtk bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local +rtk bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local rtk bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local rtk bin/lbs suite run langbot-debug-chat-load-gate --run-id langbot-debug-chat-load-local --include-manual-check ``` @@ -174,8 +184,8 @@ Use the smallest gate that answers the quality question: starting with Pipeline Debug Chat send-to-visible-completion latency. Run it only when the browser profile and target pipeline are ready. - `langbot-debug-chat-load-gate`: WebSocket Debug Chat load checks, starting - with a controlled fake-provider baseline and optionally a low-volume real - Space-provider smoke. + with controlled fake-provider baseline, slow-provider, and fault-recovery + profiles, plus an optional low-volume real Space-provider smoke. - `langbot-performance-reliability-gate`: combined starter gate for synthetic contracts plus live backend checks. diff --git a/skills/skills/langbot-testing/suites/langbot-debug-chat-load-gate.yaml b/skills/skills/langbot-testing/suites/langbot-debug-chat-load-gate.yaml index 730e87583..280a8dd47 100644 --- a/skills/skills/langbot-testing/suites/langbot-debug-chat-load-gate.yaml +++ b/skills/skills/langbot-testing/suites/langbot-debug-chat-load-gate.yaml @@ -1,6 +1,6 @@ id: langbot-debug-chat-load-gate title: "LangBot Debug Chat load gate" -description: "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline plus optional real Space-provider smoke." +description: "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke." 
type: performance priority: p1 tags: @@ -10,4 +10,6 @@ tags: - load cases: - langbot-fake-provider-debug-chat-load + - langbot-fake-provider-debug-chat-slow-load + - langbot-fake-provider-debug-chat-fault-recovery - langbot-space-debug-chat-concurrency-smoke diff --git a/skills/src/commands/validate.ts b/skills/src/commands/validate.ts index 9176b9f18..590032ef8 100644 --- a/skills/src/commands/validate.ts +++ b/skills/src/commands/validate.ts @@ -198,15 +198,45 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set 599) { + errors.push(`${item.path}: 'automation_fake_provider_fault_status' must be an HTTP 4xx or 5xx status string`); + } } const streamOutput = scalar(item.fields, "automation_stream_output"); if (streamOutput && !["0", "1", "false", "true"].includes(streamOutput)) { errors.push(`${item.path}: 'automation_stream_output' must be one of 0, 1, false, or true`); } - for (const key of ["automation_debug_chat_load_stream", "automation_debug_chat_load_reset"]) { + for (const key of [ + "automation_debug_chat_load_stream", + "automation_debug_chat_load_reset", + "automation_debug_chat_load_fail_on_final_mismatch", + "automation_fake_provider_fail_after_first_chunk", + "automation_fake_provider_dynamic_response", + ]) { const value = scalar(item.fields, key); if (value && !["0", "1", "false", "true"].includes(value)) { errors.push(`${item.path}: '${key}' must be one of 0, 1, false, or true`); diff --git a/skills/src/readiness.ts b/skills/src/readiness.ts index f4fc3fae0..945fcb598 100644 --- a/skills/src/readiness.ts +++ b/skills/src/readiness.ts @@ -122,10 +122,24 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro ["automation_debug_chat_load_response_p95_ms", "LANGBOT_DEBUG_CHAT_LOAD_RESPONSE_P95_MS"], ["automation_debug_chat_load_first_response_p95_ms", "LANGBOT_DEBUG_CHAT_LOAD_FIRST_RESPONSE_P95_MS"], ["automation_debug_chat_load_max_error_rate", "LANGBOT_DEBUG_CHAT_LOAD_MAX_ERROR_RATE"], + 
["automation_debug_chat_load_min_error_rate", "LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_RATE"], + ["automation_debug_chat_load_min_error_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_COUNT"], + ["automation_debug_chat_load_min_ok_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_OK_COUNT"], + ["automation_debug_chat_load_min_provider_fault_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_PROVIDER_FAULT_COUNT"], ["automation_debug_chat_load_expected_prefix", "LANGBOT_DEBUG_CHAT_LOAD_EXPECTED_PREFIX"], ["automation_debug_chat_load_prompt_template", "LANGBOT_DEBUG_CHAT_LOAD_PROMPT_TEMPLATE"], ["automation_debug_chat_load_stream", "LANGBOT_DEBUG_CHAT_LOAD_STREAM"], ["automation_debug_chat_load_reset", "LANGBOT_DEBUG_CHAT_LOAD_RESET"], + ["automation_debug_chat_load_fail_on_final_mismatch", "LANGBOT_DEBUG_CHAT_LOAD_FAIL_ON_FINAL_MISMATCH"], + ["automation_fake_provider_response_text", "LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT"], + ["automation_fake_provider_first_token_delay_ms", "LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS"], + ["automation_fake_provider_chunk_delay_ms", "LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS"], + ["automation_fake_provider_chunk_count", "LANGBOT_FAKE_PROVIDER_CHUNK_COUNT"], + ["automation_fake_provider_fail_first_n", "LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N"], + ["automation_fake_provider_fail_every_n", "LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N"], + ["automation_fake_provider_fault_status", "LANGBOT_FAKE_PROVIDER_FAULT_STATUS"], + ["automation_fake_provider_fail_after_first_chunk", "LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK"], + ["automation_fake_provider_dynamic_response", "LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE"], ["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"], ["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"], ["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],