mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-25 15:04:19 +00:00
test(skills): extend fake provider load profiles
This commit is contained in:
@@ -122,12 +122,18 @@ Controlled Debug Chat message-path load gate:
|
||||
```bash
|
||||
bin/lbs suite plan langbot-debug-chat-load-gate
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local
|
||||
bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local
|
||||
```
|
||||
|
||||
Start with `langbot-fake-provider-debug-chat-load`. It launches a local
|
||||
OpenAI-compatible fake provider, creates the matching provider/model/pipeline,
|
||||
then sends concurrent WebSocket Debug Chat messages through the real backend.
|
||||
Use `langbot-fake-provider-debug-chat-slow-load` to measure the same path under
|
||||
deterministic streaming latency. Use
|
||||
`langbot-fake-provider-debug-chat-fault-recovery` to inject bounded provider
|
||||
HTTP failures and confirm later Debug Chat requests recover.
|
||||
Use `langbot-space-debug-chat-concurrency-smoke` only as a low-volume live
|
||||
provider smoke; it includes Space/model/network latency and should be compared
|
||||
against the fake-provider baseline before attributing failures to LangBot.
|
||||
|
||||
@@ -227,6 +227,18 @@
|
||||
"automation_debug_chat_load_max_error_rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_load_min_error_rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_load_min_error_count": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_load_min_ok_count": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_load_min_provider_fault_count": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_debug_chat_load_expected_prefix": {
|
||||
"type": "string"
|
||||
},
|
||||
@@ -241,6 +253,39 @@
|
||||
"type": "string",
|
||||
"enum": ["0", "1", "false", "true"]
|
||||
},
|
||||
"automation_debug_chat_load_fail_on_final_mismatch": {
|
||||
"type": "string",
|
||||
"enum": ["0", "1", "false", "true"]
|
||||
},
|
||||
"automation_fake_provider_response_text": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_first_token_delay_ms": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_chunk_delay_ms": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_chunk_count": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_fail_first_n": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_fail_every_n": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_fault_status": {
|
||||
"type": "string"
|
||||
},
|
||||
"automation_fake_provider_fail_after_first_chunk": {
|
||||
"type": "string",
|
||||
"enum": ["0", "1", "false", "true"]
|
||||
},
|
||||
"automation_fake_provider_dynamic_response": {
|
||||
"type": "string",
|
||||
"enum": ["0", "1", "false", "true"]
|
||||
},
|
||||
"automation_filesystem_checks_json": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -54,6 +54,7 @@ const result = {
|
||||
base_url: "",
|
||||
pid: null,
|
||||
reused: false,
|
||||
config: {},
|
||||
state_file: fakeStatePath,
|
||||
stdout_log: fakeStdoutPath,
|
||||
stderr_log: fakeStderrPath,
|
||||
@@ -99,9 +100,11 @@ try {
|
||||
}
|
||||
|
||||
const fakeProvider = await ensureFakeProvider();
|
||||
const setupConfig = await configureFakeProvider(fakeProvider.url, healthyFakeProviderConfig(), true);
|
||||
result.fake_provider = {
|
||||
...result.fake_provider,
|
||||
...fakeProvider,
|
||||
config: setupConfig.config || healthyFakeProviderConfig(),
|
||||
};
|
||||
|
||||
const user = env.LANGBOT_E2E_LOGIN_USER || "";
|
||||
@@ -144,6 +147,9 @@ try {
|
||||
Object.assign(result, pipeline);
|
||||
result.pipeline_url = `${frontendUrl.replace(/\/$/, "")}/home/pipelines?id=${encodeURIComponent(pipeline.pipeline_id)}`;
|
||||
|
||||
const runConfig = await configureFakeProvider(fakeProvider.url, targetFakeProviderConfig(), true);
|
||||
result.fake_provider.config = runConfig.config || targetFakeProviderConfig();
|
||||
|
||||
if (writeEnv) {
|
||||
await upsertEnvLocal(envLocalPath, {
|
||||
LANGBOT_E2E_LOGIN_USER: user,
|
||||
@@ -172,7 +178,7 @@ process.exit(result.status === "pass" ? 0 : result.status === "env_issue" ? 2 :
|
||||
|
||||
async function ensureFakeProvider() {
|
||||
const envUrl = normalizeProviderRootUrl(env.LANGBOT_FAKE_PROVIDER_URL || "");
|
||||
if (envUrl && await fakeProviderHealthy(envUrl)) {
|
||||
if (envUrl && await fakeProviderHealthy(envUrl) && await fakeProviderConfigurable(envUrl)) {
|
||||
return {
|
||||
url: envUrl,
|
||||
base_url: `${envUrl}/v1`,
|
||||
@@ -184,12 +190,15 @@ async function ensureFakeProvider() {
|
||||
const state = await readState(fakeStatePath);
|
||||
const stateUrl = normalizeProviderRootUrl(state.url || "");
|
||||
if (stateUrl && await fakeProviderHealthy(stateUrl)) {
|
||||
return {
|
||||
url: stateUrl,
|
||||
base_url: state.base_url || `${stateUrl}/v1`,
|
||||
pid: Number.isInteger(state.pid) ? state.pid : null,
|
||||
reused: true,
|
||||
};
|
||||
if (await fakeProviderConfigurable(stateUrl)) {
|
||||
return {
|
||||
url: stateUrl,
|
||||
base_url: state.base_url || `${stateUrl}/v1`,
|
||||
pid: Number.isInteger(state.pid) ? state.pid : null,
|
||||
reused: true,
|
||||
};
|
||||
}
|
||||
if (Number.isInteger(state.pid)) await stopProcess(state.pid);
|
||||
}
|
||||
|
||||
await mkdir(fakeStateDir, { recursive: true });
|
||||
@@ -218,7 +227,7 @@ async function ensureFakeProvider() {
|
||||
await stderr.close();
|
||||
|
||||
const started = await waitForFakeProviderState(fakeStatePath, child.pid, 10_000);
|
||||
if (!started.url || !await fakeProviderHealthy(started.url)) {
|
||||
if (!started.url || !await fakeProviderHealthy(started.url) || !await fakeProviderConfigurable(started.url)) {
|
||||
throw new Error(`Fake provider did not become healthy. See ${fakeStderrPath}`);
|
||||
}
|
||||
|
||||
@@ -230,6 +239,23 @@ async function ensureFakeProvider() {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Push a behaviour profile to the fake provider's `/__qa/config` endpoint.
 *
 * @param {string} rootUrl - Provider root URL; run through normalizeProviderRootUrl before use.
 * @param {object} config - Profile fields sent as the `config` member of the request body.
 * @param {boolean} resetRequestCount - Sent as `reset_request_count`.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the HTTP status is not OK or the body does not carry `ok: true`.
 */
async function configureFakeProvider(rootUrl, config, resetRequestCount) {
  const response = await fetch(`${normalizeProviderRootUrl(rootUrl)}/__qa/config`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      config,
      reset_request_count: resetRequestCount,
    }),
    signal: AbortSignal.timeout(3000),
  });
  // An unparseable body and a literal JSON `null` are both treated as an empty
  // object so the failure below is reported as a config error, not a TypeError.
  const json = (await response.json().catch(() => null)) ?? {};
  if (!response.ok || json.ok !== true) {
    throw new Error(`Fake provider config failed with HTTP ${response.status}.`);
  }
  return json;
}
|
||||
|
||||
async function fakeProviderHealthy(rootUrl) {
|
||||
try {
|
||||
const response = await fetch(`${rootUrl.replace(/\/$/, "")}/healthz`, {
|
||||
@@ -243,6 +269,28 @@ async function fakeProviderHealthy(rootUrl) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Probe whether a running fake provider exposes the `/__qa/config` endpoint.
 *
 * @param {string} rootUrl - Provider root URL; one trailing slash is stripped.
 * @returns {Promise<boolean>} `true` only when the GET succeeds and the JSON body
 *   has `ok: true` plus a non-null object `config`; `false` on any other outcome,
 *   including network errors and timeouts.
 */
async function fakeProviderConfigurable(rootUrl) {
  try {
    const response = await fetch(`${rootUrl.replace(/\/$/, "")}/__qa/config`, {
      signal: AbortSignal.timeout(2000),
    });
    if (!response.ok) return false;
    const json = await response.json().catch(() => null);
    // Always produce a strict boolean; a missing or null `config` must not leak
    // out as `undefined`/`null`.
    return json?.ok === true && typeof json.config === "object" && json.config !== null;
  } catch {
    // Best-effort probe: an unreachable provider simply is not configurable.
    return false;
  }
}
|
||||
|
||||
/**
 * Ask a previously started process to terminate and give it a moment to exit.
 *
 * @param {number} pid - Process id to signal. Values that are not positive
 *   integers are ignored.
 * @returns {Promise<void>} Resolves after the signal was sent and a 500 ms grace
 *   period elapsed, or immediately when nothing was signalled.
 */
async function stopProcess(pid) {
  // On POSIX, kill(2) treats pid 0 and negative pids as process-group targets,
  // so a corrupt state file must never be able to signal this script's own group.
  if (!Number.isInteger(pid) || pid <= 0) return;
  try {
    process.kill(pid, "SIGTERM");
  } catch {
    // The signal could not be delivered (e.g. the process is already gone);
    // there is nothing to wait for.
    return;
  }
  await sleep(500);
}
|
||||
|
||||
async function waitForFakeProviderState(path, expectedPid, timeoutMs) {
|
||||
const startedAt = Date.now();
|
||||
let lastState = {};
|
||||
@@ -268,6 +316,34 @@ function normalizeProviderRootUrl(value) {
|
||||
return trimmed.endsWith("/v1") ? trimmed.slice(0, -3) : trimmed;
|
||||
}
|
||||
|
||||
/**
 * Build the baseline fake-provider profile: quick responses, no injected faults.
 *
 * @returns {object} A fresh config object on every call.
 */
function healthyFakeProviderConfig() {
  const streaming = {
    response_text: "OK",
    first_token_delay_ms: 25,
    chunk_delay_ms: 10,
    chunk_count: 0,
  };
  const faults = {
    fault_status: 500,
    fail_first_n: 0,
    fail_every_n: 0,
    fail_after_first_chunk: false,
  };
  return { ...streaming, ...faults, dynamic_response: true };
}
|
||||
|
||||
/**
 * Build the fake-provider profile for the actual run from LANGBOT_FAKE_PROVIDER_*
 * environment values, falling back to the defaults coded below when a value is
 * unset or rejected by the parsing helpers.
 *
 * @returns {object} A fresh config object on every call.
 */
function targetFakeProviderConfig() {
  const {
    LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT: responseText,
    LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS: firstTokenDelay,
    LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS: chunkDelay,
    LANGBOT_FAKE_PROVIDER_CHUNK_COUNT: chunkCount,
    LANGBOT_FAKE_PROVIDER_FAULT_STATUS: faultStatus,
    LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N: failFirstN,
    LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N: failEveryN,
    LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK: failAfterFirstChunk,
    LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE: dynamicResponse,
  } = env;

  const profile = {};
  profile.response_text = responseText || "OK";
  profile.first_token_delay_ms = nonNegativeInteger(firstTokenDelay, 25);
  profile.chunk_delay_ms = nonNegativeInteger(chunkDelay, 10);
  profile.chunk_count = nonNegativeInteger(chunkCount, 0);
  profile.fault_status = httpFaultStatus(faultStatus, 500);
  profile.fail_first_n = nonNegativeInteger(failFirstN, 0);
  profile.fail_every_n = nonNegativeInteger(failEveryN, 0);
  profile.fail_after_first_chunk = envBool(failAfterFirstChunk, false);
  profile.dynamic_response = envBool(dynamicResponse, true);
  return profile;
}
|
||||
|
||||
async function skipWizard({ backendUrl, token }) {
|
||||
const response = await apiJson(backendUrl, "/api/v1/system/wizard/completed", {
|
||||
method: "POST",
|
||||
@@ -505,6 +581,23 @@ function positiveInteger(value, fallback) {
|
||||
return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
|
||||
}
|
||||
|
||||
/**
 * Parse a setting as an integer >= 0.
 *
 * @param {unknown} value - Raw value, typically an environment string.
 * @param {number} fallback - Returned when `value` is missing, blank, or not a
 *   non-negative integer.
 * @returns {number} The parsed integer or `fallback`.
 */
function nonNegativeInteger(value, fallback) {
  // Number("") and Number(null) are both 0, which would silently replace the
  // caller's default with zero; treat missing/blank input as "not set".
  if (value === undefined || value === null || String(value).trim() === "") return fallback;
  const parsed = Number(value);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
}
|
||||
|
||||
/**
 * Parse an HTTP error status to inject.
 *
 * @param {unknown} value - Raw value, typically an environment string.
 * @param {number} fallback - Returned when `value` is not an integer in 400..599.
 * @returns {number} The parsed status or `fallback`.
 */
function httpFaultStatus(value, fallback) {
  const status = Number(value);
  if (!Number.isInteger(status)) return fallback;
  if (status < 400 || status > 599) return fallback;
  return status;
}
|
||||
|
||||
/**
 * Interpret an environment-style flag.
 *
 * @param {unknown} value - Raw value; `undefined` and `""` count as unset.
 * @param {boolean} fallback - Returned when the value is unset or unrecognised.
 * @returns {boolean} `true` for 1/true/yes/on, `false` for 0/false/no/off
 *   (case-insensitive), otherwise `fallback`.
 */
function envBool(value, fallback) {
  const unset = value === undefined || value === "";
  if (unset) return fallback;

  const text = String(value);
  const truthy = /^(1|true|yes|on)$/i;
  const falsy = /^(0|false|no|off)$/i;
  if (truthy.test(text)) return true;
  return falsy.test(text) ? false : fallback;
}
|
||||
|
||||
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
@@ -10,14 +10,18 @@ const host = args.host || env.LANGBOT_FAKE_PROVIDER_HOST || "127.0.0.1";
|
||||
const port = integer(args.port ?? env.LANGBOT_FAKE_PROVIDER_PORT, 0);
|
||||
const stateFile = args["state-file"] || env.LANGBOT_FAKE_PROVIDER_STATE_FILE || "";
|
||||
const modelName = env.LANGBOT_FAKE_PROVIDER_MODEL_NAME || "gpt-4o-mini";
|
||||
const responseText = env.LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT || "OK";
|
||||
const firstTokenDelayMs = integer(env.LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS, 25);
|
||||
const chunkDelayMs = integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS, 10);
|
||||
const faultStatus = integer(env.LANGBOT_FAKE_PROVIDER_FAULT_STATUS, 500);
|
||||
const failFirstN = integer(env.LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N, 0);
|
||||
const failEveryN = integer(env.LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N, 0);
|
||||
const failAfterFirstChunk = bool(env.LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK, false);
|
||||
const requestLogLimit = integer(env.LANGBOT_FAKE_PROVIDER_REQUEST_LOG_LIMIT, 500);
|
||||
const config = {
|
||||
response_text: env.LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT || "OK",
|
||||
first_token_delay_ms: integer(env.LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS, 25),
|
||||
chunk_delay_ms: integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS, 10),
|
||||
chunk_count: integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_COUNT, 0),
|
||||
fault_status: integer(env.LANGBOT_FAKE_PROVIDER_FAULT_STATUS, 500),
|
||||
fail_first_n: integer(env.LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N, 0),
|
||||
fail_every_n: integer(env.LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N, 0),
|
||||
fail_after_first_chunk: bool(env.LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK, false),
|
||||
dynamic_response: !/^(0|false|no|off)$/i.test(env.LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE || ""),
|
||||
request_log_limit: integer(env.LANGBOT_FAKE_PROVIDER_REQUEST_LOG_LIMIT, 500),
|
||||
};
|
||||
|
||||
let requestCount = 0;
|
||||
const recentRequests = [];
|
||||
@@ -30,12 +34,48 @@ const server = createServer(async (request, response) => {
|
||||
sendJson(response, 200, {
|
||||
ok: true,
|
||||
model: modelName,
|
||||
config,
|
||||
request_count: requestCount,
|
||||
recent_request_count: recentRequests.length,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (request.method === "GET" && url.pathname === "/__qa/config") {
|
||||
sendJson(response, 200, {
|
||||
ok: true,
|
||||
model: modelName,
|
||||
config,
|
||||
request_count: requestCount,
|
||||
recent_requests: recentRequests,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (request.method === "POST" && url.pathname === "/__qa/config") {
|
||||
const body = await readJson(request);
|
||||
applyConfig(body.config && typeof body.config === "object" ? body.config : body);
|
||||
if (body.reset_request_count !== false) resetRequestState();
|
||||
sendJson(response, 200, {
|
||||
ok: true,
|
||||
model: modelName,
|
||||
config,
|
||||
request_count: requestCount,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (request.method === "POST" && url.pathname === "/__qa/reset") {
|
||||
resetRequestState();
|
||||
sendJson(response, 200, {
|
||||
ok: true,
|
||||
model: modelName,
|
||||
config,
|
||||
request_count: requestCount,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (request.method === "GET" && ["/models", "/v1/models"].includes(url.pathname)) {
|
||||
sendJson(response, 200, {
|
||||
object: "list",
|
||||
@@ -56,7 +96,8 @@ const server = createServer(async (request, response) => {
|
||||
requestCount += 1;
|
||||
const body = await readJson(request);
|
||||
const requestId = `chatcmpl-langbot-fake-${requestCount}`;
|
||||
const shouldFail = requestCount <= failFirstN || (failEveryN > 0 && requestCount % failEveryN === 0);
|
||||
const shouldFail = requestCount <= config.fail_first_n
|
||||
|| (config.fail_every_n > 0 && requestCount % config.fail_every_n === 0);
|
||||
recordRequest({
|
||||
id: requestId,
|
||||
path: url.pathname,
|
||||
@@ -67,10 +108,10 @@ const server = createServer(async (request, response) => {
|
||||
});
|
||||
|
||||
if (shouldFail) {
|
||||
await sleep(firstTokenDelayMs);
|
||||
sendJson(response, faultStatus, {
|
||||
await sleep(config.first_token_delay_ms);
|
||||
sendJson(response, config.fault_status, {
|
||||
error: {
|
||||
message: `LangBot fake provider injected HTTP ${faultStatus}`,
|
||||
message: `LangBot fake provider injected HTTP ${config.fault_status}`,
|
||||
type: "fake_provider_fault",
|
||||
code: "fake_provider_fault",
|
||||
},
|
||||
@@ -85,10 +126,10 @@ const server = createServer(async (request, response) => {
|
||||
requestId,
|
||||
model: body.model || modelName,
|
||||
content: replyText,
|
||||
failAfterFirstChunk,
|
||||
failAfterFirstChunk: config.fail_after_first_chunk,
|
||||
});
|
||||
} else {
|
||||
await sleep(firstTokenDelayMs + chunkDelayMs);
|
||||
await sleep(config.first_token_delay_ms + config.chunk_delay_ms);
|
||||
sendJson(response, 200, completionPayload({
|
||||
requestId,
|
||||
model: body.model || modelName,
|
||||
@@ -230,7 +271,7 @@ async function streamCompletion(response, { requestId, model, content, failAfter
|
||||
"connection": "keep-alive",
|
||||
});
|
||||
|
||||
await sleep(firstTokenDelayMs);
|
||||
await sleep(config.first_token_delay_ms);
|
||||
writeSse(response, {
|
||||
id: requestId,
|
||||
object: "chat.completion.chunk",
|
||||
@@ -241,7 +282,7 @@ async function streamCompletion(response, { requestId, model, content, failAfter
|
||||
|
||||
const chunks = splitContent(content);
|
||||
for (let index = 0; index < chunks.length; index += 1) {
|
||||
await sleep(chunkDelayMs);
|
||||
await sleep(config.chunk_delay_ms);
|
||||
writeSse(response, {
|
||||
id: requestId,
|
||||
object: "chat.completion.chunk",
|
||||
@@ -255,7 +296,7 @@ async function streamCompletion(response, { requestId, model, content, failAfter
|
||||
}
|
||||
}
|
||||
|
||||
await sleep(chunkDelayMs);
|
||||
await sleep(config.chunk_delay_ms);
|
||||
const completionTokens = tokenEstimate(content);
|
||||
writeSse(response, {
|
||||
id: requestId,
|
||||
@@ -279,7 +320,7 @@ function writeSse(response, payload) {
|
||||
|
||||
function splitContent(content) {
|
||||
const text = String(content);
|
||||
const requested = integer(env.LANGBOT_FAKE_PROVIDER_CHUNK_COUNT, 0);
|
||||
const requested = config.chunk_count;
|
||||
if (requested <= 1 || text.length <= 1) return [text];
|
||||
const chunkSize = Math.max(1, Math.ceil(text.length / requested));
|
||||
const chunks = [];
|
||||
@@ -294,8 +335,8 @@ function tokenEstimate(content) {
|
||||
}
|
||||
|
||||
function responseTextForBody(body) {
|
||||
if (/^(0|false|no|off)$/i.test(env.LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE || "")) {
|
||||
return responseText;
|
||||
if (!config.dynamic_response) {
|
||||
return config.response_text;
|
||||
}
|
||||
const messages = Array.isArray(body.messages) ? body.messages : [];
|
||||
const lastUser = [...messages].reverse().find((message) => message?.role === "user");
|
||||
@@ -306,7 +347,7 @@ function responseTextForBody(body) {
|
||||
if (exact?.[1]) return exact[1].trim().replace(/[。.!?]+$/, "");
|
||||
const only = text.match(/只回复\s*([A-Za-z0-9_.:@-]{1,80})/);
|
||||
if (only?.[1]) return only[1].trim().replace(/[。.!?]+$/, "");
|
||||
return responseText;
|
||||
return config.response_text;
|
||||
}
|
||||
|
||||
function flattenContent(content) {
|
||||
@@ -328,5 +369,42 @@ function recordRequest(entry) {
|
||||
...entry,
|
||||
at: new Date().toISOString(),
|
||||
});
|
||||
while (recentRequests.length > requestLogLimit) recentRequests.shift();
|
||||
while (recentRequests.length > config.request_log_limit) recentRequests.shift();
|
||||
}
|
||||
|
||||
/**
 * Clear the request bookkeeping: empties the recent-request log in place and
 * restarts the request counter at zero.
 */
function resetRequestState() {
  recentRequests.splice(0, recentRequests.length);
  requestCount = 0;
}
|
||||
|
||||
/**
 * Merge a partial profile into the live `config` object. Fields that are absent
 * or fail validation leave the current value untouched.
 *
 * @param {object} updates - Candidate values keyed by config field name;
 *   non-object input is ignored.
 */
function applyConfig(updates) {
  if (!updates || typeof updates !== "object") return;
  assignString(updates, "response_text");
  assignNonNegativeInteger(updates, "first_token_delay_ms");
  assignNonNegativeInteger(updates, "chunk_delay_ms");
  assignNonNegativeInteger(updates, "chunk_count");
  assignNonNegativeInteger(updates, "fail_first_n");
  assignNonNegativeInteger(updates, "fail_every_n");
  assignNonNegativeInteger(updates, "request_log_limit");
  if (updates.fault_status !== undefined) {
    // Require the whole value to be decimal digits: parseInt alone would accept
    // "503abc" or 503.9 as 503.
    const text = String(updates.fault_status).trim();
    const parsed = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
    if (parsed >= 400 && parsed <= 599) config.fault_status = parsed;
  }
  assignBoolean(updates, "fail_after_first_chunk");
  assignBoolean(updates, "dynamic_response");
}
|
||||
|
||||
/**
 * Copy `updates[key]` into `config[key]` as a string when the field is present.
 *
 * @param {object} updates - Candidate values.
 * @param {string} key - Config field name.
 */
function assignString(updates, key) {
  const incoming = updates[key];
  if (incoming === undefined) return;
  config[key] = String(incoming);
}
|
||||
|
||||
/**
 * Copy `updates[key]` into `config[key]` when it is a non-negative integer.
 * Anything else leaves the current value in place.
 *
 * @param {object} updates - Candidate values.
 * @param {string} key - Config field name.
 */
function assignNonNegativeInteger(updates, key) {
  if (updates[key] === undefined) return;
  // Require the whole value to be decimal digits. parseInt alone reads "10ms"
  // as 10, 1.9 as 1, and 1e21 (stringified "1e+21") as 1.
  const text = String(updates[key]).trim();
  if (!/^\d+$/.test(text)) return;
  const parsed = Number.parseInt(text, 10);
  if (Number.isSafeInteger(parsed)) config[key] = parsed;
}
|
||||
|
||||
/**
 * Update `config[key]` from `updates[key]` when the field is present, passing
 * the current value to `bool` as its fallback.
 * NOTE(review): `bool` is defined outside this view; presumably it returns the
 * fallback for unrecognised input — confirm.
 *
 * @param {object} updates - Candidate values.
 * @param {string} key - Config field name.
 */
function assignBoolean(updates, key) {
  const incoming = updates[key];
  if (incoming !== undefined) {
    config[key] = bool(incoming, config[key]);
  }
}
|
||||
|
||||
@@ -151,7 +151,9 @@
|
||||
"agent-runner-release-preflight",
|
||||
"agent-runner-runtime-chaos",
|
||||
"dify-agent-debug-chat",
|
||||
"langbot-fake-provider-debug-chat-fault-recovery",
|
||||
"langbot-fake-provider-debug-chat-load",
|
||||
"langbot-fake-provider-debug-chat-slow-load",
|
||||
"langbot-fault-taxonomy-contract",
|
||||
"langbot-live-backend-latency",
|
||||
"langbot-live-backend-log-health",
|
||||
@@ -495,6 +497,44 @@
|
||||
"backend_log"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-fake-provider-debug-chat-fault-recovery",
|
||||
"title": "LangBot Debug Chat fake-provider fault recovery probe",
|
||||
"mode": "probe",
|
||||
"area": "reliability",
|
||||
"type": "chaos",
|
||||
"priority": "p1",
|
||||
"risk": "high",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"reliability",
|
||||
"chaos",
|
||||
"debug-chat",
|
||||
"websocket",
|
||||
"fake-provider",
|
||||
"fault-injection",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs",
|
||||
"setup_automation": [
|
||||
"node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env"
|
||||
],
|
||||
"setup_provides_env": [
|
||||
"LANGBOT_FAKE_PROVIDER_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_BASE_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_PID",
|
||||
"LANGBOT_FAKE_PROVIDER_PROVIDER_UUID",
|
||||
"LANGBOT_FAKE_PROVIDER_MODEL_UUID",
|
||||
"LANGBOT_FAKE_PROVIDER_PIPELINE_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_PIPELINE_NAME"
|
||||
],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"network",
|
||||
"api_diagnostic",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-fake-provider-debug-chat-load",
|
||||
"title": "LangBot Debug Chat controlled fake-provider load probe",
|
||||
@@ -532,6 +572,44 @@
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-fake-provider-debug-chat-slow-load",
|
||||
"title": "LangBot Debug Chat slow fake-provider load probe",
|
||||
"mode": "probe",
|
||||
"area": "performance",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"risk": "medium",
|
||||
"ci_eligible": false,
|
||||
"tags": [
|
||||
"performance",
|
||||
"debug-chat",
|
||||
"websocket",
|
||||
"fake-provider",
|
||||
"slow-provider",
|
||||
"load",
|
||||
"metrics"
|
||||
],
|
||||
"automation": "skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs",
|
||||
"setup_automation": [
|
||||
"node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env"
|
||||
],
|
||||
"setup_provides_env": [
|
||||
"LANGBOT_FAKE_PROVIDER_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_BASE_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_PID",
|
||||
"LANGBOT_FAKE_PROVIDER_PROVIDER_UUID",
|
||||
"LANGBOT_FAKE_PROVIDER_MODEL_UUID",
|
||||
"LANGBOT_FAKE_PROVIDER_PIPELINE_URL",
|
||||
"LANGBOT_FAKE_PROVIDER_PIPELINE_NAME"
|
||||
],
|
||||
"evidence_required": [
|
||||
"metrics",
|
||||
"network",
|
||||
"api_diagnostic",
|
||||
"filesystem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-fault-taxonomy-contract",
|
||||
"title": "LangBot fault taxonomy and cleanup contract",
|
||||
@@ -1366,7 +1444,7 @@
|
||||
{
|
||||
"id": "langbot-debug-chat-load-gate",
|
||||
"title": "LangBot Debug Chat load gate",
|
||||
"description": "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline plus optional real Space-provider smoke.",
|
||||
"description": "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke.",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
@@ -1377,6 +1455,8 @@
|
||||
],
|
||||
"cases": [
|
||||
"langbot-fake-provider-debug-chat-load",
|
||||
"langbot-fake-provider-debug-chat-slow-load",
|
||||
"langbot-fake-provider-debug-chat-fault-recovery",
|
||||
"langbot-space-debug-chat-concurrency-smoke"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -36,9 +36,12 @@ LANGBOT_FAKE_PROVIDER_MODEL_NAME=gpt-4o-mini
|
||||
LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT=OK
|
||||
LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS=25
|
||||
LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS=10
|
||||
LANGBOT_FAKE_PROVIDER_CHUNK_COUNT=0
|
||||
LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N=0
|
||||
LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N=0
|
||||
LANGBOT_FAKE_PROVIDER_FAULT_STATUS=500
|
||||
LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK=false
|
||||
LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE=true
|
||||
|
||||
# Optional case-specific runner targets. Prefer these for runner-specific cases
|
||||
# so the automation cannot silently test the wrong runner.
|
||||
|
||||
+95
@@ -0,0 +1,95 @@
|
||||
id: langbot-fake-provider-debug-chat-fault-recovery
|
||||
title: "LangBot Debug Chat fake-provider fault recovery probe"
|
||||
mode: probe
|
||||
area: reliability
|
||||
type: chaos
|
||||
priority: p1
|
||||
risk: high
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- reliability
|
||||
- chaos
|
||||
- debug-chat
|
||||
- websocket
|
||||
- fake-provider
|
||||
- fault-injection
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-env-setup
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
- LANGBOT_FRONTEND_URL
|
||||
- LANGBOT_E2E_LOGIN_USER
|
||||
automation: skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs
|
||||
automation_env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
- LANGBOT_E2E_LOGIN_USER
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
automation_pipeline_url_env: LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
automation_pipeline_name_env: LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
automation_debug_chat_load_requests: "6"
|
||||
automation_debug_chat_load_concurrency: "1"
|
||||
automation_debug_chat_load_timeout_ms: "15000"
|
||||
automation_debug_chat_load_response_p95_ms: "5000"
|
||||
automation_debug_chat_load_max_error_rate: "0"
|
||||
automation_debug_chat_load_min_ok_count: "6"
|
||||
automation_debug_chat_load_min_provider_fault_count: "2"
|
||||
automation_debug_chat_load_expected_prefix: "FAULTQA"
|
||||
automation_debug_chat_load_prompt_template: '请只回复 "{expected}",不要解释,不要添加其他字符。'
|
||||
automation_debug_chat_load_stream: "true"
|
||||
automation_debug_chat_load_reset: "true"
|
||||
automation_debug_chat_load_fail_on_final_mismatch: "true"
|
||||
automation_fake_provider_first_token_delay_ms: "25"
|
||||
automation_fake_provider_chunk_delay_ms: "10"
|
||||
automation_fake_provider_chunk_count: "0"
|
||||
automation_fake_provider_fail_first_n: "2"
|
||||
automation_fake_provider_fail_every_n: "0"
|
||||
automation_fake_provider_fault_status: "503"
|
||||
metrics_thresholds_json: '{"response_p95_ms":{"max":5000},"error_rate":{"max":0},"ok_count_min":{"min":6},"fake_provider_fault_count_min":{"min":2}}'
|
||||
fault_model_json: '{"provider_fault":"HTTP 503 for first 2 fake-provider chat completions after reset","expected_behavior":"LangBot retries or otherwise recovers from bounded provider failures so every Debug Chat request receives its expected response without backend crash."}'
|
||||
load_profile_json: '{"requests":6,"concurrency":1,"path":"Pipeline Debug Chat WebSocket","provider":"controlled fake OpenAI-compatible provider","classification":"fault-recovery-not-throughput-benchmark"}'
|
||||
setup_automation:
|
||||
- "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env"
|
||||
setup_provides_env:
|
||||
- LANGBOT_FAKE_PROVIDER_URL
|
||||
- LANGBOT_FAKE_PROVIDER_BASE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PID
|
||||
- LANGBOT_FAKE_PROVIDER_PROVIDER_UUID
|
||||
- LANGBOT_FAKE_PROVIDER_MODEL_UUID
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
steps:
|
||||
- "Configure the local fake provider to return HTTP 503 for the first two chat completions after reset."
|
||||
- "Create or update the LangBot provider, model, and local-agent pipeline that points at the fake provider."
|
||||
- "Reset the target Debug Chat session and fake-provider request counter."
|
||||
- "Send a sequential Debug Chat batch and verify later requests recover after the injected provider faults."
|
||||
checks:
|
||||
- "automation-result.json status is pass when the fake provider records at least two injected faults, every Debug Chat request succeeds, and total user-visible error rate stays at zero."
|
||||
- "metrics_summary includes fake_provider_fault_count and status_counts for the same run window."
|
||||
- "backend logs show request handling for the same run window without unexpected Traceback or task-leak findings."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- network
|
||||
- api_diagnostic
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This is a fault-recovery probe, not a throughput benchmark."
|
||||
- "Provider faults may be retried inside the provider/requester path; judge this case by fake_provider_fault_count plus user-visible success/error metrics."
|
||||
- "The profile uses concurrency 1 because Debug Chat broadcasts assistant responses to every connection in a session, and failed responses do not carry the unique success token needed for concurrent attribution."
|
||||
success_patterns:
|
||||
- "Debug Chat WebSocket concurrency probe passed"
|
||||
- "Streaming completed"
|
||||
failure_patterns:
|
||||
- "fake_provider_fault"
|
||||
- "HTTP 503"
|
||||
- "Timed out after"
|
||||
- "All models failed during streaming setup"
|
||||
expected_failures:
|
||||
- "fake_provider_fault"
|
||||
- "HTTP 503"
|
||||
troubleshooting:
|
||||
- backend-not-listening
|
||||
- debug-chat-history-contaminates-automation
|
||||
- local-agent-model-route-unavailable
|
||||
@@ -0,0 +1,88 @@
|
||||
id: langbot-fake-provider-debug-chat-slow-load
|
||||
title: "LangBot Debug Chat slow fake-provider load probe"
|
||||
mode: probe
|
||||
area: performance
|
||||
type: performance
|
||||
priority: p1
|
||||
risk: medium
|
||||
ci_eligible: false
|
||||
tags:
|
||||
- performance
|
||||
- debug-chat
|
||||
- websocket
|
||||
- fake-provider
|
||||
- slow-provider
|
||||
- load
|
||||
- metrics
|
||||
skills:
|
||||
- langbot-env-setup
|
||||
- langbot-testing
|
||||
env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
- LANGBOT_FRONTEND_URL
|
||||
- LANGBOT_E2E_LOGIN_USER
|
||||
automation: skills/langbot-testing/probes/langbot-debug-chat-concurrency.mjs
|
||||
automation_env:
|
||||
- LANGBOT_BACKEND_URL
|
||||
- LANGBOT_E2E_LOGIN_USER
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
automation_pipeline_url_env: LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
automation_pipeline_name_env: LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
automation_debug_chat_load_requests: "8"
|
||||
automation_debug_chat_load_concurrency: "4"
|
||||
automation_debug_chat_load_timeout_ms: "45000"
|
||||
automation_debug_chat_load_response_p95_ms: "10000"
|
||||
automation_debug_chat_load_first_response_p95_ms: "7000"
|
||||
automation_debug_chat_load_max_error_rate: "0"
|
||||
automation_debug_chat_load_expected_prefix: "SLOWQA"
|
||||
automation_debug_chat_load_prompt_template: '请只回复 "{expected}",不要解释,不要添加其他字符。'
|
||||
automation_debug_chat_load_stream: "true"
|
||||
automation_debug_chat_load_reset: "true"
|
||||
automation_fake_provider_first_token_delay_ms: "1000"
|
||||
automation_fake_provider_chunk_delay_ms: "250"
|
||||
automation_fake_provider_chunk_count: "4"
|
||||
automation_fake_provider_fail_first_n: "0"
|
||||
automation_fake_provider_fail_every_n: "0"
|
||||
automation_fake_provider_fault_status: "500"
|
||||
metrics_thresholds_json: '{"response_p95_ms":{"max":10000},"first_response_p95_ms":{"max":7000},"error_rate":{"max":0}}'
|
||||
load_profile_json: '{"requests":8,"concurrency":4,"path":"Pipeline Debug Chat WebSocket","provider":"controlled slow fake OpenAI-compatible provider","metric":"send-to-final-assistant-response","provider_profile":{"first_token_delay_ms":1000,"chunk_delay_ms":250,"chunk_count":4}}'
|
||||
setup_automation:
|
||||
- "node:scripts/e2e/ensure-fake-provider-pipeline.mjs --write-env"
|
||||
setup_provides_env:
|
||||
- LANGBOT_FAKE_PROVIDER_URL
|
||||
- LANGBOT_FAKE_PROVIDER_BASE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PID
|
||||
- LANGBOT_FAKE_PROVIDER_PROVIDER_UUID
|
||||
- LANGBOT_FAKE_PROVIDER_MODEL_UUID
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_URL
|
||||
- LANGBOT_FAKE_PROVIDER_PIPELINE_NAME
|
||||
steps:
|
||||
- "Configure the local fake provider with deterministic slow streaming latency."
|
||||
- "Create or update the LangBot provider, model, and local-agent pipeline that points at the fake provider."
|
||||
- "Reset the target Debug Chat session."
|
||||
- "Open concurrent WebSocket Debug Chat connections and send unique deterministic prompts through the real backend pipeline."
|
||||
checks:
|
||||
- "automation-result.json status is pass when every request receives its own expected assistant response."
|
||||
- "metrics_summary shows zero errors under the slow-provider profile."
|
||||
- "thresholds_summary shows response_p95_ms, first_response_p95_ms, and error_rate pass."
|
||||
evidence_required:
|
||||
- metrics
|
||||
- network
|
||||
- api_diagnostic
|
||||
- filesystem
|
||||
diagnostics:
|
||||
- "This probe keeps the model deterministic while injecting provider latency, so it catches backend timeout, streaming, and WebSocket backpressure issues without Space variability."
|
||||
- "Compare with langbot-fake-provider-debug-chat-load to separate fixed LangBot overhead from provider-latency amplification."
|
||||
success_patterns:
|
||||
- "Debug Chat WebSocket concurrency probe passed"
|
||||
- "Streaming completed"
|
||||
failure_patterns:
|
||||
- "WebSocket connection error"
|
||||
- "Timed out after"
|
||||
- "Final assistant response did not include"
|
||||
- "All models failed during streaming setup"
|
||||
troubleshooting:
|
||||
- backend-not-listening
|
||||
- debug-chat-history-contaminates-automation
|
||||
- local-agent-model-route-unavailable
|
||||
@@ -28,8 +28,10 @@ await ensureEvidence(paths);
|
||||
// Wall-clock start of this probe run.
const startedAt = new Date();
|
||||
// Evidence files produced by this run; each path is resolved under paths.evidenceDir.
const metricsPath = resolve(paths.evidenceDir, "metrics.json");
|
||||
const samplesPath = resolve(paths.evidenceDir, "samples.json");
|
||||
// Snapshot of the fake provider's QA state, written only when a state object was read.
const fakeProviderStatePath = resolve(paths.evidenceDir, "fake-provider-state.json");
|
||||
const resetDiagnosticPath = resolve(paths.evidenceDir, "debug-chat-reset-diagnostic.json");
|
||||
// Target endpoints come from the environment; an unset variable becomes an empty string.
const backendUrl = env.LANGBOT_BACKEND_URL || "";
|
||||
// Root URL of the fake provider; readFakeProviderState() returns null when this is empty.
const fakeProviderUrl = env.LANGBOT_FAKE_PROVIDER_URL || "";
|
||||
// The E2E-specific pipeline variables take precedence over the generic ones.
const pipelineUrl = env.LANGBOT_E2E_PIPELINE_URL || env.LANGBOT_PIPELINE_URL || "";
|
||||
const pipelineName = env.LANGBOT_E2E_PIPELINE_NAME || env.LANGBOT_PIPELINE_NAME || "";
|
||||
// Debug Chat session type; defaults to "person" when neither variable is set.
const sessionType = env.LANGBOT_DEBUG_CHAT_LOAD_SESSION_TYPE || env.LANGBOT_E2E_DEBUG_CHAT_SESSION_TYPE || "person";
|
||||
@@ -44,6 +46,12 @@ const resetBeforeRun = bool(env.LANGBOT_DEBUG_CHAT_LOAD_RESET, true);
|
||||
// Upper-bound budgets. buildThresholds() always checks error_rate against maxErrorRate.
const responseP95BudgetMs = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_RESPONSE_P95_MS, defaultP95Budget(caseId));
|
||||
// buildThresholds() only adds the first-response check when this budget is greater than 0.
const firstResponseP95BudgetMs = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_FIRST_RESPONSE_P95_MS, 0);
|
||||
const maxErrorRate = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_MAX_ERROR_RATE, 0);
|
||||
// Lower-bound thresholds. Each one is opt-in: buildThresholds() adds the matching
// check only when the configured value is greater than 0.
const minErrorRate = positiveNumber(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_RATE, 0);
|
||||
const minErrorCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_COUNT, 0);
|
||||
const minOkCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_OK_COUNT, 0);
|
||||
// Compared against the fault count derived from the fake provider's recent requests.
const minProviderFaultCount = nonNegativeInteger(env.LANGBOT_DEBUG_CHAT_LOAD_MIN_PROVIDER_FAULT_COUNT, 0);
|
||||
// Passed through to runSingleRequest(); off by default.
const failOnFinalMismatch = bool(env.LANGBOT_DEBUG_CHAT_LOAD_FAIL_ON_FINAL_MISMATCH, false);
|
||||
// Comma- or newline-separated failure phrases; the E2E-wide variable wins over the load-specific one.
const failureSignals = textList(env.LANGBOT_E2E_FAILURE_SIGNALS || env.LANGBOT_DEBUG_CHAT_LOAD_FAILURE_SIGNALS || "");
|
||||
|
||||
const result = {
|
||||
source: "automation",
|
||||
@@ -67,11 +75,13 @@ const result = {
|
||||
timeout_ms: timeoutMs,
|
||||
stream,
|
||||
reset_before_run: resetBeforeRun,
|
||||
fail_on_final_mismatch: failOnFinalMismatch,
|
||||
},
|
||||
evidence: {
|
||||
network_log: paths.networkLog,
|
||||
metrics_json: metricsPath,
|
||||
samples_json: samplesPath,
|
||||
fake_provider_state_json: fakeProviderStatePath,
|
||||
debug_chat_reset_diagnostic_json: resetDiagnosticPath,
|
||||
automation_result_json: paths.automationResultJson,
|
||||
result_json: paths.resultJson,
|
||||
@@ -135,8 +145,14 @@ try {
|
||||
promptTemplate,
|
||||
expectedPrefix,
|
||||
stream,
|
||||
failOnFinalMismatch,
|
||||
failureSignals,
|
||||
});
|
||||
const loadDurationMs = performance.now() - loadStartedAt;
|
||||
const fakeProviderState = await readFakeProviderState(fakeProviderUrl);
|
||||
if (fakeProviderState) {
|
||||
await writeFile(fakeProviderStatePath, `${JSON.stringify(fakeProviderState, null, 2)}\n`, "utf8");
|
||||
}
|
||||
const metrics = buildMetrics({
|
||||
samples,
|
||||
totalRequests,
|
||||
@@ -146,6 +162,7 @@ try {
|
||||
backendUrl,
|
||||
pipelineId: pipeline.id,
|
||||
sessionType,
|
||||
fakeProviderState,
|
||||
});
|
||||
const thresholds = buildThresholds(metrics);
|
||||
const passed = Object.values(thresholds).every((item) => item.pass);
|
||||
@@ -165,11 +182,14 @@ try {
|
||||
first_response_p95_ms: metrics.first_response_ms.p95,
|
||||
throughput_rps: metrics.throughput_rps,
|
||||
status_counts: metrics.status_counts,
|
||||
fake_provider_request_count: metrics.fake_provider?.request_count ?? null,
|
||||
fake_provider_fault_count: metrics.fake_provider?.fault_count ?? null,
|
||||
};
|
||||
result.thresholds_summary = thresholds;
|
||||
result.artifacts = {
|
||||
metrics_json: metricsPath,
|
||||
samples_json: samplesPath,
|
||||
fake_provider_state_json: fakeProviderState ? fakeProviderStatePath : "",
|
||||
network_log: paths.networkLog,
|
||||
automation_result_json: paths.automationResultJson,
|
||||
result_json: paths.resultJson,
|
||||
@@ -215,6 +235,11 @@ function positiveInteger(value, fallback) {
|
||||
return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
|
||||
}
|
||||
|
||||
/**
 * Parse a setting as a base-10 integer that is zero or greater.
 *
 * @param {*} value - Raw setting; null and undefined are treated as an empty string.
 * @param {*} fallback - Returned unchanged when the value does not parse to an integer >= 0.
 * @returns {*} The parsed integer, or `fallback`.
 */
function nonNegativeInteger(value, fallback) {
|
||||
// Number.parseInt reads only the leading integer portion, so "5abc" and "2.9" parse as 5 and 2.
const parsed = Number.parseInt(String(value ?? ""), 10);
|
||||
// NaN (nothing parseable) and negative results both fall back.
return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
|
||||
}
|
||||
|
||||
function positiveNumber(value, fallback) {
|
||||
const parsed = Number(value || "");
|
||||
return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
|
||||
@@ -227,6 +252,13 @@ function bool(value, fallback) {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
/**
 * Split a comma- or newline-separated setting into a list of trimmed entries.
 *
 * Line breaks may be LF or CRLF. Entries that are empty after trimming are dropped,
 * so a falsy or blank input yields an empty array.
 *
 * @param {*} value - Raw setting; falsy values are treated as an empty string.
 * @returns {string[]} Non-empty, trimmed entries in their original order.
 */
function textList(value) {
|
||||
return String(value || "")
|
||||
.split(/\r?\n|,/)
|
||||
.map((item) => item.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
async function backendReachable(baseUrl) {
|
||||
try {
|
||||
const response = await fetch(`${baseUrl.replace(/\/$/, "")}/healthz`, {
|
||||
@@ -238,6 +270,38 @@ async function backendReachable(baseUrl) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Snapshot the fake provider's QA state by requesting `<root>/__qa/config`.
 *
 * Best-effort: network errors, timeouts, and unparseable bodies are reported
 * in the returned record instead of being thrown.
 *
 * @param {string} rootUrl - Fake provider URL; it is normalized with
 *   normalizeProviderRootUrl() before use.
 * @returns {Promise<object|null>} `null` when no URL is configured. Otherwise a
 *   record whose `status` is "loaded" (HTTP success and payload `ok === true`)
 *   or "unavailable", always carrying `url`, `request_count`, and
 *   `recent_requests`.
 */
async function readFakeProviderState(rootUrl) {
  if (!rootUrl) return null;

  // Normalize once so the request URL and the reported URL cannot diverge.
  const url = normalizeProviderRootUrl(rootUrl);
  try {
    const response = await fetch(`${url}/__qa/config`, {
      // Bound the diagnostic read so it cannot stall the probe.
      signal: AbortSignal.timeout(3000),
    });
    const payload = await response.json().catch(() => null);
    // A valid but non-object body (for example `null`) is treated as an empty
    // payload so the HTTP status is still reported instead of being lost to
    // a TypeError in the catch path.
    const json = payload !== null && typeof payload === "object" ? payload : {};
    return {
      status: response.ok && json.ok === true ? "loaded" : "unavailable",
      url,
      http_status: response.status,
      model: json.model || "",
      config: json.config || {},
      request_count: Number.isFinite(json.request_count) ? json.request_count : null,
      recent_requests: Array.isArray(json.recent_requests) ? json.recent_requests : [],
    };
  } catch (error) {
    return {
      status: "unavailable",
      url,
      // Thrown values are not guaranteed to be Error instances.
      reason: safeReason(String(error?.message ?? error)),
      request_count: null,
      recent_requests: [],
    };
  }
}
|
||||
|
||||
/**
 * Reduce a provider URL to its root by removing surrounding whitespace,
 * trailing slashes, and a trailing `/v1` path segment.
 *
 * All trailing slashes are removed, both before and after the `/v1` check, so
 * inputs such as `http://host/v1//` or `http://host//` still produce a root
 * that can be safely joined with a `/path` suffix.
 *
 * @param {*} value - Provider URL; falsy values are treated as an empty string.
 * @returns {string} The root URL without a trailing slash or `/v1` suffix.
 */
function normalizeProviderRootUrl(value) {
  const apiSuffix = "/v1";
  const withoutSlashes = String(value || "").trim().replace(/\/+$/, "");
  const root = withoutSlashes.endsWith(apiSuffix)
    ? withoutSlashes.slice(0, -apiSuffix.length)
    : withoutSlashes;
  // Removing "/v1" can expose further slashes (for example "http://host//v1").
  return root.replace(/\/+$/, "");
}
|
||||
|
||||
function pipelineIdFromUrl(url) {
|
||||
if (!url) return "";
|
||||
try {
|
||||
@@ -314,6 +378,8 @@ function runSingleRequest({
|
||||
promptTemplate,
|
||||
expectedPrefix,
|
||||
stream,
|
||||
failOnFinalMismatch,
|
||||
failureSignals,
|
||||
}) {
|
||||
return new Promise((resolve) => {
|
||||
const expected = expectedForIndex(expectedPrefix, index);
|
||||
@@ -384,18 +450,22 @@ function runSingleRequest({
|
||||
|
||||
const content = String(data.data.content || "");
|
||||
if (content) sample.response_text = content;
|
||||
if (data.data.is_final === true) {
|
||||
const ok = sample.response_text.includes(expected);
|
||||
if (ok) {
|
||||
if (sample.first_response_ms === null && sentAt > 0) {
|
||||
sample.first_response_ms = rounded(performance.now() - sentAt);
|
||||
if (data.data.is_final === true) {
|
||||
const ok = sample.response_text.includes(expected);
|
||||
if (ok) {
|
||||
if (sample.first_response_ms === null && sentAt > 0) {
|
||||
sample.first_response_ms = rounded(performance.now() - sentAt);
|
||||
}
|
||||
finish("pass", "");
|
||||
} else if (matchesFailureSignal(sample.response_text, failureSignals)) {
|
||||
finish("app_error", `Assistant final response matched a failure signal: ${sample.response_text}`);
|
||||
} else if (failOnFinalMismatch && !containsLoadToken(sample.response_text, expectedPrefix)) {
|
||||
finish("mismatch", `Final assistant response did not include ${expected}: ${sample.response_text}`);
|
||||
} else {
|
||||
sample.foreign_response_count += 1;
|
||||
sample.last_foreign_response_text = sample.response_text;
|
||||
}
|
||||
finish("pass", "");
|
||||
} else {
|
||||
sample.foreign_response_count += 1;
|
||||
sample.last_foreign_response_text = sample.response_text;
|
||||
}
|
||||
}
|
||||
},
|
||||
onError(error) {
|
||||
finish("connection_error", `WebSocket connection error: ${error.message}`);
|
||||
@@ -428,6 +498,16 @@ function runSingleRequest({
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Report whether `text` contains a load token: the literal prefix, a hyphen,
 * and at least four digits (for example "PREFIX-0001").
 *
 * @param {*} text - Text to search; falsy values are treated as an empty string.
 * @param {*} prefix - Token prefix; converted to a string and matched literally.
 * @returns {boolean} True when any such token appears anywhere in the text.
 */
function containsLoadToken(text, prefix) {
|
||||
// Escape regular-expression metacharacters so the prefix is matched as plain text.
const escaped = String(prefix).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
return new RegExp(`${escaped}-\\d{4}`).test(String(text || ""));
|
||||
}
|
||||
|
||||
/**
 * Report whether `text` contains any of the configured failure signals,
 * compared case-insensitively.
 *
 * Empty signals are ignored: every string contains the empty string, so an
 * empty entry would otherwise classify every response as a failure. A missing
 * or non-array `signals` value is treated as "no signals configured".
 *
 * @param {*} text - Response text; falsy values are treated as an empty string.
 * @param {Array<*>} signals - Failure phrases; entries are converted to strings.
 * @returns {boolean} True when at least one non-empty signal occurs in the text.
 */
function matchesFailureSignal(text, signals) {
  if (!Array.isArray(signals)) return false;
  const haystack = String(text || "").toLowerCase();
  return signals.some((signal) => {
    const needle = String(signal ?? "").toLowerCase();
    return needle !== "" && haystack.includes(needle);
  });
}
|
||||
|
||||
function openRawWebSocket(wsUrl, handlers) {
|
||||
const parsed = new URL(wsUrl);
|
||||
const secure = parsed.protocol === "wss:";
|
||||
@@ -605,7 +685,7 @@ function stats(values) {
|
||||
};
|
||||
}
|
||||
|
||||
function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDurationMs, backendUrl, pipelineId, sessionType }) {
|
||||
function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDurationMs, backendUrl, pipelineId, sessionType, fakeProviderState }) {
|
||||
const okSamples = samples.filter((sample) => sample.ok);
|
||||
const statusCounts = {};
|
||||
for (const sample of samples) {
|
||||
@@ -631,10 +711,25 @@ function buildMetrics({ samples, totalRequests, concurrency, timeoutMs, loadDura
|
||||
connected_ms: stats(samples.map((sample) => sample.connected_ms).filter(Number.isFinite)),
|
||||
first_response_ms: stats(okSamples.map((sample) => sample.first_response_ms).filter(Number.isFinite)),
|
||||
response_duration_ms: stats(okSamples.map((sample) => sample.response_duration_ms).filter(Number.isFinite)),
|
||||
fake_provider: summarizeFakeProviderState(fakeProviderState),
|
||||
samples,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Condense a fake-provider state record into the counters reported in metrics.
 *
 * @param {object|null} state - Record as produced by readFakeProviderState(), or a falsy value.
 * @returns {object|null} `null` when there is no state; otherwise status, url,
 *   request counters, and the provider config.
 */
function summarizeFakeProviderState(state) {
|
||||
if (!state) return null;
|
||||
const recentRequests = Array.isArray(state.recent_requests) ? state.recent_requests : [];
|
||||
return {
|
||||
status: state.status || "unknown",
|
||||
url: state.url || "",
|
||||
// Use the reported total when it is a finite number; otherwise count the listed requests.
request_count: Number.isFinite(state.request_count) ? state.request_count : recentRequests.length,
|
||||
recent_request_count: recentRequests.length,
|
||||
// Fault and streaming counts are derived only from the entries in recent_requests.
fault_count: recentRequests.filter((request) => request?.should_fail === true).length,
|
||||
streamed_request_count: recentRequests.filter((request) => request?.stream === true).length,
|
||||
config: state.config || {},
|
||||
};
|
||||
}
|
||||
|
||||
function buildThresholds(metrics) {
|
||||
const thresholds = {
|
||||
error_rate: { actual: metrics.error_rate, max: maxErrorRate, pass: metrics.error_rate <= maxErrorRate },
|
||||
@@ -644,6 +739,35 @@ function buildThresholds(metrics) {
|
||||
pass: metrics.ok_count > 0 && metrics.response_duration_ms.p95 <= responseP95BudgetMs,
|
||||
},
|
||||
};
|
||||
if (minErrorRate > 0) {
|
||||
thresholds.error_rate_min = {
|
||||
actual: metrics.error_rate,
|
||||
min: minErrorRate,
|
||||
pass: metrics.error_rate >= minErrorRate,
|
||||
};
|
||||
}
|
||||
if (minErrorCount > 0) {
|
||||
thresholds.error_count_min = {
|
||||
actual: metrics.error_count,
|
||||
min: minErrorCount,
|
||||
pass: metrics.error_count >= minErrorCount,
|
||||
};
|
||||
}
|
||||
if (minOkCount > 0) {
|
||||
thresholds.ok_count_min = {
|
||||
actual: metrics.ok_count,
|
||||
min: minOkCount,
|
||||
pass: metrics.ok_count >= minOkCount,
|
||||
};
|
||||
}
|
||||
if (minProviderFaultCount > 0) {
|
||||
const actual = metrics.fake_provider?.fault_count ?? 0;
|
||||
thresholds.fake_provider_fault_count_min = {
|
||||
actual,
|
||||
min: minProviderFaultCount,
|
||||
pass: actual >= minProviderFaultCount,
|
||||
};
|
||||
}
|
||||
if (firstResponseP95BudgetMs > 0) {
|
||||
thresholds.first_response_p95_ms = {
|
||||
actual: metrics.first_response_ms.p95,
|
||||
|
||||
@@ -144,6 +144,14 @@ request because Debug Chat broadcasts messages to every connection in the same
|
||||
session; unique tokens prevent one connection from counting another
|
||||
connection's response as its own.
|
||||
|
||||
After the baseline passes, run `langbot-fake-provider-debug-chat-slow-load` to
|
||||
keep the same live backend path while injecting deterministic streaming latency.
|
||||
Run `langbot-fake-provider-debug-chat-fault-recovery` to inject bounded HTTP
|
||||
provider failures and require both observed failures and later successful
|
||||
requests. The fault-recovery case is deliberately sequential because failed
|
||||
Debug Chat responses do not carry a unique success token that can be attributed
|
||||
to one concurrent connection.
|
||||
|
||||
Use `langbot-space-debug-chat-concurrency-smoke` after the fake-provider
|
||||
baseline. It runs a deliberately small real Space-provider batch and reports
|
||||
user-visible latency, not pure LangBot overhead. Space/model/network failures
|
||||
@@ -156,6 +164,8 @@ Useful commands:
|
||||
|
||||
```bash
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local
|
||||
rtk bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local
|
||||
rtk bin/lbs suite run langbot-debug-chat-load-gate --run-id langbot-debug-chat-load-local --include-manual-check
|
||||
```
|
||||
@@ -174,8 +184,8 @@ Use the smallest gate that answers the quality question:
|
||||
starting with Pipeline Debug Chat send-to-visible-completion latency. Run it
|
||||
only when the browser profile and target pipeline are ready.
|
||||
- `langbot-debug-chat-load-gate`: WebSocket Debug Chat load checks, starting
|
||||
with a controlled fake-provider baseline and optionally a low-volume real
|
||||
Space-provider smoke.
|
||||
with controlled fake-provider baseline, slow-provider, and fault-recovery
|
||||
profiles, plus an optional low-volume real Space-provider smoke.
|
||||
- `langbot-performance-reliability-gate`: combined starter gate for synthetic
|
||||
contracts plus live backend checks.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
id: langbot-debug-chat-load-gate
|
||||
title: "LangBot Debug Chat load gate"
|
||||
description: "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline plus optional real Space-provider smoke."
|
||||
description: "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke."
|
||||
type: performance
|
||||
priority: p1
|
||||
tags:
|
||||
@@ -10,4 +10,6 @@ tags:
|
||||
- load
|
||||
cases:
|
||||
- langbot-fake-provider-debug-chat-load
|
||||
- langbot-fake-provider-debug-chat-slow-load
|
||||
- langbot-fake-provider-debug-chat-fault-recovery
|
||||
- langbot-space-debug-chat-concurrency-smoke
|
||||
|
||||
@@ -198,15 +198,45 @@ function validateCaseItem(root: string, item: StructuredItem, skillNames: Set<st
|
||||
errors.push(`${item.path}: '${key}' must be a positive integer string`);
|
||||
}
|
||||
}
|
||||
const loadMaxErrorRate = scalar(item.fields, "automation_debug_chat_load_max_error_rate");
|
||||
if (loadMaxErrorRate && (!/^(?:0(?:\.\d+)?|1(?:\.0+)?)$/.test(loadMaxErrorRate))) {
|
||||
errors.push(`${item.path}: 'automation_debug_chat_load_max_error_rate' must be a number string between 0 and 1`);
|
||||
for (const key of [
|
||||
"automation_debug_chat_load_min_error_count",
|
||||
"automation_debug_chat_load_min_ok_count",
|
||||
"automation_debug_chat_load_min_provider_fault_count",
|
||||
"automation_fake_provider_first_token_delay_ms",
|
||||
"automation_fake_provider_chunk_delay_ms",
|
||||
"automation_fake_provider_chunk_count",
|
||||
"automation_fake_provider_fail_first_n",
|
||||
"automation_fake_provider_fail_every_n",
|
||||
]) {
|
||||
const value = scalar(item.fields, key);
|
||||
if (value && (!/^\d+$/.test(value) || Number.parseInt(value, 10) < 0)) {
|
||||
errors.push(`${item.path}: '${key}' must be a non-negative integer string`);
|
||||
}
|
||||
}
|
||||
for (const key of ["automation_debug_chat_load_max_error_rate", "automation_debug_chat_load_min_error_rate"]) {
|
||||
const value = scalar(item.fields, key);
|
||||
if (value && (!/^(?:0(?:\.\d+)?|1(?:\.0+)?)$/.test(value))) {
|
||||
errors.push(`${item.path}: '${key}' must be a number string between 0 and 1`);
|
||||
}
|
||||
}
|
||||
const fakeProviderFaultStatus = scalar(item.fields, "automation_fake_provider_fault_status");
|
||||
if (fakeProviderFaultStatus) {
|
||||
const parsed = Number.parseInt(fakeProviderFaultStatus, 10);
|
||||
if (!/^\d+$/.test(fakeProviderFaultStatus) || parsed < 400 || parsed > 599) {
|
||||
errors.push(`${item.path}: 'automation_fake_provider_fault_status' must be an HTTP 4xx or 5xx status string`);
|
||||
}
|
||||
}
|
||||
const streamOutput = scalar(item.fields, "automation_stream_output");
|
||||
if (streamOutput && !["0", "1", "false", "true"].includes(streamOutput)) {
|
||||
errors.push(`${item.path}: 'automation_stream_output' must be one of 0, 1, false, or true`);
|
||||
}
|
||||
for (const key of ["automation_debug_chat_load_stream", "automation_debug_chat_load_reset"]) {
|
||||
for (const key of [
|
||||
"automation_debug_chat_load_stream",
|
||||
"automation_debug_chat_load_reset",
|
||||
"automation_debug_chat_load_fail_on_final_mismatch",
|
||||
"automation_fake_provider_fail_after_first_chunk",
|
||||
"automation_fake_provider_dynamic_response",
|
||||
]) {
|
||||
const value = scalar(item.fields, key);
|
||||
if (value && !["0", "1", "false", "true"].includes(value)) {
|
||||
errors.push(`${item.path}: '${key}' must be one of 0, 1, false, or true`);
|
||||
|
||||
@@ -122,10 +122,24 @@ export function automationEnvDefaults(item: StructuredItem, env: EnvSource = pro
|
||||
["automation_debug_chat_load_response_p95_ms", "LANGBOT_DEBUG_CHAT_LOAD_RESPONSE_P95_MS"],
|
||||
["automation_debug_chat_load_first_response_p95_ms", "LANGBOT_DEBUG_CHAT_LOAD_FIRST_RESPONSE_P95_MS"],
|
||||
["automation_debug_chat_load_max_error_rate", "LANGBOT_DEBUG_CHAT_LOAD_MAX_ERROR_RATE"],
|
||||
["automation_debug_chat_load_min_error_rate", "LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_RATE"],
|
||||
["automation_debug_chat_load_min_error_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_ERROR_COUNT"],
|
||||
["automation_debug_chat_load_min_ok_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_OK_COUNT"],
|
||||
["automation_debug_chat_load_min_provider_fault_count", "LANGBOT_DEBUG_CHAT_LOAD_MIN_PROVIDER_FAULT_COUNT"],
|
||||
["automation_debug_chat_load_expected_prefix", "LANGBOT_DEBUG_CHAT_LOAD_EXPECTED_PREFIX"],
|
||||
["automation_debug_chat_load_prompt_template", "LANGBOT_DEBUG_CHAT_LOAD_PROMPT_TEMPLATE"],
|
||||
["automation_debug_chat_load_stream", "LANGBOT_DEBUG_CHAT_LOAD_STREAM"],
|
||||
["automation_debug_chat_load_reset", "LANGBOT_DEBUG_CHAT_LOAD_RESET"],
|
||||
["automation_debug_chat_load_fail_on_final_mismatch", "LANGBOT_DEBUG_CHAT_LOAD_FAIL_ON_FINAL_MISMATCH"],
|
||||
["automation_fake_provider_response_text", "LANGBOT_FAKE_PROVIDER_RESPONSE_TEXT"],
|
||||
["automation_fake_provider_first_token_delay_ms", "LANGBOT_FAKE_PROVIDER_FIRST_TOKEN_DELAY_MS"],
|
||||
["automation_fake_provider_chunk_delay_ms", "LANGBOT_FAKE_PROVIDER_CHUNK_DELAY_MS"],
|
||||
["automation_fake_provider_chunk_count", "LANGBOT_FAKE_PROVIDER_CHUNK_COUNT"],
|
||||
["automation_fake_provider_fail_first_n", "LANGBOT_FAKE_PROVIDER_FAIL_FIRST_N"],
|
||||
["automation_fake_provider_fail_every_n", "LANGBOT_FAKE_PROVIDER_FAIL_EVERY_N"],
|
||||
["automation_fake_provider_fault_status", "LANGBOT_FAKE_PROVIDER_FAULT_STATUS"],
|
||||
["automation_fake_provider_fail_after_first_chunk", "LANGBOT_FAKE_PROVIDER_FAIL_AFTER_FIRST_CHUNK"],
|
||||
["automation_fake_provider_dynamic_response", "LANGBOT_FAKE_PROVIDER_DYNAMIC_RESPONSE"],
|
||||
["automation_filesystem_checks_json", "LANGBOT_E2E_FILESYSTEM_CHECKS_JSON"],
|
||||
["automation_plugin_package", "LANGBOT_E2E_PLUGIN_PACKAGE"],
|
||||
["automation_expected_plugin_id", "LANGBOT_E2E_EXPECTED_PLUGIN_ID"],
|
||||
|
||||
Reference in New Issue
Block a user