mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-25 23:14:20 +00:00
test(skills): clarify manual QA perf gates
This commit is contained in:
@@ -117,27 +117,35 @@ bin/lbs suite plan langbot-user-path-performance-gate
|
||||
bin/lbs suite run langbot-user-path-performance-gate --run-id langbot-user-path-local --include-manual-check
|
||||
```
|
||||
|
||||
Controlled Debug Chat message-path load gate:
|
||||
Controlled Debug Chat message-path load gate (manual/non-required; run fake-provider cases serially when they share `LANGBOT_FAKE_PROVIDER_URL`):
|
||||
|
||||
```bash
|
||||
bin/lbs suite plan langbot-debug-chat-load-gate
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-cross-pipeline-isolation --run-id langbot-fake-cross-pipeline-local
|
||||
bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local
|
||||
bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local
|
||||
```
|
||||
|
||||
Cross-pipeline Debug Chat isolation is a separate manual regression gate because
|
||||
current releases may fail it due to product bug #2286:
|
||||
|
||||
```bash
|
||||
bin/lbs suite plan langbot-debug-chat-isolation-gate
|
||||
bin/lbs suite run langbot-debug-chat-isolation-gate --run-id langbot-debug-chat-isolation-local --include-manual-check
|
||||
```
|
||||
|
||||
Start with `langbot-fake-provider-debug-chat-load`. It launches a local
|
||||
OpenAI-compatible fake provider, creates the matching provider/model/pipeline,
|
||||
then sends concurrent WebSocket Debug Chat messages through the real backend.
|
||||
Use `langbot-fake-provider-debug-chat-slow-load` to measure the same path under
|
||||
deterministic streaming latency. Use
|
||||
`langbot-fake-provider-debug-chat-cross-pipeline-isolation` to verify that
|
||||
concurrent Debug Chat traffic on two pipelines does not leak assistant
|
||||
responses across pipeline boundaries. Use
|
||||
`langbot-fake-provider-debug-chat-fault-recovery` to inject bounded provider
|
||||
HTTP failures and confirm later Debug Chat requests recover.
|
||||
HTTP failures and confirm later Debug Chat requests recover. Use the separate
|
||||
`langbot-debug-chat-isolation-gate` to verify that concurrent Debug Chat traffic
|
||||
on two pipelines does not leak assistant responses across pipeline boundaries;
|
||||
current releases may fail that gate because of #2286, so keep it out of the
|
||||
normal load gate until the product fix lands.
|
||||
Use `langbot-space-debug-chat-concurrency-smoke` only as a low-volume live
|
||||
provider smoke; it includes Space/model/network latency and should be compared
|
||||
against the fake-provider baseline before attributing failures to LangBot.
|
||||
|
||||
@@ -14,8 +14,8 @@ import {
|
||||
} from "./lib/langbot-e2e.mjs";
|
||||
|
||||
const caseId = "ensure-fake-provider-cross-pipelines";
|
||||
const DEFAULT_PIPELINE_A_NAME = "Agent QA Fake Provider Debug Chat A";
|
||||
const DEFAULT_PIPELINE_B_NAME = "Agent QA Fake Provider Debug Chat B";
|
||||
const DEFAULT_PIPELINE_A_NAME = "LangBot QA Fake Provider Debug Chat A";
|
||||
const DEFAULT_PIPELINE_B_NAME = "LangBot QA Fake Provider Debug Chat B";
|
||||
|
||||
await loadEnvFiles();
|
||||
const paths = evidencePaths(caseId);
|
||||
@@ -57,6 +57,8 @@ const result = {
|
||||
};
|
||||
|
||||
try {
|
||||
console.error(`[langbot-qa] configuring cross-pipeline QA fixtures: pipeline_a=\"${pipelineAName}\", pipeline_b=\"${pipelineBName}\"`);
|
||||
console.error("[langbot-qa] run these fake-provider setup/probe commands serially when they share LANGBOT_FAKE_PROVIDER_URL.");
|
||||
if (pipelineAName === pipelineBName) {
|
||||
throw new Error("LANGBOT_FAKE_PROVIDER_PIPELINE_A_NAME and LANGBOT_FAKE_PROVIDER_PIPELINE_B_NAME must be different.");
|
||||
}
|
||||
|
||||
@@ -16,8 +16,9 @@ import {
|
||||
|
||||
const RUNNER_ID = "local-agent";
|
||||
const DEFAULT_LOCAL_PASSWORD = "LangBotE2ELocalPass!2026";
|
||||
const DEFAULT_PIPELINE_NAME = "Agent QA Fake Provider Debug Chat";
|
||||
const DEFAULT_PIPELINE_NAME = "LangBot QA Fake Provider Debug Chat";
|
||||
const DEFAULT_PROVIDER_NAME = "LangBot QA Fake OpenAI Provider";
|
||||
const QA_RESOURCE_DESCRIPTION = "Managed by LangBot skills QA automation for controlled fake-provider Debug Chat tests. Safe to delete when local QA fixtures are no longer needed.";
|
||||
const DEFAULT_MODEL_NAME = "gpt-4o-mini";
|
||||
const DEFAULT_REQUESTER = "openai-chat-completions";
|
||||
|
||||
@@ -90,6 +91,8 @@ const result = {
|
||||
};
|
||||
|
||||
try {
|
||||
console.error(`[langbot-qa] configuring QA-owned fake-provider fixtures: provider=\"${providerName}\", pipeline=\"${pipelineName}\"`);
|
||||
console.error("[langbot-qa] this setup may create or update local QA provider/model/pipeline resources on the selected backend.");
|
||||
if (!backendUrl) {
|
||||
result.status = "env_issue";
|
||||
throw new Error("LANGBOT_BACKEND_URL is not configured.");
|
||||
@@ -488,7 +491,7 @@ async function ensurePipeline({ backendUrl, token, name, modelUuid }) {
|
||||
token,
|
||||
body: {
|
||||
name,
|
||||
description: "Local QA pipeline for controlled fake-provider Debug Chat load tests.",
|
||||
description: QA_RESOURCE_DESCRIPTION,
|
||||
emoji: "QA",
|
||||
},
|
||||
});
|
||||
@@ -555,7 +558,7 @@ async function ensurePipeline({ backendUrl, token, name, modelUuid }) {
|
||||
token,
|
||||
body: {
|
||||
name,
|
||||
description: "Local QA pipeline for controlled fake-provider Debug Chat load tests.",
|
||||
description: QA_RESOURCE_DESCRIPTION,
|
||||
emoji: "QA",
|
||||
config: updatedConfig,
|
||||
},
|
||||
|
||||
@@ -1413,6 +1413,7 @@
|
||||
"suites": [
|
||||
"agent-runner-release-gate",
|
||||
"core-smoke",
|
||||
"langbot-debug-chat-isolation-gate",
|
||||
"langbot-debug-chat-load-gate",
|
||||
"langbot-live-backend-gate",
|
||||
"langbot-performance-contract-gate",
|
||||
@@ -1480,10 +1481,27 @@
|
||||
"local-agent-basic-debug-chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-debug-chat-isolation-gate",
|
||||
"title": "LangBot Debug Chat isolation gate",
|
||||
"description": "Manual/non-required cross-pipeline Debug Chat isolation gate. Current releases may fail this gate because of product bug #2286; use it as regression evidence after the routing fix lands.",
|
||||
"type": "reliability",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
"reliability",
|
||||
"debug-chat",
|
||||
"websocket",
|
||||
"isolation",
|
||||
"concurrency"
|
||||
],
|
||||
"cases": [
|
||||
"langbot-fake-provider-debug-chat-cross-pipeline-isolation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "langbot-debug-chat-load-gate",
|
||||
"title": "LangBot Debug Chat load gate",
|
||||
"description": "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke.",
|
||||
"description": "Manual/non-required message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke. Cross-pipeline isolation is split into langbot-debug-chat-isolation-gate because current releases may fail it due to product bug #2286.",
|
||||
"type": "performance",
|
||||
"priority": "p1",
|
||||
"tags": [
|
||||
@@ -1495,7 +1513,6 @@
|
||||
"cases": [
|
||||
"langbot-fake-provider-debug-chat-load",
|
||||
"langbot-fake-provider-debug-chat-slow-load",
|
||||
"langbot-fake-provider-debug-chat-cross-pipeline-isolation",
|
||||
"langbot-fake-provider-debug-chat-fault-recovery",
|
||||
"langbot-space-debug-chat-concurrency-smoke"
|
||||
]
|
||||
|
||||
+3
@@ -68,6 +68,9 @@ diagnostics:
|
||||
- "This probe targets Debug Chat isolation under concurrent traffic from two pipelines."
|
||||
- "It is designed to expose regressions where global pipeline state causes one pipeline's assistant response to be delivered to another pipeline's Debug Chat session."
|
||||
- "Same-pipeline foreign responses are tolerated because Debug Chat intentionally broadcasts within the same pipeline/session; cross-pipeline tokens are never tolerated."
|
||||
- "Known product bug: current releases may fail this probe because Debug Chat replies can read singleton WebSocket proxy pipeline state after another pipeline overwrites it. See https://github.com/langbot-app/LangBot/issues/2286."
|
||||
expected_failures:
|
||||
- "https://github.com/langbot-app/LangBot/issues/2286"
|
||||
success_patterns:
|
||||
- "Debug Chat cross-pipeline isolation probe passed"
|
||||
failure_patterns:
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
Use this reference when a QA request asks whether LangBot is fast enough,
|
||||
stable under load, or resilient to controlled faults.
|
||||
|
||||
These probes are manual/non-required QA gates unless a case or suite explicitly
|
||||
states otherwise. They depend on a live local backend, mutable QA fixtures, and
|
||||
operator-selected environment variables, so do not promote them to required CI
|
||||
checks until fake-provider isolation, ownership markers, and cleanup are in
|
||||
place.
|
||||
|
||||
## Scope
|
||||
|
||||
Treat `skills/` as the QA control plane:
|
||||
@@ -139,7 +145,12 @@ This is not a mocked backend test. It still exercises:
|
||||
|
||||
The fake provider is deterministic and can inject controlled latency or faults
|
||||
with `LANGBOT_FAKE_PROVIDER_*` variables, so it is the baseline for LangBot
|
||||
message-path overhead. The probe uses unique expected response tokens per
|
||||
message-path overhead. A fake-provider process keeps process-global config,
|
||||
request counters, and recent request history; run fake-provider probes serially
|
||||
or give each run its own provider instance. Concurrent probes against the same
|
||||
fake-provider URL can reset or reconfigure each other's metrics.
|
||||
|
||||
The probe uses unique expected response tokens per
|
||||
request because Debug Chat broadcasts messages to every connection in the same
|
||||
session; unique tokens prevent one connection from counting another
|
||||
connection's response as its own.
|
||||
@@ -160,16 +171,19 @@ When the fake provider is used, reports also include provider-side timing in
|
||||
|
||||
After the baseline passes, run `langbot-fake-provider-debug-chat-slow-load` to
|
||||
keep the same live backend path while injecting deterministic streaming latency.
|
||||
Run `langbot-fake-provider-debug-chat-cross-pipeline-isolation` to open
|
||||
concurrent Debug Chat connections against two fake-provider pipelines and fail
|
||||
if one pipeline receives the other pipeline's response token. This targets
|
||||
global pipeline-state regressions in the WebSocket Debug Chat path.
|
||||
Run `langbot-fake-provider-debug-chat-fault-recovery` to inject bounded HTTP
|
||||
provider failures and require both observed failures and later successful
|
||||
requests. The fault-recovery case is deliberately sequential because failed
|
||||
Debug Chat responses do not carry a unique success token that can be attributed
|
||||
to one concurrent connection.
|
||||
|
||||
Run `langbot-fake-provider-debug-chat-cross-pipeline-isolation` separately via
|
||||
`langbot-debug-chat-isolation-gate`. Current LangBot releases may fail it because
|
||||
of product bug [#2286](https://github.com/langbot-app/LangBot/issues/2286), where
|
||||
Debug Chat replies can read singleton WebSocket proxy pipeline state after a
|
||||
later message overwrites it. Treat that failure as regression evidence for the
|
||||
product fix rather than as a fake-provider latency finding.
|
||||
|
||||
Use `langbot-space-debug-chat-concurrency-smoke` after the fake-provider
|
||||
baseline. It runs a deliberately small real Space-provider batch and reports
|
||||
user-visible latency, not pure LangBot overhead. Space/model/network failures
|
||||
@@ -183,8 +197,8 @@ Useful commands:
|
||||
```bash
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-load --run-id langbot-fake-load-local
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-slow-load --run-id langbot-fake-slow-local
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-cross-pipeline-isolation --run-id langbot-fake-cross-pipeline-local
|
||||
rtk bin/lbs test run langbot-fake-provider-debug-chat-fault-recovery --run-id langbot-fake-fault-local
|
||||
rtk bin/lbs suite run langbot-debug-chat-isolation-gate --run-id langbot-debug-chat-isolation-local --include-manual-check
|
||||
rtk bin/lbs test run langbot-space-debug-chat-concurrency-smoke --run-id langbot-space-smoke-local
|
||||
rtk bin/lbs suite run langbot-debug-chat-load-gate --run-id langbot-debug-chat-load-local --include-manual-check
|
||||
```
|
||||
@@ -202,10 +216,13 @@ Use the smallest gate that answers the quality question:
|
||||
- `langbot-user-path-performance-gate`: browser-visible user path performance,
|
||||
starting with Pipeline Debug Chat send-to-visible-completion latency. Run it
|
||||
only when the browser profile and target pipeline are ready.
|
||||
- `langbot-debug-chat-load-gate`: WebSocket Debug Chat load checks, starting
|
||||
with controlled fake-provider baseline, slow-provider, cross-pipeline
|
||||
isolation, and fault-recovery profiles, plus an optional low-volume real
|
||||
Space-provider smoke.
|
||||
- `langbot-debug-chat-load-gate`: manual WebSocket Debug Chat load checks,
|
||||
starting with controlled fake-provider baseline, slow-provider, and
|
||||
fault-recovery profiles, plus an optional low-volume real Space-provider
|
||||
smoke. Run fake-provider cases serially when they share a provider URL.
|
||||
- `langbot-debug-chat-isolation-gate`: manual cross-pipeline Debug Chat
|
||||
isolation regression gate. Current releases may fail because of #2286; keep it
|
||||
separate from the normal load gate until that product fix lands.
|
||||
- `langbot-performance-reliability-gate`: combined starter gate for synthetic
|
||||
contracts plus live backend checks.
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
id: langbot-debug-chat-isolation-gate
|
||||
title: "LangBot Debug Chat isolation gate"
|
||||
description: "Manual/non-required cross-pipeline Debug Chat isolation gate. Current releases may fail this gate because of product bug #2286; use it as regression evidence after the routing fix lands."
|
||||
type: reliability
|
||||
priority: p1
|
||||
tags:
|
||||
- reliability
|
||||
- debug-chat
|
||||
- websocket
|
||||
- isolation
|
||||
- concurrency
|
||||
cases:
|
||||
- langbot-fake-provider-debug-chat-cross-pipeline-isolation
|
||||
@@ -1,6 +1,6 @@
|
||||
id: langbot-debug-chat-load-gate
|
||||
title: "LangBot Debug Chat load gate"
|
||||
description: "Message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke."
|
||||
description: "Manual/non-required message-path load checks for Pipeline Debug Chat: controlled fake-provider baseline, slow-provider and fault-recovery profiles, plus optional real Space-provider smoke. Cross-pipeline isolation is split into langbot-debug-chat-isolation-gate because current releases may fail it due to product bug #2286."
|
||||
type: performance
|
||||
priority: p1
|
||||
tags:
|
||||
@@ -11,6 +11,5 @@ tags:
|
||||
cases:
|
||||
- langbot-fake-provider-debug-chat-load
|
||||
- langbot-fake-provider-debug-chat-slow-load
|
||||
- langbot-fake-provider-debug-chat-cross-pipeline-isolation
|
||||
- langbot-fake-provider-debug-chat-fault-recovery
|
||||
- langbot-space-debug-chat-concurrency-smoke
|
||||
|
||||
Reference in New Issue
Block a user