Files
LangBot/skills/scripts/e2e/ensure-langrag-sentinel-kb.mjs
T
Junyan Chin e9dd584792 feat: MCP server + in-repo skills (agent-friendly platform) (#2269)
* feat(api): support global API key from config.yaml (api.global_api_key)

Accept a config-defined global API key anywhere a web-UI key is accepted
(X-API-Key / Bearer), with no login session and no DB record. Useful for
automated deployments and AI agents (HTTP API + MCP). Defaults to empty
(disabled); does not require the lbk_ prefix.

- templates/config.yaml: add api.global_api_key with security notes
- service/apikey.py: verify_api_key checks global key first (constant-time)
- docs/API_KEY_AUTH.md: document the global key + security guidance
- tests: cover global-key match, prefix-free, fallback-to-db, disabled

* feat(mcp): expose LangBot management as an MCP server at /mcp

Add an MCP (Model Context Protocol) server so external AI agents can manage a
LangBot instance. Reuses the same API-key auth as the HTTP API (including the
config.yaml global API key).

- pkg/api/mcp/server.py: FastMCP server wrapping the service layer; 21 curated
  tools across system/bots/pipelines/models/knowledge/mcp-servers/skills
- pkg/api/mcp/mount.py: ASGI dispatcher fronting Quart; authenticates /mcp
  requests with an API key, runs the streamable-HTTP session manager lifespan
- controller/main.py: serve the wrapped ASGI app via hypercorn (was run_task)
- web: new 'MCP' tab in the API integration dialog showing endpoint, auth, and
  client config; i18n for 8 locales
- tests/manual/mcp_smoke.py: e2e check (401 unauth, list tools, call tools)

Tool surface is intentionally curated (not all ~25 route groups) to keep the
agent surface small, safe, and maintainable. Extend deliberately.

* feat(skills): add in-repo skills/ as the single source of truth

Migrate the agent skills + QA/e2e test harness from the (now archived)
langbot-app/langbot-skills repo into LangBot/skills/, and add four new skills.

Migrated:
- langbot-plugin-dev, langbot-testing (e2e), langbot-env-setup,
  langbot-skills-maintenance, langbot-eba-adapter-dev
- the bin/lbs CLI (src/, test/, scripts/, schemas/, qa-agent-docs/)

New:
- langbot-dev      core backend + web development
- langbot-deploy   Docker/K8s deployment + config.yaml + global API key
- langbot-mcp-ops  operating the LangBot MCP server (/mcp)
- langbot-space-ops operating the Space marketplace MCP server

- src/cli.ts repoRoot(): recognize the skills assets root (skills.index.json +
  bin/lbs) so the CLI works when nested inside the LangBot repo
- README.md: unified skill catalog; skills.index.json regenerated

Parity with source verified: bin/lbs validate + node test suite match the
source repo (only the uncommitted .lbpkg build-artifact fixture differs).

* docs(agents): document agent-facing surfaces + API/MCP/skills sync rule

* docs(readme): add 'Built for AI Agents' section across all locales

Highlight MCP server, in-repo skills (single source of truth), AGENTS.md
sync rule, and llms.txt. Cross-link LangBot Space MCP marketplace.

* style(mcp): fix ruff format + prettier lint in MCP server and API panel

* style(web): prettier format MCP i18n locale entries

* docs(skills): note MCP instance control in dev/testing skills

All development-guidance skills now point to the LangBot instance MCP
server (/mcp) and the Space marketplace MCP server, reusing API keys.
2026-06-20 15:14:47 +08:00

294 lines
11 KiB
JavaScript

#!/usr/bin/env node
import { readFile, writeFile } from "node:fs/promises";
import { resolve } from "node:path";
import { env } from "node:process";
import {
apiJson,
ensureEvidence,
evidencePaths,
loadEnvFiles,
resetAndAuthLocalUser,
writeResult,
} from "./lib/langbot-e2e.mjs";
// ---------------------------------------------------------------------------
// Run settings and the result skeleton.
// ---------------------------------------------------------------------------

// The case id is read before loadEnvFiles() runs; every other setting is read after it.
const caseId = env.LBS_CASE_ID || "ensure-langrag-sentinel-kb";
// NOTE(review): presumably populates the process environment from env files — confirm in ./lib/langbot-e2e.mjs.
await loadEnvFiles();
const paths = evidencePaths(caseId);
await ensureEvidence(paths);

// Backend location and login. Empty backendUrl/user are rejected later by the main flow.
const backendUrl = env.LANGBOT_BACKEND_URL || "";
const user = env.LANGBOT_E2E_LOGIN_USER || "";
const password = env.LANGBOT_E2E_LOGIN_PASSWORD || "LangBotE2ELocalPass!2026";

// Text that a retrieval must contain, and the query used to retrieve it.
const expectedText = env.LANGBOT_E2E_EXPECTED_TEXT || "azalea-cobalt-7421";
const query = env.LANGBOT_E2E_RETRIEVE_QUERY || "What is the local agent runner retrieval sentinel?";

// Command-line switches.
const writeEnv = process.argv.includes("--write-env");
const checkOnly = process.argv.includes("--check-only");

// Paths are resolved against the current working directory.
const envLocalPath = resolve("skills/.env.local");
const kbName = env.LANGBOT_E2E_RAG_KB_NAME || "qa-local-agent-rag";
const sentinelPath = resolve(env.LANGBOT_E2E_RAG_SENTINEL_DOC || "skills/langbot-testing/fixtures/rag/sentinel-doc.txt");

// Maximum time to wait for the sentinel to become retrievable. A value that is
// not a finite positive number (e.g. "3m" -> NaN, or "0") would make the
// polling loop exit before its first iteration, so fall back to the default.
const DEFAULT_RAG_WAIT_MS = 180_000;
const configuredWaitMs = Number(env.LANGBOT_E2E_RAG_WAIT_MS || DEFAULT_RAG_WAIT_MS);
const waitMsIsUsable = Number.isFinite(configuredWaitMs) && configuredWaitMs > 0;
if (!waitMsIsUsable) {
  // stderr only: stdout is reserved for the JSON result printed at the end.
  console.warn(
    `Ignoring invalid LANGBOT_E2E_RAG_WAIT_MS=${JSON.stringify(env.LANGBOT_E2E_RAG_WAIT_MS)}; using ${DEFAULT_RAG_WAIT_MS}ms.`,
  );
}
const waitMs = waitMsIsUsable ? configuredWaitMs : DEFAULT_RAG_WAIT_MS;

// Result record written to the evidence files and printed as JSON.
// It starts as "fail" and is updated in place as the run progresses.
const result = {
  source: "automation",
  case_id: caseId,
  run_id: paths.runId,
  status: "fail",
  reason: "",
  backend_url: backendUrl,
  expected_text: expectedText,
  query,
  kb_uuid: "",
  kb_name: "",
  kb_created: false,
  uploaded_file_id: "",
  store_task_id: "",
  embedding_model_uuid: "",
  engine_plugin_id: "",
  checked_bases: [],
  file_statuses: [],
  wrote_env: false,
  evidence: {
    automation_result_json: paths.automationResultJson,
    result_json: paths.resultJson,
  },
  evidence_collected: ["api_diagnostic"],
};
// ---------------------------------------------------------------------------
// Main flow. Signs in, probes the existing knowledge bases for the sentinel,
// provisions one when none matches (skipped with --check-only), then always
// writes the result and exits: 0 = pass, 2 = env_issue, 1 = any other status.
// ---------------------------------------------------------------------------
try {
  if (!backendUrl) throw new Error("LANGBOT_BACKEND_URL is not configured.");
  if (!user) throw new Error("LANGBOT_E2E_LOGIN_USER is required.");
  const auth = await resetAndAuthLocalUser({ backendUrl, user, password });

  const basesResponse = await apiJson(backendUrl, "/api/v1/knowledge/bases", { token: auth.token });
  if (basesResponse.status >= 400 || basesResponse.json.code !== 0) {
    throw new Error(basesResponse.json.msg || `Failed to list knowledge bases: HTTP ${basesResponse.status}.`);
  }
  const bases = basesResponse.json.data?.bases || [];

  // Records every probe in result.checked_bases and fills result.kb_uuid on a hit.
  await findSentinelBase(backendUrl, auth.token, bases, result);

  if (!result.kb_uuid && !checkOnly) {
    // Reuse a base with the configured name unless its probe above reported a
    // server-side failure; in that case a new base is created instead.
    const targetBase = bases.find((base) => {
      const uuid = base.uuid || base.id || "";
      return (base.name || "") === kbName && !hasRetrieveFailure(result.checked_bases, uuid);
    });
    result.kb_uuid = targetBase?.uuid || targetBase?.id || "";
    result.kb_name = targetBase?.name || kbName;
    if (!result.kb_uuid) {
      const setup = await createKnowledgeBase(backendUrl, auth.token, kbName);
      result.kb_uuid = setup.kbUuid;
      result.kb_name = kbName;
      result.kb_created = true;
      result.embedding_model_uuid = setup.embeddingModelUuid;
      result.engine_plugin_id = setup.enginePluginId;
    }

    // Upload the sentinel document and attach it to the chosen knowledge base.
    const upload = await uploadDocument(backendUrl, auth.token, sentinelPath);
    result.uploaded_file_id = upload.fileId;
    const store = await apiJson(backendUrl, `/api/v1/knowledge/bases/${encodeURIComponent(result.kb_uuid)}/files`, {
      method: "POST",
      token: auth.token,
      body: { file_id: upload.fileId },
    });
    if (store.status >= 400 || store.json.code !== 0) {
      throw new Error(store.json.msg || `Failed to store file in knowledge base: HTTP ${store.status}.`);
    }
    result.store_task_id = store.json.data?.task_id || "";

    // Poll until the sentinel is retrievable. On failure waitForSentinel()
    // clears result.kb_uuid and stores its explanation in result.reason.
    const ready = await waitForSentinel(backendUrl, auth.token, result.kb_uuid, query, expectedText, waitMs);
    result.file_statuses = ready.fileStatuses;
    if (ready.matched) {
      result.checked_bases.push(ready.checked);
    }
  }

  if (!result.kb_uuid) {
    result.status = "env_issue";
    if (checkOnly) {
      result.reason = `No existing knowledge base retrieved expected sentinel: ${expectedText}`;
    } else {
      // Keep the specific explanation left in result.reason by the polling
      // step (backend message or file statuses) instead of discarding it.
      const detail = result.reason;
      const summary = `Could not create or verify LangRAG sentinel knowledge base: ${expectedText}`;
      result.reason = detail ? `${summary} (${detail})` : summary;
    }
  } else {
    if (writeEnv) {
      await upsertEnvLocal(envLocalPath, {
        LANGBOT_LOCAL_AGENT_RAG_KB_UUID: result.kb_uuid,
      });
      result.wrote_env = true;
    }
    result.status = "pass";
    result.reason = `Found LangRAG sentinel knowledge base: ${result.kb_uuid}`;
  }
} catch (error) {
  // A thrown value is not guaranteed to be an Error instance.
  const message = error instanceof Error ? error.message : String(error);
  // Configuration problems are reported as env_issue; everything else is a failure.
  result.status = /not configured|required|No existing knowledge base/.test(message) ? "env_issue" : "fail";
  result.reason = message;
} finally {
  await writeResult(paths, result);
  console.log(JSON.stringify(result, null, 2));
}
process.exit(result.status === "pass" ? 0 : result.status === "env_issue" ? 2 : 1);
/**
 * Probes the given knowledge bases one at a time, in order, and stops at the
 * first one whose retrieval matches the sentinel.
 *
 * Every probe is appended to `result.checked_bases`. On a match,
 * `result.kb_uuid` and `result.kb_name` are filled in. Entries that have
 * neither a `uuid` nor an `id` are skipped without being probed.
 *
 * @param {string} backendUrl - Backend base URL, forwarded to retrieveSentinel().
 * @param {string} token - Auth token, forwarded to retrieveSentinel().
 * @param {Iterable<object>} bases - Knowledge base records (`uuid` or `id`, optional `name`).
 * @param {object} result - Run result; `query` and `expected_text` are read from it.
 * @returns {Promise<void>}
 */
async function findSentinelBase(backendUrl, token, bases, result) {
  for (const candidate of bases) {
    const candidateId = candidate.uuid || candidate.id || "";
    if (candidateId) {
      const probe = await retrieveSentinel(
        backendUrl,
        token,
        candidateId,
        candidate.name || "",
        result.query,
        result.expected_text,
      );
      result.checked_bases.push(probe);
      if (probe.matched) {
        result.kb_uuid = candidateId;
        result.kb_name = probe.name;
        break;
      }
    }
  }
}
/**
 * Creates a knowledge base backed by the LangRAG engine.
 *
 * Lists the available knowledge engines, picks LangRAG (exact plugin id first,
 * otherwise the first engine whose serialized name/label mentions "LangRAG"),
 * picks an embedding model via pickEmbeddingModel(), then POSTs the new base.
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {string} token - Auth token.
 * @param {string} name - Name for the new knowledge base.
 * @returns {Promise<{kbUuid: string, embeddingModelUuid: string, enginePluginId: string}>}
 * @throws {Error} When listing engines fails, LangRAG is not available, or the
 *   create call fails or returns no uuid.
 */
async function createKnowledgeBase(backendUrl, token, name) {
  const listing = await apiJson(backendUrl, "/api/v1/knowledge/engines", { token });
  const listingFailed = listing.status >= 400 || listing.json.code !== 0;
  if (listingFailed) {
    throw new Error(listing.json.msg || `Failed to list knowledge engines: HTTP ${listing.status}.`);
  }

  const available = listing.json.data?.engines || [];
  let langRag = available.find((candidate) => candidate.plugin_id === "langbot-team/LangRAG");
  if (!langRag) {
    // name/label are serialized before searching, so non-string values are searched too.
    langRag = available.find((candidate) =>
      JSON.stringify(candidate.name || candidate.label || "").includes("LangRAG"),
    );
  }
  const enginePluginId = langRag?.plugin_id || "";
  if (!enginePluginId) {
    throw new Error("LangRAG knowledge engine is not installed.");
  }

  const embeddingModelUuid = await pickEmbeddingModel(backendUrl, token);

  const creationSettings = {
    embedding_model_uuid: embeddingModelUuid,
    index_type: "chunk",
    chunk_size: 512,
    overlap: 50,
  };
  const retrievalSettings = {
    top_k: 5,
    search_type: "vector",
    query_rewrite: "off",
    rerank: "off",
    context_window: 0,
  };
  const created = await apiJson(backendUrl, "/api/v1/knowledge/bases", {
    method: "POST",
    token,
    body: {
      name,
      description: "Automated LangBot agent-runner RAG sentinel knowledge base.",
      knowledge_engine_plugin_id: enginePluginId,
      creation_settings: creationSettings,
      retrieval_settings: retrievalSettings,
    },
  });

  // A response without a uuid is treated as a failure even if the status looks fine.
  const kbUuid = created.json.data?.uuid || "";
  const rejected = created.status >= 400 || created.json.code !== 0 || !kbUuid;
  if (rejected) {
    throw new Error(created.json.msg || `Failed to create knowledge base: HTTP ${created.status}.`);
  }
  return { kbUuid, embeddingModelUuid, enginePluginId };
}
/**
 * Chooses the embedding model uuid for a new knowledge base.
 *
 * An uuid configured through LANGBOT_LOCAL_AGENT_RAG_EMBEDDING_MODEL_UUID or
 * LANGBOT_RAG_EMBEDDING_MODEL_UUID wins without contacting the backend.
 * Otherwise the backend's embedding models are listed and the first available
 * of these is used: a model named like chroma/MiniLM, a model named like
 * text-embedding-3-small, the model with the lowest `prefered_ranking`
 * (missing rankings count as 9999).
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {string} token - Auth token.
 * @returns {Promise<string>} The selected model uuid.
 * @throws {Error} When listing fails or no model with a uuid is available.
 */
async function pickEmbeddingModel(backendUrl, token) {
  const override = env.LANGBOT_LOCAL_AGENT_RAG_EMBEDDING_MODEL_UUID || env.LANGBOT_RAG_EMBEDDING_MODEL_UUID || "";
  if (override) {
    return override;
  }

  const listing = await apiJson(backendUrl, "/api/v1/provider/models/embedding", { token });
  if (listing.status >= 400 || listing.json.code !== 0) {
    throw new Error(listing.json.msg || `Failed to list embedding models: HTTP ${listing.status}.`);
  }

  const candidates = listing.json.data?.models || [];
  const firstNamed = (pattern) => candidates.find((candidate) => pattern.test(candidate.name || ""));
  const rankOf = (candidate) => candidate.prefered_ranking ?? 9999;

  let chosen = firstNamed(/chroma|MiniLM/i);
  if (!chosen) {
    chosen = firstNamed(/text-embedding-3-small/i);
  }
  if (!chosen) {
    // Sort a copy so the backend's list is left untouched.
    chosen = [...candidates].sort((left, right) => rankOf(left) - rankOf(right))[0];
  }

  const uuid = chosen?.uuid || "";
  if (!uuid) {
    throw new Error("No embedding model is configured.");
  }
  return uuid;
}
/**
 * Uploads the file at `path` to the backend's document endpoint as multipart
 * form data.
 *
 * The part is always sent as `text/plain` under the file name
 * "sentinel-doc.txt", whatever the local file is called. One trailing slash on
 * `backendUrl` is dropped before the endpoint path is appended.
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {string} token - Bearer token for the Authorization header.
 * @param {string} path - Local file to upload.
 * @returns {Promise<{fileId: string}>} The file id reported by the backend.
 * @throws {Error} When the response is an HTTP error, has a non-zero code, or
 *   carries no file id. File read and network errors propagate unchanged.
 */
async function uploadDocument(backendUrl, token, path) {
  const contents = await readFile(path);

  const payload = new FormData();
  const part = new Blob([contents], { type: "text/plain" });
  payload.append("file", part, "sentinel-doc.txt");

  const endpoint = `${backendUrl.replace(/\/$/, "")}/api/v1/files/documents`;
  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
    },
    body: payload,
  };
  const response = await fetch(endpoint, requestInit);

  // A body that is not valid JSON is treated as an empty object, so the
  // status-based error message below is used.
  const json = await response.json().catch(() => ({}));
  const fileId = json.data?.file_id || "";
  const rejected = response.status >= 400 || json.code !== 0 || !fileId;
  if (rejected) {
    throw new Error(json.msg || `Failed to upload sentinel document: HTTP ${response.status}.`);
  }
  return { fileId };
}
/**
 * Polls a knowledge base until a retrieval contains the sentinel, a file is
 * reported as "failed", or `timeoutMs` has elapsed.
 *
 * Each round lists the base's files, then runs one retrieval. Rounds are
 * separated by a 2 second sleep.
 *
 * Side effects on failure: the module-level `result` gets `reason` set (the
 * last retrieval's `msg`, or a timeout message that includes the file
 * statuses) and `kb_uuid` reset to "". The module-level `kbName` is used as
 * the name recorded in each retrieval probe.
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {string} token - Auth token.
 * @param {string} kbUuid - Knowledge base to poll.
 * @param {string} query - Retrieval query.
 * @param {string} expectedText - Sentinel text the retrieval must contain.
 * @param {number} timeoutMs - Polling budget in milliseconds.
 * @returns {Promise<{matched: boolean, fileStatuses: Array, checked: (object|null)}>}
 *   `checked` is the last retrieval probe, or null when no round ran.
 */
async function waitForSentinel(backendUrl, token, kbUuid, query, expectedText, timeoutMs) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  let fileStatuses = [];
  let lastChecked = null;

  while (elapsedMs() < timeoutMs) {
    const listing = await apiJson(backendUrl, `/api/v1/knowledge/bases/${encodeURIComponent(kbUuid)}/files`, { token });
    // Keep the previous statuses when this round's listing carries none.
    const reported = listing.json.data?.files;
    if (reported) {
      fileStatuses = reported;
    }

    lastChecked = await retrieveSentinel(backendUrl, token, kbUuid, kbName, query, expectedText);
    if (lastChecked.matched) {
      return { matched: true, fileStatuses, checked: lastChecked };
    }

    // Stop polling as soon as any file is reported as failed.
    const ingestionFailed = fileStatuses.some((entry) => entry.status === "failed");
    if (ingestionFailed) {
      break;
    }
    await sleep(2_000);
  }

  let reason = lastChecked?.msg;
  if (!reason) {
    reason = `LangRAG sentinel was not retrievable within ${timeoutMs}ms; file statuses: ${JSON.stringify(fileStatuses)}`;
  }
  result.reason = reason;
  result.kb_uuid = "";
  return { matched: false, fileStatuses, checked: lastChecked };
}
/**
 * Runs one retrieval against a knowledge base and reports whether the
 * serialized results contain the expected text.
 *
 * The results are searched in their JSON-serialized form, so the expected text
 * is escaped the same way before searching. Without that, text containing
 * quotes, backslashes or newlines could never match.
 *
 * @param {string} backendUrl - Backend base URL.
 * @param {string} token - Auth token.
 * @param {string} uuid - Knowledge base to query.
 * @param {string} name - Knowledge base name, copied into the returned record.
 * @param {string} query - Retrieval query.
 * @param {string} expectedText - Text the results must contain.
 * @returns {Promise<{uuid: string, name: string, http_status: number, code: (number|null), msg: string, matched: boolean}>}
 *   A probe record. Errors reported by the backend are returned in the record, not thrown.
 */
async function retrieveSentinel(backendUrl, token, uuid, name, query, expectedText) {
  const retrieve = await apiJson(backendUrl, `/api/v1/knowledge/bases/${encodeURIComponent(uuid)}/retrieve`, {
    method: "POST",
    token,
    body: { query },
  });
  const serializedResults = JSON.stringify(retrieve.json.data?.results || []);
  // Serialize the needle as a JSON string and drop the surrounding quotes so
  // it carries the same escapes as the haystack.
  const needle = JSON.stringify(String(expectedText)).slice(1, -1);
  return {
    uuid,
    name,
    http_status: retrieve.status,
    code: retrieve.json.code ?? null,
    msg: retrieve.json.msg || "",
    matched: serializedResults.includes(needle),
  };
}
/**
 * Returns a promise that fulfils (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Tells whether the first recorded probe for `uuid` reported a server-side
 * failure: an HTTP status of 500 or above, or a negative numeric `code`.
 *
 * @param {Array<object>} checkedBases - Probe records (`uuid`, `http_status`, `code`).
 * @param {string} uuid - Knowledge base to look up.
 * @returns {boolean|undefined} `undefined` when no probe was recorded for `uuid`.
 */
function hasRetrieveFailure(checkedBases, uuid) {
  const record = checkedBases.find((entry) => entry.uuid === uuid);
  if (!record) {
    return record;
  }
  if (record.http_status >= 500) {
    return true;
  }
  // Non-numeric codes (null, strings) never count as a failure.
  return typeof record.code === "number" && record.code < 0;
}
/**
 * Sets `KEY=value` entries in an env file, preserving every other line.
 *
 * Every existing line that assigns a key present in `values` is rewritten in
 * place, including repeated assignments, so a stale duplicate cannot override
 * the new value. Keys that were not present are appended after a blank
 * separator line. Leading blank lines are dropped, line endings are written as
 * "\n", and the file ends with exactly one newline.
 *
 * A missing file is treated as empty and created. Any other read failure is
 * rethrown, so existing content that could not be read is never overwritten.
 *
 * @param {string} path - Env file to update.
 * @param {Record<string, string>} values - Keys and values to set.
 * @returns {Promise<void>}
 * @throws {Error} Read errors other than ENOENT, and any write error.
 */
async function upsertEnvLocal(path, values) {
  let text = "";
  try {
    text = await readFile(path, "utf8");
  } catch (error) {
    // Only "file does not exist" means there is nothing to preserve.
    if (error?.code !== "ENOENT") throw error;
  }

  const managed = new Set(Object.keys(values));
  // Keys still waiting to be written; whatever remains after the scan is appended.
  const pending = new Set(managed);
  const output = [];
  for (const line of text.split(/\r?\n/)) {
    const key = line.match(/^([A-Z][A-Z0-9_]*)=/)?.[1];
    if (key !== undefined && managed.has(key)) {
      output.push(`${key}=${values[key]}`);
      pending.delete(key);
    } else if (line !== "" || output.length > 0) {
      // Blank lines are kept only once some content has been emitted.
      output.push(line);
    }
  }

  // Separate appended keys from existing content with one blank line.
  if (pending.size > 0 && output.length > 0 && output[output.length - 1] !== "") output.push("");
  for (const key of pending) output.push(`${key}=${values[key]}`);
  await writeFile(path, `${output.join("\n").replace(/\n+$/, "")}\n`, "utf8");
}