LangBot/skills/scripts/e2e/local-agent-steering-debug-chat.mjs

#!/usr/bin/env node

import { writeFile } from "node:fs/promises";
import { env } from "node:process";
import {
  DEBUG_CHAT_FAILURE_SIGNALS,
  openPipelineDebugChat,
  setDebugChatStreamOutput,
  visibleDebugChatMessages,
  waitForDebugChatTextStable,
} from "./lib/debug-chat.mjs";
import {
  createBrowser,
  ensureEvidence,
  evidencePaths,
  exitCode,
  localIsoWithOffset,
  loadEnvFiles,
  pathExists,
  safeScreenshot,
  writeResult,
} from "./lib/langbot-e2e.mjs";

// Load env files before any `env.*` lookup below.
// NOTE(review): presumably loadEnvFiles() populates process.env — confirm in ./lib/langbot-e2e.mjs.
await loadEnvFiles();

// The case id (overridable via LBS_CASE_ID) is what evidencePaths() derives this run's paths from.
const caseId = env.LBS_CASE_ID || "local-agent-steering-debug-chat";
const paths = evidencePaths(caseId);
await ensureEvidence(paths);

// Only a single trailing slash is stripped so `${backendUrl}/api/...` does not produce `//`.
const backendUrl = (env.LANGBOT_BACKEND_URL || "").replace(/\/$/, "");
// Pipeline selection: the E2E-specific variable wins, then the local-agent one, then the generic one.
const pipelineUrl = env.LANGBOT_E2E_PIPELINE_URL || env.LANGBOT_LOCAL_AGENT_PIPELINE_URL || env.LANGBOT_PIPELINE_URL || "";
const pipelineName = env.LANGBOT_E2E_PIPELINE_NAME || env.LANGBOT_LOCAL_AGENT_PIPELINE_NAME || env.LANGBOT_PIPELINE_NAME || "";
const expectedRunnerId = env.LANGBOT_E2E_EXPECTED_RUNNER_ID || "plugin:langbot/local-agent/default";
// Sentinel the follow-up prompt asks the model to return; its presence in the reply is the pass signal.
const expectedText = env.LANGBOT_E2E_EXPECTED_TEXT || "qa_steering_sentinel_6194";
// Falls back to 240000 ms when the variable is unset, non-numeric, or not positive.
const responseTimeoutMs = positiveInt(env.LANGBOT_E2E_RESPONSE_TIMEOUT_MS, 240000);
// Fixed timings: pause between the first prompt and the follow-up, and the enabled-timeout passed to
// sendPrompt() for the follow-up.
const followupDelayMs = 1000;
const followupEnabledTimeoutMs = 1500;
// The default first prompt asks for an 8-second qa_plugin_sleep tool call so a follow-up can be sent
// while the run is still active. LANGBOT_E2E_PROMPT replaces the whole prompt.
const firstPrompt = env.LANGBOT_E2E_PROMPT || [
  "You are running the LangBot steering E2E test.",
  "First call the qa_plugin_sleep tool with seconds=8 and text=steering-e2e-anchor.",
  "Do not answer before the tool result is available.",
  "After the tool returns, answer the latest user follow-up.",
  "If no follow-up was injected, reply only STEERING_NO_FOLLOWUP.",
].join(" ");
const followupPrompt = [
  "This is a steering follow-up sent while the first tool call is still active.",
  `Return only ${expectedText}.`,
].join(" ");

// Extra diagnostic JSON files written next to the other evidence for this run.
const pipelineConfigDiagnosticPath = `${paths.evidenceDir}/pipeline-config-diagnostic.json`;
const debugChatResetDiagnosticPath = `${paths.evidenceDir}/debug-chat-reset-diagnostic.json`;
const toolDiagnosticPath = `${paths.evidenceDir}/tool-diagnostic.json`;

// Assigned once createBrowser() succeeds; the finally block checks it before screenshot/close.
let browser;
// Result record that is mutated during the run and persisted via writeResult() at the end.
// `status` starts as "fail" so an early abort is never reported as a pass.
const result = {
  source: "automation",
  case_id: caseId,
  run_id: paths.runId,
  status: "fail",
  reason: "",
  started_at: new Date().toISOString(),
  started_at_local: localIsoWithOffset(new Date()),
  url: "",
  backend_url: backendUrl,
  pipeline_url: pipelineUrl,
  pipeline_name: pipelineName,
  expected_runner_id: expectedRunnerId,
  first_prompt: firstPrompt,
  followup_prompt: followupPrompt,
  expected_text: expectedText,
  followup_delay_ms: followupDelayMs,
  followup_enabled_timeout_ms: followupEnabledTimeoutMs,
  response_timeout_ms: responseTimeoutMs,
  // The four fields below are filled in by the corresponding stage of the main flow.
  pipeline_config: null,
  debug_chat_reset: null,
  tool_diagnostic: null,
  steering: null,
  // Evidence file paths; entries whose file does not exist are dropped before the result is written.
  evidence: {
    console_log: paths.consoleLog,
    network_log: paths.networkLog,
    screenshot: paths.screenshot,
    automation_result_json: paths.automationResultJson,
    result_json: paths.resultJson,
  },
  evidence_collected: ["ui", "console", "network", "screenshot"],
};

// Main flow. Each stage either records a non-pass status/reason on `result` and stops descending,
// or hands over to the next stage:
// open Debug Chat -> pipeline diagnostic -> tool diagnostic -> reset session -> reopen -> enable
// stream output -> steering probe.
try {
  if (!backendUrl) {
    // Set status/reason before throwing so the catch block keeps "env_issue" instead of reclassifying.
    result.status = "env_issue";
    result.reason = "LANGBOT_BACKEND_URL is required.";
    throw new Error(result.reason);
  }

  browser = await createBrowser(paths);
  const { page } = browser;

  const openResult = await openPipelineDebugChat(page, {
    pipelineUrl,
    pipelineName,
    envHint: "case-specific pipeline env mapped to LANGBOT_E2E_PIPELINE_URL or LANGBOT_E2E_PIPELINE_NAME",
  });
  result.url = page.url();
  if (!openResult.opened) {
    result.status = openResult.status;
    result.reason = openResult.reason;
  } else {
    // Resolve the pipeline through the backend API and check which runner it is configured with.
    const pipelineDiagnostic = await inspectPipeline(page, {
      backendUrl,
      pipelineUrl,
      pipelineName,
      expectedRunnerId,
    });
    await writeFile(pipelineConfigDiagnosticPath, `${JSON.stringify(pipelineDiagnostic, null, 2)}\n`, "utf8");
    result.evidence.pipeline_config_diagnostic_json = pipelineConfigDiagnosticPath;
    result.pipeline_config = pipelineDiagnostic;
    // Guarded push so the tag is never listed twice.
    if (!result.evidence_collected.includes("api_diagnostic")) result.evidence_collected.push("api_diagnostic");

    // The tool list is fetched and persisted even when the pipeline diagnostic failed, so both
    // diagnostics are available as evidence.
    const toolDiagnostic = await inspectToolNames(page, { backendUrl });
    await writeFile(toolDiagnosticPath, `${JSON.stringify(toolDiagnostic, null, 2)}\n`, "utf8");
    result.evidence.tool_diagnostic_json = toolDiagnosticPath;
    result.tool_diagnostic = toolDiagnostic;

    if (pipelineDiagnostic.status === "fail" || pipelineDiagnostic.status === "blocked") {
      result.status = pipelineDiagnostic.status;
      result.reason = pipelineDiagnostic.reason || "Pipeline diagnostic failed.";
    } else if (toolDiagnostic.status === "fail" || toolDiagnostic.status === "blocked") {
      result.status = toolDiagnostic.status;
      result.reason = toolDiagnostic.reason || "Tool diagnostic failed.";
    } else if (!toolDiagnostic.tool_names.includes("qa_plugin_sleep")) {
      // The default first prompt relies on the qa_plugin_sleep tool to keep the run active.
      result.status = "blocked";
      result.reason = "qa_plugin_sleep is not exposed by /api/v1/tools; rebuild/reinstall qa-plugin-smoke before running steering E2E.";
    } else {
      // Reset the "person" Debug Chat session before the probe takes its baseline message counts.
      const resetDiagnostic = await resetPipelineDebugChat(page, {
        backendUrl,
        pipelineId: pipelineDiagnostic.pipeline_id,
        sessionType: "person",
      });
      await writeFile(debugChatResetDiagnosticPath, `${JSON.stringify(resetDiagnostic, null, 2)}\n`, "utf8");
      result.evidence.debug_chat_reset_diagnostic_json = debugChatResetDiagnosticPath;
      result.debug_chat_reset = resetDiagnostic;

      if (resetDiagnostic.status === "fail" || resetDiagnostic.status === "blocked") {
        result.status = resetDiagnostic.status;
        result.reason = resetDiagnostic.reason || "Debug Chat reset failed.";
      } else {
        // Fixed one-second pause after the reset, then Debug Chat is opened again.
        // NOTE(review): presumably this lets the UI pick up the reset session — confirm.
        await page.waitForTimeout(1000);
        const reopenResult = await openPipelineDebugChat(page, {
          pipelineUrl,
          pipelineName,
          envHint: "case-specific pipeline env mapped to LANGBOT_E2E_PIPELINE_URL or LANGBOT_E2E_PIPELINE_NAME",
        });
        result.url = page.url();
        if (!reopenResult.opened) {
          result.status = reopenResult.status;
          result.reason = reopenResult.reason;
        } else {
          const streamResult = await setDebugChatStreamOutput(page, true);
          if (streamResult.status === "blocked" || streamResult.status === "fail") {
            result.status = streamResult.status;
            result.reason = streamResult.reason;
          } else {
            // The probe's own status/reason become the overall verdict.
            result.steering = await runSteeringProbe(page);
            result.status = result.steering.status;
            result.reason = result.steering.reason;
          }
        }
      }
    }
  }
} catch (error) {
  // Keep a status/reason pair that was set on purpose before the throw. Otherwise classify by
  // message: missing Playwright or a LANGBOT_FRONTEND_URL problem is an environment issue,
  // anything else is a failure.
  if (!["env_issue", "blocked", "fail", "pass"].includes(result.status) || !result.reason) {
    result.status = /Playwright is not installed|LANGBOT_FRONTEND_URL/.test(error.message) ? "env_issue" : "fail";
  }
  result.reason = result.reason || error.message;
} finally {
  // Screenshot first, while the page is still open; errors from browser.close() are ignored.
  if (browser?.page) await safeScreenshot(browser.page, paths.screenshot);
  if (browser) await browser.close().catch(() => {});
  const finishedAt = new Date();
  result.finished_at = finishedAt.toISOString();
  result.finished_at_local = localIsoWithOffset(finishedAt);
  // Report only evidence files that exist. The two result JSON paths skip the existence check.
  // NOTE(review): presumably because writeResult() below is what creates them — confirm.
  const existingEvidence = {};
  for (const [key, value] of Object.entries(result.evidence)) {
    if (typeof value !== "string") continue;
    const isResultFile = value === paths.automationResultJson || value === paths.resultJson;
    if (isResultFile || await pathExists(value)) existingEvidence[key] = value;
  }
  result.evidence = existingEvidence;
  await writeResult(paths, result);
  console.log(JSON.stringify(result, null, 2));
}

// The process exit code is derived from the final status by exitCode().
process.exit(exitCode(result.status));

/**
 * Sends the first prompt, sends the steering follow-up after `followupDelayMs`, waits for an
 * assistant reply containing `expectedText`, and evaluates the resulting transcript.
 *
 * @param {object} page - Playwright page with Debug Chat open.
 * @returns {Promise<object>} Report with `status` ("pass" or "fail"), `reason`, and the send
 *   results, timings and message counts that were collected up to the point of return.
 */
async function runSteeringProbe(page) {
  // Baseline counts so only messages produced by this probe are considered.
  const initialMessages = await visibleDebugChatMessages(page);
  const beforeAssistantCount = countRole(initialMessages, "assistant");
  const beforeUserCount = countRole(initialMessages, "user");

  const firstStartedAt = Date.now();
  const firstSend = await sendPrompt(page, firstPrompt, { enabledTimeoutMs: 5000 });
  if (!firstSend.sent) {
    return {
      status: "fail",
      reason: firstSend.reason || "Could not send first Debug Chat prompt.",
      first_send: firstSend,
      before_assistant_count: beforeAssistantCount,
      before_user_count: beforeUserCount,
    };
  }

  // Give the first run a moment to start before attempting the follow-up.
  await page.waitForTimeout(followupDelayMs);
  const preFollowupAssistantCount = countRole(await visibleDebugChatMessages(page), "assistant");

  const followupStartedAt = Date.now();
  const followupSend = await sendPrompt(page, followupPrompt, { enabledTimeoutMs: followupEnabledTimeoutMs });
  const followupSentAt = Date.now();
  const firstToFollowupAttemptMs = followupStartedAt - firstStartedAt;
  const followupSendLatencyMs = followupSentAt - followupStartedAt;

  if (!followupSend.sent) {
    return {
      status: "fail",
      reason: followupSend.reason || "Could not send steering follow-up while the first run was active.",
      first_send: firstSend,
      followup_send: followupSend,
      first_to_followup_attempt_ms: firstToFollowupAttemptMs,
      followup_send_latency_ms: followupSendLatencyMs,
      before_assistant_count: beforeAssistantCount,
      pre_followup_assistant_count: preFollowupAssistantCount,
      before_user_count: beforeUserCount,
    };
  }

  const { seen: sentinelSeen } = await waitForLatestAssistantContaining(page, {
    expectedText,
    beforeAssistantCount,
    timeoutMs: responseTimeoutMs,
  });
  // Let streamed text settle before taking the final transcript snapshot.
  await waitForDebugChatTextStable(page);

  const finalMessages = await visibleDebugChatMessages(page);
  const afterAssistantCount = countRole(finalMessages, "assistant");
  const afterUserCount = countRole(finalMessages, "user");
  const latestAssistantText = latestRoleText(finalMessages, "assistant");
  // Check the newest assistant message first, then the whole transcript.
  const failureSignal = findFailureSignal(latestAssistantText) || findFailureSignal(messagesText(finalMessages));
  const newAssistantCount = afterAssistantCount - beforeAssistantCount;
  const newUserCount = afterUserCount - beforeUserCount;

  const report = {
    first_send: firstSend,
    followup_send: followupSend,
    first_to_followup_attempt_ms: firstToFollowupAttemptMs,
    followup_send_latency_ms: followupSendLatencyMs,
    before_assistant_count: beforeAssistantCount,
    pre_followup_assistant_count: preFollowupAssistantCount,
    after_assistant_count: afterAssistantCount,
    new_assistant_count: newAssistantCount,
    before_user_count: beforeUserCount,
    after_user_count: afterUserCount,
    new_user_count: newUserCount,
    latest_assistant_text: latestAssistantText,
    assistant_containing_expected_seen: sentinelSeen,
    failure_signal: failureSignal,
  };

  // Ordered failure checks; the first one that applies decides the reported reason.
  const failureChecks = [
    [Boolean(failureSignal), `Debug Chat displayed a known failure signal: ${failureSignal}`],
    [!sentinelSeen, `No new assistant message contained steering sentinel ${expectedText}.`],
    [
      !latestAssistantText.includes(expectedText),
      `Latest assistant message did not contain steering sentinel ${expectedText}.`,
    ],
    [newUserCount < 2, `Expected two new user messages, saw ${newUserCount}.`],
    [
      newAssistantCount !== 1,
      `Expected one assistant response for one claimed steering run, saw ${newAssistantCount}. More than one usually means the follow-up became a separate run.`,
    ],
    [
      latestAssistantText.includes("STEERING_NO_FOLLOWUP"),
      "Runner answered the no-follow-up branch, so steering was not injected.",
    ],
  ];
  const firstFailure = failureChecks.find(([applies]) => applies);
  if (firstFailure) {
    return { ...report, status: "fail", reason: firstFailure[1] };
  }

  return {
    ...report,
    status: "pass",
    reason: `Follow-up sentinel ${expectedText} appeared in the only new assistant response after two user messages.`,
  };
}

/**
 * Locates the Debug Chat message box: the last input or textarea whose placeholder contains
 * "message" or "消息".
 *
 * @param {object} page - Playwright page.
 * @returns {object} Locator for the last matching element.
 */
function debugChatInput(page) {
  const selectors = [
    'input[placeholder*="message"]',
    'input[placeholder*="消息"]',
    'textarea[placeholder*="message"]',
    'textarea[placeholder*="消息"]',
  ];
  const candidates = page.locator(selectors.join(", "));
  return candidates.last();
}

/**
 * Types `prompt` into the Debug Chat input and submits it with Enter.
 *
 * Playwright's `locator.isVisible()` ignores its `timeout` option and `locator.isEnabled()` reports
 * the current state only, so this function waits explicitly: up to 5000 ms for the input to become
 * visible, then up to `enabledTimeoutMs` for it to become enabled.
 *
 * @param {object} page - Playwright page with Debug Chat open.
 * @param {string} prompt - Text to send.
 * @param {{ enabledTimeoutMs: number }} options - Maximum time to wait for the input to be enabled.
 * @returns {Promise<{sent: boolean, reason?: string, submitted_by?: string}>} `sent: false` with a
 *   reason when the input never became usable, otherwise `sent: true`.
 */
async function sendPrompt(page, prompt, { enabledTimeoutMs }) {
  const input = debugChatInput(page);
  const inputVisible = await input
    .waitFor({ state: "visible", timeout: 5000 })
    .then(() => true, () => false);
  if (!inputVisible) return { sent: false, reason: "Debug Chat input is not visible." };
  const inputEnabled = await waitForInputEnabled(page, input, enabledTimeoutMs);
  if (!inputEnabled) return { sent: false, reason: `Debug Chat input was not enabled within ${enabledTimeoutMs}ms.` };

  // fill() is preferred; fall back to real key presses when fill() is rejected.
  await input.fill(prompt).catch(async () => {
    await input.click();
    await input.pressSequentially(prompt);
  });
  await input.press("Enter");
  // Best effort: wait for the prompt to show up in the transcript. A miss is not treated as a send
  // failure here.
  await page.getByText(prompt, { exact: false }).last().waitFor({ state: "visible", timeout: 10000 }).catch(() => {});
  return {
    sent: true,
    submitted_by: "keyboard_enter",
  };
}

// Polls the input's enabled state until it is enabled or `timeoutMs` has elapsed; always checks at least once.
async function waitForInputEnabled(page, input, timeoutMs) {
  const budgetMs = Number.isFinite(timeoutMs) && timeoutMs > 0 ? timeoutMs : 0;
  const deadline = Date.now() + budgetMs;
  for (;;) {
    // A timeout of 0 means "no timeout" in Playwright, so never pass less than 1 ms.
    const remainingMs = Math.max(deadline - Date.now(), 1);
    const enabled = await input.isEnabled({ timeout: remainingMs }).catch(() => false);
    if (enabled) return true;
    if (Date.now() >= deadline) return false;
    await page.waitForTimeout(Math.min(100, remainingMs));
  }
}

/**
 * Polls the visible Debug Chat transcript every 500 ms until the newest assistant message contains
 * `expectedText` (and there are more assistant messages than `beforeAssistantCount`), a known
 * failure signal shows up in the newest assistant message, or `timeoutMs` elapses.
 *
 * @param {object} page - Playwright page with Debug Chat open.
 * @param {{ expectedText: string, beforeAssistantCount: number, timeoutMs: number }} options
 * @returns {Promise<{seen: boolean, latest_assistant_text: string, messages: object[], failure_signal?: string}>}
 */
async function waitForLatestAssistantContaining(page, { expectedText, beforeAssistantCount, timeoutMs }) {
  const stopAt = Date.now() + timeoutMs;
  let snapshot = [];
  let newestAssistantText = "";

  while (Date.now() < stopAt) {
    snapshot = await visibleDebugChatMessages(page);
    newestAssistantText = latestRoleText(snapshot, "assistant");
    const observed = { latest_assistant_text: newestAssistantText, messages: snapshot };

    const hasNewAssistant = countRole(snapshot, "assistant") > beforeAssistantCount;
    if (hasNewAssistant && newestAssistantText.includes(expectedText)) {
      return { seen: true, ...observed };
    }

    // Stop early when the UI already shows a known failure instead of waiting out the timeout.
    const signal = findFailureSignal(newestAssistantText);
    if (signal) {
      return { seen: false, ...observed, failure_signal: signal };
    }

    await page.waitForTimeout(500);
  }

  // Timed out: report the last snapshot that was taken (empty if the loop never ran).
  return { seen: false, latest_assistant_text: newestAssistantText, messages: snapshot };
}

/**
 * Resolves the target pipeline through the backend API (from inside the browser page, using the
 * token stored in localStorage) and checks which runner it is configured with.
 *
 * @param {object} page - Playwright page; the request code runs via `page.evaluate`.
 * @param {object} options
 * @param {string} options.backendUrl - Backend base URL that API paths are appended to.
 * @param {string} options.pipelineUrl - Pipeline URL; a pipeline id is extracted from it when possible.
 * @param {string} options.pipelineName - Pipeline name, used for lookup when no id is in the URL.
 * @param {string} options.expectedRunnerId - Runner id the pipeline must use; an empty value skips the check.
 * @returns {Promise<object>} Diagnostic with `status` ("ready", "blocked" or "fail"), `reason` on
 *   non-ready outcomes, and whatever pipeline/runner details were resolved.
 */
async function inspectPipeline(page, { backendUrl, pipelineUrl, pipelineName, expectedRunnerId }) {
  const pipelineIdFromUrl = pipelineIdFromUrlValue(pipelineUrl);
  // Everything the callback needs is passed in as its argument object; the callback itself only
  // uses browser globals (localStorage, fetch).
  return await page.evaluate(async ({ backendUrl, pipelineIdFromUrl, pipelineName, expectedRunnerId }) => {
    const token = localStorage.getItem("token");
    if (!token) {
      return {
        status: "blocked",
        authenticated: false,
        reason: "Browser profile has no localStorage token.",
      };
    }
    // Authenticated GET helper; a body that is not valid JSON is reported as an empty object.
    const getJson = async (path) => {
      const response = await fetch(`${backendUrl}${path}`, {
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
        },
      });
      return {
        status: response.status,
        json: await response.json().catch(() => ({})),
      };
    };

    // Prefer the id taken from the URL; otherwise look the pipeline up by exact name.
    let pipelineId = pipelineIdFromUrl;
    let matchedBy = pipelineId ? "url" : "";
    if (!pipelineId) {
      if (!pipelineName) {
        return {
          status: "blocked",
          authenticated: true,
          pipeline_resolved: false,
          reason: "Set LANGBOT_LOCAL_AGENT_PIPELINE_URL or LANGBOT_LOCAL_AGENT_PIPELINE_NAME.",
        };
      }
      const list = await getJson("/api/v1/pipelines");
      const pipelines = list.json.data?.pipelines || [];
      const match = pipelines.find((pipeline) => pipeline.name === pipelineName);
      if (!match) {
        // list_status is included so a failed list request can be told apart from a missing name.
        return {
          status: "blocked",
          authenticated: true,
          pipeline_resolved: false,
          list_status: list.status,
          reason: `Could not find pipeline named ${pipelineName}.`,
        };
      }
      pipelineId = match.uuid;
      matchedBy = "name";
    }

    const loaded = await getJson(`/api/v1/pipelines/${encodeURIComponent(pipelineId)}`);
    const pipeline = loaded.json.data?.pipeline;
    if (loaded.status >= 400 || !pipeline) {
      return {
        status: "fail",
        authenticated: true,
        pipeline_resolved: false,
        pipeline_id: pipelineId,
        get_status: loaded.status,
        reason: loaded.json.msg || "Could not load pipeline.",
      };
    }
    const config = pipeline.config || {};
    const runner = config.ai?.runner || {};
    // Accept `ai.runner.id` and, as a fallback, `ai.runner.runner`.
    const runnerId = runner.id || runner.runner || "";
    if (!runnerId) {
      return {
        status: "blocked",
        authenticated: true,
        pipeline_resolved: true,
        pipeline_id: pipelineId,
        pipeline_name: pipeline.name,
        matched_by: matchedBy,
        reason: "Pipeline has no ai.runner.id or legacy ai.runner.runner.",
      };
    }
    // The runner check is skipped when no expected runner id was supplied.
    if (expectedRunnerId && runnerId !== expectedRunnerId) {
      return {
        status: "blocked",
        authenticated: true,
        pipeline_resolved: true,
        pipeline_id: pipelineId,
        pipeline_name: pipeline.name,
        matched_by: matchedBy,
        runner_id: runnerId,
        expected_runner_id: expectedRunnerId,
        reason: `Pipeline runner mismatch: expected ${expectedRunnerId}, got ${runnerId}.`,
      };
    }
    return {
      status: "ready",
      authenticated: true,
      pipeline_resolved: true,
      pipeline_id: pipelineId,
      pipeline_name: pipeline.name,
      matched_by: matchedBy,
      runner_id: runnerId,
      expected_runner_id: expectedRunnerId || "",
    };
  }, { backendUrl, pipelineIdFromUrl, pipelineName, expectedRunnerId });
}

/**
 * Fetches `/api/v1/tools` from inside the browser page (using the token stored in localStorage)
 * and returns the sorted tool names together with the HTTP outcome.
 *
 * @param {object} page - Playwright page; the request code runs via `page.evaluate`.
 * @param {{ backendUrl: string }} options - Backend base URL the API path is appended to.
 * @returns {Promise<object>} Diagnostic with `status` ("ready", "fail" or "blocked"),
 *   `tool_names`, and `reason`.
 */
async function inspectToolNames(page, { backendUrl }) {
  // Runs in the page context, so it only touches its argument and browser globals.
  const listToolsInBrowser = async ({ backendUrl }) => {
    const token = localStorage.getItem("token");
    if (!token) {
      return {
        status: "blocked",
        authenticated: false,
        tool_names: [],
        reason: "Browser profile has no localStorage token.",
      };
    }

    const headers = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
    };
    const response = await fetch(`${backendUrl}/api/v1/tools`, { headers });
    // A body that is not valid JSON is treated as an empty payload.
    const json = await response.json().catch(() => ({}));

    // The name may live under `name`, `tool_name`, or `function.name`; unnamed entries are skipped.
    const nameOf = (tool) => tool.name || tool.tool_name || tool.function?.name || "";
    const toolNames = (json.data?.tools || []).map(nameOf).filter(Boolean);
    toolNames.sort();

    const failed = response.status >= 400;
    return {
      status: failed ? "fail" : "ready",
      authenticated: true,
      http_status: response.status,
      code: json.code ?? null,
      tool_names: toolNames,
      reason: failed ? json.msg || "Could not list tools." : "Tool list loaded.",
    };
  };

  return await page.evaluate(listToolsInBrowser, { backendUrl });
}

/**
 * POSTs to the pipeline's `/ws/reset/<sessionType>` endpoint from inside the browser page (using
 * the token stored in localStorage) and reports the HTTP outcome.
 *
 * @param {object} page - Playwright page; the request code runs via `page.evaluate`.
 * @param {{ backendUrl: string, pipelineId: string, sessionType: string }} options
 * @returns {Promise<object>} Diagnostic with `status` ("ready", "fail" or "blocked"), the
 *   pipeline/session identifiers, and `reason`.
 */
async function resetPipelineDebugChat(page, { backendUrl, pipelineId, sessionType }) {
  // Runs in the page context, so it only touches its argument and browser globals.
  const resetInBrowser = async ({ backendUrl, pipelineId, sessionType }) => {
    const target = { pipeline_id: pipelineId, session_type: sessionType };

    const token = localStorage.getItem("token");
    if (!token) {
      return {
        status: "blocked",
        authenticated: false,
        ...target,
        reason: "Browser profile has no localStorage token.",
      };
    }

    // Both path segments are percent-encoded before being placed in the URL.
    const pipelineSegment = encodeURIComponent(pipelineId);
    const sessionSegment = encodeURIComponent(sessionType);
    const endpoint = `${backendUrl}/api/v1/pipelines/${pipelineSegment}/ws/reset/${sessionSegment}`;
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${token}`,
        "Content-Type": "application/json",
      },
    });
    // A body that is not valid JSON is treated as an empty payload.
    const json = await response.json().catch(() => ({}));

    const failed = response.status >= 400;
    return {
      status: failed ? "fail" : "ready",
      authenticated: true,
      ...target,
      reset_status: response.status,
      reset_code: json.code ?? null,
      reason: failed ? json.msg || "Debug Chat reset failed." : "Debug Chat session reset.",
    };
  };

  return await page.evaluate(resetInBrowser, { backendUrl, pipelineId, sessionType });
}

/**
 * Extracts the pipeline id from a URL-like value: the path segment that follows `/pipeline/` or
 * `/pipelines/` (case-insensitive), up to the next `/`, `?` or `#`.
 *
 * @param {unknown} value - URL or path; falsy values are treated as an empty string.
 * @returns {string} The percent-decoded id, the raw segment when it is not valid percent-encoding,
 *   or "" when no id segment is present.
 */
function pipelineIdFromUrlValue(value) {
  const match = String(value || "").match(/\/pipelines?\/([^/?#]+)/i);
  if (!match) return "";
  const rawSegment = match[1];
  try {
    return decodeURIComponent(rawSegment);
  } catch {
    // decodeURIComponent throws URIError on a malformed escape (e.g. a lone "%"). A bad env value
    // should not abort the run, so use the segment as written.
    return rawSegment;
  }
}

/**
 * Counts the messages whose `role` equals `role`.
 *
 * @param {Array<{role: string}>} messages - Transcript entries.
 * @param {string} role - Role to count.
 * @returns {number} Number of matching messages.
 */
function countRole(messages, role) {
  return messages.reduce((total, message) => (message.role === role ? total + 1 : total), 0);
}

/**
 * Returns the text of the last message with the given role.
 *
 * @param {Array<{role: string, text?: string}>} messages - Transcript entries.
 * @param {string} role - Role to look for.
 * @returns {string} The message text, or "" when there is no such message or its text is empty.
 */
function latestRoleText(messages, role) {
  const matching = messages.filter((message) => message.role === role);
  const newest = matching[matching.length - 1];
  if (!newest || !newest.text) return "";
  return newest.text;
}

/**
 * Joins the `text` of every message with newlines.
 *
 * @param {Array<{text?: string}>} messages - Transcript entries.
 * @returns {string} All message texts, one per line.
 */
function messagesText(messages) {
  const lineSeparator = "\n";
  const texts = messages.map(({ text }) => text);
  return texts.join(lineSeparator);
}

/**
 * Returns the first entry of DEBUG_CHAT_FAILURE_SIGNALS that occurs in `text`.
 *
 * @param {unknown} text - Text to scan; falsy values are treated as an empty string.
 * @returns {string} The matching signal, or "" when none occurs.
 */
function findFailureSignal(text) {
  const haystack = String(text || "");
  const matched = DEBUG_CHAT_FAILURE_SIGNALS.find((signal) => haystack.includes(signal));
  return matched || "";
}

/**
 * Parses `value` as a base-10 integer and returns it when it is finite and greater than zero.
 *
 * @param {unknown} value - Raw value, typically an environment variable string.
 * @param {number} fallback - Returned when `value` does not parse to a positive integer.
 * @returns {number} The parsed positive integer or `fallback`.
 */
function positiveInt(value, fallback) {
  const text = value ? String(value) : "";
  const candidate = Number.parseInt(text, 10);
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return fallback;
  }
  return candidate;
}