From 5bfa38cbf2c113b069e69065ced2bb60ae87c8c6 Mon Sep 17 00:00:00 2001
From: RockChinQ <rockchinq@gmail.com>
Date: Sat, 13 Jun 2026 08:01:18 -0400
Subject: [PATCH 01/12] feat(plugins): show plugin logs on detail page via
 Docs/Logs tablist

Add a Logs tab beside Documentation on the plugin detail page, showing
the output a plugin prints through the standard Python logger (per the
wiki style guide). Logs are captured from the plugin's stderr by the
plugin runtime and fetched on demand.

- Bump langbot-plugin pin to 0.4.4 (adds GET_PLUGIN_LOGS action)
- plugin_connector/handler: get_plugin_logs RPC client
- HTTP route GET /api/v1/plugins/<author>/<name>/logs (limit + level)
- Frontend: wrap detail right panel in Docs/Logs Tabs; PluginLogs
  component with level filter, manual + 3s auto refresh, bottom-follow
- i18n: 7 new keys across all 8 locales
---
 pyproject.toml                                |   2 +-
 .../pkg/api/http/controller/groups/plugins.py |  16 ++
 src/langbot/pkg/plugin/connector.py           |  10 ++
 src/langbot/pkg/plugin/handler.py             |  25 +++
 uv.lock                                       |   8 +-
 .../app/home/plugins/PluginDetailContent.tsx  |  33 +++-
 .../plugin-logs/PluginLogs.tsx                | 156 ++++++++++++++++++
 web/src/app/infra/entities/plugin/index.ts    |   7 +
 web/src/app/infra/http/BackendClient.ts       |  17 ++
 web/src/i18n/locales/en-US.ts                 |   8 +
 web/src/i18n/locales/es-ES.ts                 |   8 +
 web/src/i18n/locales/ja-JP.ts                 |   8 +
 web/src/i18n/locales/ru-RU.ts                 |   8 +
 web/src/i18n/locales/th-TH.ts                 |   7 +
 web/src/i18n/locales/vi-VN.ts                 |   8 +
 web/src/i18n/locales/zh-Hans.ts               |   7 +
 web/src/i18n/locales/zh-Hant.ts               |   7 +
 17 files changed, 328 insertions(+), 7 deletions(-)
 create mode 100644 web/src/app/home/plugins/components/plugin-installed/plugin-logs/PluginLogs.tsx
diff --git a/pyproject.toml b/pyproject.toml
index 0ec0d418..024aa021 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ dependencies = [
     "chromadb>=1.0.0,<2.0.0",
     "qdrant-client (>=1.15.1,<2.0.0)",
     "pyseekdb==1.1.0.post3",
-    "langbot-plugin==0.4.3",
+    "langbot-plugin==0.4.4",
     "asyncpg>=0.30.0",
     "line-bot-sdk>=3.19.0",
     "matrix-nio>=0.25.2",
diff --git a/src/langbot/pkg/api/http/controller/groups/plugins.py b/src/langbot/pkg/api/http/controller/groups/plugins.py
index 05a8a271..52b0fb20 100644
--- a/src/langbot/pkg/api/http/controller/groups/plugins.py
+++ b/src/langbot/pkg/api/http/controller/groups/plugins.py
@@ -271,6 +271,22 @@ class PluginsRouterGroup(group.RouterGroup):
             readme = await self.ap.plugin_connector.get_plugin_readme(author, plugin_name, language=language)
             return self.success(data={'readme': readme})
 
+        @self.route(
+            '/<author>/<plugin_name>/logs',
+            methods=['GET'],
+            auth_type=group.AuthType.USER_TOKEN_OR_API_KEY,
+        )
+        async def _(author: str, plugin_name: str) -> quart.Response:
+            try:
+                limit = int(quart.request.args.get('limit', 200))
+            except (TypeError, ValueError):
+                limit = 200
+            level = quart.request.args.get('level') or None
+            logs = await self.ap.plugin_connector.get_plugin_logs(
+                author, plugin_name, limit=limit, level=level
+            )
+            return self.success(data={'logs': logs})
+
         @self.route(
             '/<author>/<plugin_name>/icon',
             methods=['GET'],
diff --git a/src/langbot/pkg/plugin/connector.py b/src/langbot/pkg/plugin/connector.py
index 2bb9088d..12413e49 100644
--- a/src/langbot/pkg/plugin/connector.py
+++ b/src/langbot/pkg/plugin/connector.py
@@ -689,6 +689,16 @@ class PluginRuntimeConnector(ManagedRuntimeConnector):
     async def get_plugin_readme(self, plugin_author: str, plugin_name: str, language: str = 'en') -> str:
         return await self.handler.get_plugin_readme(plugin_author, plugin_name, language)
 
+    async def get_plugin_logs(
+        self,
+        plugin_author: str,
+        plugin_name: str,
+        limit: int = 200,
+        level: str | None = None,
+    ) -> list[dict[str, Any]]:
+        # Not cached: logs are live and change constantly.
+        return await self.handler.get_plugin_logs(plugin_author, plugin_name, limit, level)
+
     @alru_cache(ttl=5 * 60)
     async def get_plugin_assets(self, plugin_author: str, plugin_name: str, filepath: str) -> dict[str, Any]:
         return await self.handler.get_plugin_assets(plugin_author, plugin_name, filepath)
diff --git a/src/langbot/pkg/plugin/handler.py b/src/langbot/pkg/plugin/handler.py
index f5a8511e..3bd85ae6 100644
--- a/src/langbot/pkg/plugin/handler.py
+++ b/src/langbot/pkg/plugin/handler.py
@@ -953,6 +953,31 @@ class RuntimeConnectionHandler(handler.Handler):
 
         return readme_bytes.decode('utf-8')
 
+    async def get_plugin_logs(
+        self,
+        plugin_author: str,
+        plugin_name: str,
+        limit: int = 200,
+        level: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get recent log lines captured from the plugin's stderr."""
+        try:
+            result = await self.call_action(
+                LangBotToRuntimeAction.GET_PLUGIN_LOGS,
+                {
+                    'plugin_author': plugin_author,
+                    'plugin_name': plugin_name,
+                    'limit': limit,
+                    'level': level,
+                },
+                timeout=20,
+            )
+        except Exception:
+            traceback.print_exc()
+            return []
+
+        return result.get('logs', [])
+
     async def get_plugin_assets(self, plugin_author: str, plugin_name: str, filepath: str) -> dict[str, Any]:
         """Get plugin assets"""
         result = await self.call_action(
diff --git a/uv.lock b/uv.lock
index 450328f5..24d8428e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2082,7 +2082,7 @@ requires-dist = [
     { name = "ebooklib", specifier = ">=0.18" },
     { name = "gewechat-client", specifier = ">=0.1.5" },
     { name = "html2text", specifier = ">=2024.2.26" },
-    { name = "langbot-plugin", specifier = "==0.4.3" },
+    { name = "langbot-plugin", specifier = "==0.4.4" },
     { name = "langchain", specifier = ">=0.2.0" },
     { name = "langchain-core", specifier = ">=1.3.3" },
     { name = "langchain-text-splitters", specifier = ">=1.1.2" },
@@ -2146,7 +2146,7 @@ dev = [
 
 [[package]]
 name = "langbot-plugin"
-version = "0.4.3"
+version = "0.4.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiofiles" },
@@ -2167,9 +2167,9 @@ dependencies = [
     { name = "watchdog" },
     { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8e/f1/32ec67e8b8eb91159d2b9703f466cc2a763c8cea380dd56561efe793a55b/langbot_plugin-0.4.3.tar.gz", hash = "sha256:747fb78bc666cfac3842cb35130fa8323759dd8768fdaa1975099157a3749c6e", size = 309655, upload-time = "2026-06-13T04:58:10.279Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/68/1a/636c057f6e07a0c87dc7b9c1a373d73df82787b7706ba3ba1a95f633ce7c/langbot_plugin-0.4.4.tar.gz", hash = "sha256:8fdad2d22fe8360d2911557fac17f258f57e85f1a36bd50cd488cb44f61225a4", size = 312741, upload-time = "2026-06-13T11:59:36.772Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/28/05/84bd7537efd45fc02044ca9509973160a7d6d10520ff73e31424141a3a6c/langbot_plugin-0.4.3-py3-none-any.whl", hash = "sha256:46aca36e2193c18f9cf332460760dd7b9340ee2e96a57f2e4ae621c4d4c4b61c", size = 211384, upload-time = "2026-06-13T04:58:11.668Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/c6/3c313e4ec431fca68326f348bd2c7a61777d43c940bb46ae6c8ebfb66973/langbot_plugin-0.4.4-py3-none-any.whl", hash = "sha256:c91f082ca431539f34790e497e2f056f4e7030e46e0d2bf01a6114b055dd2feb", size = 214164, upload-time = "2026-06-13T11:59:38.053Z" },
 ]
 
 [[package]]
diff --git a/web/src/app/home/plugins/PluginDetailContent.tsx b/web/src/app/home/plugins/PluginDetailContent.tsx
index 829e6868..0d1e56e5 100644
--- a/web/src/app/home/plugins/PluginDetailContent.tsx
+++ b/web/src/app/home/plugins/PluginDetailContent.tsx
@@ -2,10 +2,12 @@ import { useEffect, useState } from 'react';
 import { useNavigate } from 'react-router-dom';
 import PluginForm from '@/app/home/plugins/components/plugin-installed/plugin-form/PluginForm';
 import PluginReadme from '@/app/home/plugins/components/plugin-installed/plugin-readme/PluginReadme';
+import PluginLogs from '@/app/home/plugins/components/plugin-installed/plugin-logs/PluginLogs';
 import PluginComponentList from '@/app/home/plugins/components/plugin-installed/PluginComponentList';
 import { useSidebarData } from '@/app/home/components/home-sidebar/SidebarDataContext';
 import { useTranslation } from 'react-i18next';
 import { Badge } from '@/components/ui/badge';
+import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
 import { Button } from '@/components/ui/button';
 import { Checkbox } from '@/components/ui/checkbox';
 import {
@@ -217,8 +219,35 @@ export default function PluginDetailContent({ id }: { id: string }) {
             {dangerZone}
           </div>
           <div className="hidden w-px shrink-0 bg-border md:block" />
-          <div className="min-w-0 flex-1 pb-6 md:min-h-0 md:overflow-y-auto md:overflow-x-hidden">
-            <PluginReadme pluginAuthor={pluginAuthor} pluginName={pluginName} />
+          <div className="flex min-w-0 flex-1 flex-col pb-6 md:min-h-0 md:overflow-hidden">
+            <Tabs defaultValue="docs" className="flex min-h-0 flex-1 flex-col">
+              <TabsList className="mb-2 shrink-0">
+                <TabsTrigger value="docs" className="flex-none px-4">
+                  {t('plugins.tabDocs')}
+                </TabsTrigger>
+                <TabsTrigger value="logs" className="flex-none px-4">
+                  {t('plugins.tabLogs')}
+                </TabsTrigger>
+              </TabsList>
+              <TabsContent
+                value="docs"
+                className="min-h-0 flex-1 md:overflow-y-auto md:overflow-x-hidden"
+              >
+                <PluginReadme
+                  pluginAuthor={pluginAuthor}
+                  pluginName={pluginName}
+                />
+              </TabsContent>
+              <TabsContent
+                value="logs"
+                className="min-h-0 flex-1 md:overflow-hidden"
+              >
+                <PluginLogs
+                  pluginAuthor={pluginAuthor}
+                  pluginName={pluginName}
+                />
+              </TabsContent>
+            </Tabs>
           </div>
         </div>
       </div>
diff --git a/web/src/app/home/plugins/components/plugin-installed/plugin-logs/PluginLogs.tsx b/web/src/app/home/plugins/components/plugin-installed/plugin-logs/PluginLogs.tsx
new file mode 100644
index 00000000..08643aad
--- /dev/null
+++ b/web/src/app/home/plugins/components/plugin-installed/plugin-logs/PluginLogs.tsx
@@ -0,0 +1,156 @@
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { httpClient } from '@/app/infra/http/HttpClient';
+import { useTranslation } from 'react-i18next';
+import { PluginLogEntry } from '@/app/infra/entities/plugin';
+import { Button } from '@/components/ui/button';
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from '@/components/ui/select';
+import { RefreshCw } from 'lucide-react';
+
+const LEVEL_OPTIONS = ['ALL', 'DEBUG', 'INFO', 'WARNING', 'ERROR'] as const;
+
+function levelClassName(level: string): string {
+  switch (level) {
+    case 'ERROR':
+    case 'CRITICAL':
+      return 'text-red-500';
+    case 'WARNING':
+      return 'text-amber-500';
+    case 'DEBUG':
+      return 'text-gray-400 dark:text-gray-500';
+    default:
+      return 'text-gray-700 dark:text-gray-300';
+  }
+}
+
+export default function PluginLogs({
+  pluginAuthor,
+  pluginName,
+}: {
+  pluginAuthor: string;
+  pluginName: string;
+}) {
+  const { t } = useTranslation();
+  const [logs, setLogs] = useState<PluginLogEntry[]>([]);
+  const [isLoading, setIsLoading] = useState(false);
+  const [level, setLevel] = useState<string>('ALL');
+  const [autoRefresh, setAutoRefresh] = useState(true);
+  const scrollRef = useRef<HTMLDivElement>(null);
+  const atBottomRef = useRef(true);
+
+  const fetchLogs = useCallback(() => {
+    setIsLoading(true);
+    httpClient
+      .getPluginLogs(
+        pluginAuthor,
+        pluginName,
+        500,
+        level === 'ALL' ? undefined : level,
+      )
+      .then((res) => {
+        setLogs(res.logs ?? []);
+      })
+      .catch(() => {
+        setLogs([]);
+      })
+      .finally(() => {
+        setIsLoading(false);
+      });
+  }, [pluginAuthor, pluginName, level]);
+
+  useEffect(() => {
+    fetchLogs();
+  }, [fetchLogs]);
+
+  // Auto-refresh poll loop.
+  useEffect(() => {
+    if (!autoRefresh) return;
+    const timer = setInterval(fetchLogs, 3000);
+    return () => clearInterval(timer);
+  }, [autoRefresh, fetchLogs]);
+
+  // Keep view pinned to bottom when the user is already at the bottom.
+  useEffect(() => {
+    const el = scrollRef.current;
+    if (el && atBottomRef.current) {
+      el.scrollTop = el.scrollHeight;
+    }
+  }, [logs]);
+
+  function handleScroll() {
+    const el = scrollRef.current;
+    if (!el) return;
+    atBottomRef.current = el.scrollHeight - el.scrollTop - el.clientHeight < 40;
+  }
+
+  return (
+    <div className="flex h-full flex-col">
+      <div className="flex shrink-0 flex-wrap items-center gap-2 px-6 pb-3">
+        <Select value={level} onValueChange={setLevel}>
+          <SelectTrigger className="h-8 w-[130px]">
+            <SelectValue />
+          </SelectTrigger>
+          <SelectContent>
+            {LEVEL_OPTIONS.map((opt) => (
+              <SelectItem key={opt} value={opt}>
+                {opt === 'ALL' ? t('plugins.logsLevelAll') : opt}
+              </SelectItem>
+            ))}
+          </SelectContent>
+        </Select>
+        <Button
+          type="button"
+          variant="outline"
+          size="sm"
+          className="h-8"
+          onClick={fetchLogs}
+          disabled={isLoading}
+        >
+          <RefreshCw
+            className={`mr-1.5 size-3.5 ${isLoading ? 'animate-spin' : ''}`}
+          />
+          {t('plugins.logsRefresh')}
+        </Button>
+        <Button
+          type="button"
+          variant={autoRefresh ? 'default' : 'outline'}
+          size="sm"
+          className="h-8"
+          onClick={() => setAutoRefresh((v) => !v)}
+        >
+          {autoRefresh
+            ? t('plugins.logsAutoRefreshOn')
+            : t('plugins.logsAutoRefreshOff')}
+        </Button>
+      </div>
+
+      <div
+        ref={scrollRef}
+        onScroll={handleScroll}
+        className="min-h-0 flex-1 overflow-auto bg-gray-50 px-6 py-3 font-mono text-xs leading-relaxed dark:bg-gray-900/40"
+      >
+        {logs.length === 0 ? (
+          <div className="py-8 text-center text-sm text-gray-500 dark:text-gray-400">
+            {t('plugins.logsEmpty')}
+          </div>
+        ) : (
+          logs.map((entry, idx) => (
+            <div
+              key={`${entry.ts}-${idx}`}
+              className={`whitespace-pre-wrap break-all ${levelClassName(
+                entry.level,
+              )}`}
+            >
+              {entry.text}
+            </div>
+          ))
+        )}
+      </div>
+    </div>
+  );
+}
diff --git a/web/src/app/infra/entities/plugin/index.ts b/web/src/app/infra/entities/plugin/index.ts
index ac9b9563..ad661211 100644
--- a/web/src/app/infra/entities/plugin/index.ts
+++ b/web/src/app/infra/entities/plugin/index.ts
@@ -21,6 +21,13 @@ export interface PluginComponent {
   };
 }
 
+// A single log line captured from a running plugin's stderr.
+export interface PluginLogEntry {
+  ts: number;
+  level: string;
+  text: string;
+}
+
 // marketplace plugin v4
 export enum PluginV4Status {
   Any = 'any',
diff --git a/web/src/app/infra/http/BackendClient.ts b/web/src/app/infra/http/BackendClient.ts
index e9cdb51c..b2f3f7b5 100644
--- a/web/src/app/infra/http/BackendClient.ts
+++ b/web/src/app/infra/http/BackendClient.ts
@@ -55,6 +55,7 @@ import {
   ApiRespSkill,
 } from '@/app/infra/entities/api';
 import { Plugin } from '@/app/infra/entities/plugin';
+import type { PluginLogEntry } from '@/app/infra/entities/plugin';
 import type { I18nObject } from '@/app/infra/entities/common';
 import { GetBotLogsRequest } from '@/app/infra/http/requestParam/bots/GetBotLogsRequest';
 import { GetBotLogsResponse } from '@/app/infra/http/requestParam/bots/GetBotLogsResponse';
@@ -604,6 +605,22 @@ export class BackendClient extends BaseHttpClient {
     );
   }
 
+  public getPluginLogs(
+    author: string,
+    name: string,
+    limit: number = 200,
+    level?: string,
+  ): Promise<{ logs: PluginLogEntry[] }> {
+    const params = new URLSearchParams();
+    params.set('limit', String(limit));
+    if (level) {
+      params.set('level', level);
+    }
+    return this.get(
+      `/api/v1/plugins/${author}/${name}/logs?${params.toString()}`,
+    );
+  }
+
   public getPluginAssetURL(
     author: string,
     name: string,
diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts
index ddf1ab43..91f0b6f3 100644
--- a/web/src/i18n/locales/en-US.ts
+++ b/web/src/i18n/locales/en-US.ts
@@ -577,6 +577,14 @@ const enUS = {
     viewSource: 'View Source',
     loadingReadme: 'Loading documentation...',
     noReadme: 'This plugin does not provide README documentation',
+    tabDocs: 'Documentation',
+    tabLogs: 'Logs',
+    logsLevelAll: 'All levels',
+    logsRefresh: 'Refresh',
+    logsAutoRefreshOn: 'Auto-refresh: On',
+    logsAutoRefreshOff: 'Auto-refresh: Off',
+    logsEmpty:
+      'No logs yet. Logs printed by the plugin via logger will appear here.',
     fileUpload: {
       tooLarge: 'File size exceeds 10MB limit',
       success: 'File uploaded successfully',
diff --git a/web/src/i18n/locales/es-ES.ts b/web/src/i18n/locales/es-ES.ts
index 1e85f504..5f802815 100644
--- a/web/src/i18n/locales/es-ES.ts
+++ b/web/src/i18n/locales/es-ES.ts
@@ -589,6 +589,14 @@ const esES = {
     viewSource: 'Ver código fuente',
     loadingReadme: 'Cargando documentación...',
     noReadme: 'Este plugin no proporciona documentación README',
+    tabDocs: 'Documentación',
+    tabLogs: 'Registros',
+    logsLevelAll: 'Todos los niveles',
+    logsRefresh: 'Actualizar',
+    logsAutoRefreshOn: 'Auto-actualizar: Activado',
+    logsAutoRefreshOff: 'Auto-actualizar: Desactivado',
+    logsEmpty:
+      'Aún no hay registros. Los registros que el plugin imprima mediante logger aparecerán aquí.',
     fileUpload: {
       tooLarge: 'El tamaño del archivo supera el límite de 10MB',
       success: 'Archivo subido correctamente',
diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts
index d87113c0..c8c65152 100644
--- a/web/src/i18n/locales/ja-JP.ts
+++ b/web/src/i18n/locales/ja-JP.ts
@@ -582,6 +582,14 @@ const jaJP = {
     viewSource: 'ソースを表示',
     loadingReadme: 'ドキュメントを読み込み中...',
     noReadme: 'このプラグインはREADMEドキュメントを提供していません',
+    tabDocs: 'ドキュメント',
+    tabLogs: 'ログ',
+    logsLevelAll: 'すべてのレベル',
+    logsRefresh: '更新',
+    logsAutoRefreshOn: '自動更新：オン',
+    logsAutoRefreshOff: '自動更新：オフ',
+    logsEmpty:
+      'ログはまだありません。プラグインが logger で出力したログがここに表示されます。',
     fileUpload: {
       tooLarge: 'ファイルサイズが 10MB の制限を超えています',
       success: 'ファイルのアップロードに成功しました',
diff --git a/web/src/i18n/locales/ru-RU.ts b/web/src/i18n/locales/ru-RU.ts
index 846e16f7..8ebb4fa2 100644
--- a/web/src/i18n/locales/ru-RU.ts
+++ b/web/src/i18n/locales/ru-RU.ts
@@ -588,6 +588,14 @@ const ruRU = {
     viewSource: 'Исходный код',
     loadingReadme: 'Загрузка документации...',
     noReadme: 'Этот плагин не предоставляет документацию README',
+    tabDocs: 'Документация',
+    tabLogs: 'Журналы',
+    logsLevelAll: 'Все уровни',
+    logsRefresh: 'Обновить',
+    logsAutoRefreshOn: 'Автообновление: вкл.',
+    logsAutoRefreshOff: 'Автообновление: выкл.',
+    logsEmpty:
+      'Журналов пока нет. Здесь появятся логи, выводимые плагином через logger.',
     fileUpload: {
       tooLarge: 'Размер файла превышает лимит 10 МБ',
       success: 'Файл успешно загружен',
diff --git a/web/src/i18n/locales/th-TH.ts b/web/src/i18n/locales/th-TH.ts
index 0922b31a..ac976402 100644
--- a/web/src/i18n/locales/th-TH.ts
+++ b/web/src/i18n/locales/th-TH.ts
@@ -569,6 +569,13 @@ const thTH = {
     viewSource: 'ดูซอร์สโค้ด',
     loadingReadme: 'กำลังโหลดเอกสาร...',
     noReadme: 'ปลั๊กอินนี้ไม่มีเอกสาร README',
+    tabDocs: 'เอกสาร',
+    tabLogs: 'บันทึก',
+    logsLevelAll: 'ทุกระดับ',
+    logsRefresh: 'รีเฟรช',
+    logsAutoRefreshOn: 'รีเฟรชอัตโนมัติ: เปิด',
+    logsAutoRefreshOff: 'รีเฟรชอัตโนมัติ: ปิด',
+    logsEmpty: 'ยังไม่มีบันทึก บันทึกที่ปลั๊กอินพิมพ์ผ่าน logger จะแสดงที่นี่',
     fileUpload: {
       tooLarge: 'ขนาดไฟล์เกินขีดจำกัด 10MB',
       success: 'อัปโหลดไฟล์สำเร็จ',
diff --git a/web/src/i18n/locales/vi-VN.ts b/web/src/i18n/locales/vi-VN.ts
index 47d51f2a..be1e7754 100644
--- a/web/src/i18n/locales/vi-VN.ts
+++ b/web/src/i18n/locales/vi-VN.ts
@@ -583,6 +583,14 @@ const viVN = {
     viewSource: 'Xem mã nguồn',
     loadingReadme: 'Đang tải tài liệu...',
     noReadme: 'Plugin này không cung cấp tài liệu README',
+    tabDocs: 'Tài liệu',
+    tabLogs: 'Nhật ký',
+    logsLevelAll: 'Tất cả cấp độ',
+    logsRefresh: 'Làm mới',
+    logsAutoRefreshOn: 'Tự động làm mới: Bật',
+    logsAutoRefreshOff: 'Tự động làm mới: Tắt',
+    logsEmpty:
+      'Chưa có nhật ký. Nhật ký do plugin in qua logger sẽ hiển thị ở đây.',
     fileUpload: {
       tooLarge: 'Kích thước tệp vượt quá giới hạn 10MB',
       success: 'Tải tệp lên thành công',
diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts
index a7ef0a53..f32f039a 100644
--- a/web/src/i18n/locales/zh-Hans.ts
+++ b/web/src/i18n/locales/zh-Hans.ts
@@ -552,6 +552,13 @@ const zhHans = {
     viewSource: '查看来源',
     loadingReadme: '正在加载文档...',
     noReadme: '该插件没有提供 README 文档',
+    tabDocs: '文档',
+    tabLogs: '日志',
+    logsLevelAll: '全部级别',
+    logsRefresh: '刷新',
+    logsAutoRefreshOn: '自动刷新：开',
+    logsAutoRefreshOff: '自动刷新：关',
+    logsEmpty: '暂无日志。插件通过 logger 打印的日志会显示在这里。',
     fileUpload: {
       tooLarge: '文件大小超过 10MB 限制',
       success: '文件上传成功',
diff --git a/web/src/i18n/locales/zh-Hant.ts b/web/src/i18n/locales/zh-Hant.ts
index 1d0d9da2..539b34c4 100644
--- a/web/src/i18n/locales/zh-Hant.ts
+++ b/web/src/i18n/locales/zh-Hant.ts
@@ -552,6 +552,13 @@ const zhHant = {
     viewSource: '查看來源',
     loadingReadme: '正在載入文件...',
     noReadme: '該插件沒有提供 README 文件',
+    tabDocs: '文件',
+    tabLogs: '日誌',
+    logsLevelAll: '全部級別',
+    logsRefresh: '重新整理',
+    logsAutoRefreshOn: '自動重新整理：開',
+    logsAutoRefreshOff: '自動重新整理：關',
+    logsEmpty: '暫無日誌。外掛透過 logger 列印的日誌會顯示在這裡。',
     fileUpload: {
       tooLarge: '檔案大小超過 10MB 限制',
       success: '檔案上傳成功',

From b6fde30aa7de42a31bee0e4c97db230c3d5f7340 Mon Sep 17 00:00:00 2001
From: RockChinQ <rockchinq@gmail.com>
Date: Sat, 13 Jun 2026 08:03:29 -0400
Subject: [PATCH 02/12] style(plugins): ruff format logs route

---
 src/langbot/pkg/api/http/controller/groups/plugins.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/langbot/pkg/api/http/controller/groups/plugins.py b/src/langbot/pkg/api/http/controller/groups/plugins.py
index 52b0fb20..c291c123 100644
--- a/src/langbot/pkg/api/http/controller/groups/plugins.py
+++ b/src/langbot/pkg/api/http/controller/groups/plugins.py
@@ -282,9 +282,7 @@ class PluginsRouterGroup(group.RouterGroup):
             except (TypeError, ValueError):
                 limit = 200
             level = quart.request.args.get('level') or None
-            logs = await self.ap.plugin_connector.get_plugin_logs(
-                author, plugin_name, limit=limit, level=level
-            )
+            logs = await self.ap.plugin_connector.get_plugin_logs(author, plugin_name, limit=limit, level=level)
             return self.success(data={'logs': logs})
 
         @self.route(

From 7fe3eedeea189e266b1d430ecd3aaaa666260c0a Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sat, 13 Jun 2026 21:27:47 +0800
Subject: [PATCH 03/12] fix(provider): use LiteLLM input window for context
 length (#2243)

---
 .../modelmgr/requesters/litellmchat.py        | 38 ++++++++++++-------
 tests/unit_tests/provider/test_litellmchat.py | 23 +++++++++--
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
index 6b087916..63536a2c 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -75,22 +75,33 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                 continue
         return False
 
+    @staticmethod
+    def _positive_int(value: typing.Any) -> int | None:
+        if isinstance(value, bool):
+            return None
+        if isinstance(value, int) and value > 0:
+            return value
+        if isinstance(value, str) and value.isdigit():
+            parsed_value = int(value)
+            if parsed_value > 0:
+                return parsed_value
+        return None
+
     def _context_length_from_scan_payload(self, model_payload: dict[str, typing.Any] | None) -> int | None:
         if not model_payload:
             return None
 
         for field_name in ('context_length', 'context_window', 'max_context_length'):
-            value = model_payload.get(field_name)
-            if isinstance(value, bool):
-                continue
-            if isinstance(value, int) and value > 0:
-                return value
-            if isinstance(value, str) and value.isdigit():
-                parsed_value = int(value)
-                if parsed_value > 0:
-                    return parsed_value
+            context_length = self._positive_int(model_payload.get(field_name))
+            if context_length is not None:
+                return context_length
         return None
 
+    def _context_length_from_litellm_model_info(self, model_info: typing.Any) -> int | None:
+        if isinstance(model_info, dict):
+            return self._positive_int(model_info.get('max_input_tokens'))
+        return self._positive_int(getattr(model_info, 'max_input_tokens', None))
+
     def _metadata_provider_candidates(self, model_name: str) -> list[str]:
         normalized_model_name = (model_name or '').lower()
         candidates = []
@@ -126,7 +137,7 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
         return None
 
     def _safe_context_length(self, model_name: str) -> int | None:
-        helper = getattr(litellm, 'get_max_tokens', None)
+        helper = getattr(litellm, 'get_model_info', None)
         if not callable(helper):
             return self._known_context_length_fallback(model_name)
 
@@ -143,11 +154,12 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                 continue
             tried_candidates.append(candidate)
             try:
-                max_tokens = helper(candidate)
+                model_info = helper(candidate)
             except Exception:
                 continue
-            if isinstance(max_tokens, int) and max_tokens > 0:
-                return max_tokens
+            context_length = self._context_length_from_litellm_model_info(model_info)
+            if context_length is not None:
+                return context_length
         return self._known_context_length_fallback(model_name)
 
     def _supports_function_calling(self, model_name: str) -> bool:
diff --git a/tests/unit_tests/provider/test_litellmchat.py b/tests/unit_tests/provider/test_litellmchat.py
index 1ec12d82..2878d827 100644
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
@@ -1034,11 +1034,28 @@ class TestScanModels:
             },
         )
 
-        with patch.object(litellmchat.litellm, 'get_max_tokens') as mock_get_max_tokens:
-            mock_get_max_tokens.side_effect = lambda model: 131072 if model == 'moonshot/moonshot-v1-128k' else None
+        with patch.object(litellmchat.litellm, 'get_model_info') as mock_get_model_info:
+            mock_get_model_info.side_effect = (
+                lambda model: {'max_input_tokens': 131072}
+                if model == 'moonshot/moonshot-v1-128k'
+                else {}
+            )
 
             assert requester._safe_context_length('moonshot-v1-128k') == 131072
 
+    def test_safe_context_length_uses_litellm_max_input_tokens(self):
+        """LiteLLM max_output_tokens must not be treated as the context window."""
+        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
+
+        with patch.object(litellmchat.litellm, 'get_model_info') as mock_get_model_info:
+            mock_get_model_info.return_value = {
+                'max_input_tokens': 128000,
+                'max_output_tokens': 16384,
+                'max_tokens': 16384,
+            }
+
+            assert requester._safe_context_length('gpt-4o') == 128000
+
     def test_litellm_bool_helper_tries_moonshot_metadata_alias(self):
         """OpenAI-compatible Moonshot endpoints still use Moonshot metadata for abilities."""
         requester = litellmchat.LiteLLMRequester(
@@ -1102,7 +1119,7 @@ class TestScanModels:
             },
         )
 
-        with patch.object(litellmchat.litellm, 'get_max_tokens', side_effect=Exception('not mapped')):
+        with patch.object(litellmchat.litellm, 'get_model_info', side_effect=Exception('not mapped')):
             assert requester._safe_context_length('deepseek-v4-pro') == 1_000_000
             assert requester._safe_context_length('deepseek-v4-flash') == 1_000_000
 

From b7d8332cb0a4b745561aa8b63426677afc4fde45 Mon Sep 17 00:00:00 2001
From: RockChinQ <rockchinq@gmail.com>
Date: Sat, 13 Jun 2026 11:13:18 -0400
Subject: [PATCH 04/12] feat(telemetry): include instance_create_ts in
 heartbeat payload

Load the instance creation timestamp from data/labels/instance_id.json
(backfilling+persisting it for instances created before the field existed),
expose it as constants.instance_create_ts, and include it in the heartbeat
payload so Space can anchor Time-To-Value / onboarding analytics on real
install time rather than first-heartbeat.

Verified: py_compile, ruff, pytest tests/unit_tests/telemetry/ (37 passed).
---
 src/langbot/pkg/core/stages/load_config.py   | 10 ++++++++++
 src/langbot/pkg/telemetry/heartbeat.py       |  1 +
 src/langbot/pkg/utils/constants.py           |  8 ++++++++
 tests/unit_tests/telemetry/test_heartbeat.py |  1 +
 4 files changed, 20 insertions(+)

diff --git a/src/langbot/pkg/core/stages/load_config.py b/src/langbot/pkg/core/stages/load_config.py
index 26f4a9e1..6fc890f1 100644
--- a/src/langbot/pkg/core/stages/load_config.py
+++ b/src/langbot/pkg/core/stages/load_config.py
@@ -202,6 +202,16 @@ class LoadConfigStage(stage.BootingStage):
                 constants.instance_id = new_id
         constants.edition = ap.instance_config.data.get('system', {}).get('edition', 'community')
 
+        # Instance creation timestamp: sourced from data/labels/instance_id.json.
+        # Instances created before this field existed (or supplied via
+        # system.instance_id) won't have it, so backfill with the current time
+        # and persist it via the dump below — from then on it stays stable.
+        instance_create_ts = ap.instance_id.data.get('instance_create_ts', 0)
+        if not isinstance(instance_create_ts, int) or instance_create_ts <= 0:
+            instance_create_ts = int(time.time())
+            ap.instance_id.data['instance_create_ts'] = instance_create_ts
+        constants.instance_create_ts = instance_create_ts
+
         print(f'LangBot instance id: {constants.instance_id}')
         print(f'LangBot edition: {constants.edition}')
 
diff --git a/src/langbot/pkg/telemetry/heartbeat.py b/src/langbot/pkg/telemetry/heartbeat.py
index dd2a58d3..34b61673 100644
--- a/src/langbot/pkg/telemetry/heartbeat.py
+++ b/src/langbot/pkg/telemetry/heartbeat.py
@@ -109,6 +109,7 @@ async def build_heartbeat_payload(ap: core_app.Application) -> dict:
         'query_id': '',
         'version': constants.semantic_version,
         'instance_id': constants.instance_id,
+        'instance_create_ts': constants.instance_create_ts,
         'edition': constants.edition,
         'features': features,
         'timestamp': datetime.now(timezone.utc).isoformat(),
diff --git a/src/langbot/pkg/utils/constants.py b/src/langbot/pkg/utils/constants.py
index fb520da9..2af16486 100644
--- a/src/langbot/pkg/utils/constants.py
+++ b/src/langbot/pkg/utils/constants.py
@@ -16,3 +16,11 @@ debug_mode = False
 edition = 'community'
 
 instance_id = ''
+
+instance_create_ts = 0
+"""Unix timestamp (seconds) of when this instance was first created.
+
+Sourced from ``data/labels/instance_id.json``. Backfilled to the current
+time for instances created before this field existed, so it is always a
+positive value once load_config has run.
+"""
diff --git a/tests/unit_tests/telemetry/test_heartbeat.py b/tests/unit_tests/telemetry/test_heartbeat.py
index a5bd2e32..18d61f2d 100644
--- a/tests/unit_tests/telemetry/test_heartbeat.py
+++ b/tests/unit_tests/telemetry/test_heartbeat.py
@@ -62,6 +62,7 @@ class TestBuildHeartbeatPayload:
 
         assert payload['event_type'] == 'instance_heartbeat'
         assert payload['query_id'] == ''
+        assert 'instance_create_ts' in payload
         assert 'timestamp' in payload
         f = payload['features']
         assert f['database'] == 'postgresql'

From 2e7978317c650fc2b3c23e8a6d35cd007a3c45e5 Mon Sep 17 00:00:00 2001
From: RockChinQ <rockchinq@gmail.com>
Date: Sat, 13 Jun 2026 11:21:44 -0400
Subject: [PATCH 05/12] chore(release): bump version to 4.10.2

---
 pyproject.toml          | 2 +-
 src/langbot/__init__.py | 2 +-
 uv.lock                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 024aa021..96e95f7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langbot"
-version = "4.10.1"
+version = "4.10.2"
 description = "Production-grade platform for building agentic IM bots"
 readme = "README.md"
 license-files = ["LICENSE"]
diff --git a/src/langbot/__init__.py b/src/langbot/__init__.py
index e2cdf58d..f6a11354 100644
--- a/src/langbot/__init__.py
+++ b/src/langbot/__init__.py
@@ -1,3 +1,3 @@
 """LangBot - Production-grade platform for building agentic IM bots"""
 
-__version__ = '4.10.1'
+__version__ = '4.10.2'
diff --git a/uv.lock b/uv.lock
index 24d8428e..bca1495a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1967,7 +1967,7 @@ wheels = [
 
 [[package]]
 name = "langbot"
-version = "4.10.1"
+version = "4.10.2"
 source = { editable = "." }
 dependencies = [
     { name = "aiocqhttp" },

From 1ef4507d9a6ce1718cbbc83596ec0ea7c3ece485 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sun, 14 Jun 2026 10:57:53 +0800
Subject: [PATCH 06/12] [codex] Delegate web page bot stream helpers (#2245)

* fix(platform): delegate web page bot stream helpers

* style(platform): format web page bot adapter
---
 .../pkg/platform/sources/web_page_bot_adapter.py     | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/langbot/pkg/platform/sources/web_page_bot_adapter.py b/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
index d424debd..fa7f8174 100644
--- a/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
+++ b/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
@@ -84,6 +84,18 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
     ):
         self.listeners.pop(event_type, None)
 
+    async def is_stream_output_supported(self) -> bool:
+        """Delegate stream output check to ws_adapter."""
+        if self._ws_adapter is not None:
+            return await self._ws_adapter.is_stream_output_supported()
+        return False
+
+    async def create_message_card(self, message_id: str | int, event: platform_events.MessageEvent) -> bool:
+        """Delegate create_message_card to ws_adapter."""
+        if self._ws_adapter is not None:
+            return await self._ws_adapter.create_message_card(message_id, event)
+        return False
+
     async def is_muted(self, group_id: int) -> bool:
         return False
 

From 27be09ab15287e31f1aab3ecf81bf6b80ac7e359 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sun, 14 Jun 2026 11:12:29 +0800
Subject: [PATCH 07/12] fix(provider): preserve litellm usage details (#2246)

---
 .../pkg/provider/modelmgr/requester.py        | 21 ++++-
 .../modelmgr/requesters/litellmchat.py        | 84 +++++++++++++++----
 tests/unit_tests/provider/test_litellmchat.py | 39 +++++++--
 3 files changed, 117 insertions(+), 27 deletions(-)

diff --git a/src/langbot/pkg/provider/modelmgr/requester.py b/src/langbot/pkg/provider/modelmgr/requester.py
index b673c758..377f7d4a 100644
--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -12,6 +12,19 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 
 
+LLM_USAGE_QUERY_VARIABLE = '_llm_usage'
+STREAM_USAGE_QUERY_VARIABLE = '_stream_usage'
+
+
+def _store_llm_usage(query: pipeline_query.Query | None, usage_info: dict | None) -> None:
+    """Store the latest provider usage on the query for upstream action handlers."""
+    if query is None or not usage_info:
+        return
+    if query.variables is None:
+        query.variables = {}
+    query.variables[LLM_USAGE_QUERY_VARIABLE] = dict(usage_info)
+
+
 class RuntimeProvider:
     """运行时模型提供商"""
 
@@ -67,6 +80,7 @@ class RuntimeProvider:
             if isinstance(result, tuple):
                 msg, usage_info = result
                 if usage_info:
+                    _store_llm_usage(query, usage_info)
                     input_tokens = usage_info.get('prompt_tokens', 0)
                     output_tokens = usage_info.get('completion_tokens', 0)
                 return msg
@@ -146,11 +160,12 @@ class RuntimeProvider:
             if query:
                 if query.variables is None:
                     query.variables = {}
-                if '_stream_usage' in query.variables:
-                    usage_info = query.variables['_stream_usage']
+                if STREAM_USAGE_QUERY_VARIABLE in query.variables:
+                    usage_info = query.variables[STREAM_USAGE_QUERY_VARIABLE]
+                    _store_llm_usage(query, usage_info)
                     input_tokens = usage_info.get('prompt_tokens', 0)
                     output_tokens = usage_info.get('completion_tokens', 0)
-                    del query.variables['_stream_usage']
+                    del query.variables[STREAM_USAGE_QUERY_VARIABLE]
         except Exception as e:
             status = 'error'
             error_message = str(e)
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
index 63536a2c..8c750bd7 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -262,32 +262,82 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
         - dict with the same keys
         - missing ``total_tokens`` (derived from prompt + completion)
         - ``None`` / partially-populated usage (defaults to 0)
+        - provider-specific token details, including cache token counters
         """
-        if usage is None:
-            return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
 
-        def _get(key: str) -> typing.Any:
-            if isinstance(usage, dict):
-                return usage.get(key)
-            return getattr(usage, key, None)
+        def _plain_value(value: typing.Any) -> typing.Any:
+            if value is None:
+                return None
+            if isinstance(value, dict):
+                return {k: _plain_value(v) for k, v in value.items() if v is not None}
+            if isinstance(value, (list, tuple)):
+                return [_plain_value(v) for v in value]
 
-        prompt_tokens = _get('prompt_tokens') or 0
-        completion_tokens = _get('completion_tokens') or 0
-        total_tokens = _get('total_tokens') or 0
+            model_dump = getattr(value, 'model_dump', None)
+            if callable(model_dump):
+                try:
+                    dumped = model_dump()
+                    if isinstance(dumped, dict):
+                        return _plain_value(dumped)
+                except Exception:
+                    pass
+
+            return value
+
+        def _usage_dict(value: typing.Any) -> dict[str, typing.Any]:
+            if value is None:
+                return {}
+            plain = _plain_value(value)
+            if isinstance(plain, dict):
+                return plain
+
+            def _is_mock_attr(attr: typing.Any) -> bool:
+                return type(attr).__module__.startswith('unittest.mock')
+
+            data: dict[str, typing.Any] = {}
+            for key in (
+                'prompt_tokens',
+                'completion_tokens',
+                'total_tokens',
+                'prompt_tokens_details',
+                'completion_tokens_details',
+                'cache_creation_input_tokens',
+                'cache_read_input_tokens',
+                'input_token_details',
+                'output_token_details',
+            ):
+                attr_value = getattr(value, key, None)
+                if attr_value is not None and not _is_mock_attr(attr_value):
+                    data[key] = _plain_value(attr_value)
+            return data
+
+        def _to_int(value: typing.Any) -> int:
+            try:
+                return int(value or 0)
+            except (TypeError, ValueError):
+                return 0
+
+        normalized = _usage_dict(usage)
+
+        prompt_tokens = _to_int(normalized.get('prompt_tokens'))
+        completion_tokens = _to_int(normalized.get('completion_tokens'))
+        total_tokens = _to_int(normalized.get('total_tokens'))
 
         # Some providers omit total_tokens in streaming usage; derive it.
         if not total_tokens:
             total_tokens = prompt_tokens + completion_tokens
 
-        return {
-            'prompt_tokens': int(prompt_tokens),
-            'completion_tokens': int(completion_tokens),
-            'total_tokens': int(total_tokens),
-        }
+        normalized['prompt_tokens'] = prompt_tokens
+        normalized['completion_tokens'] = completion_tokens
+        normalized['total_tokens'] = total_tokens
+        return normalized
 
-    def _extract_usage(self, response) -> dict:
+    def _extract_usage(self, response) -> dict | None:
         """Extract usage info from a non-streaming LiteLLM response."""
-        return self._normalize_usage(getattr(response, 'usage', None))
+        usage = getattr(response, 'usage', None)
+        if usage is None:
+            return None
+        return self._normalize_usage(usage)
 
     @staticmethod
     def _as_dict(value: typing.Any) -> dict:
@@ -486,7 +536,7 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                     if query is not None:
                         if query.variables is None:
                             query.variables = {}
-                        query.variables['_stream_usage'] = usage_info
+                        query.variables[requester.STREAM_USAGE_QUERY_VARIABLE] = usage_info
 
                 if not hasattr(chunk, 'choices') or not chunk.choices:
                     continue
diff --git a/tests/unit_tests/provider/test_litellmchat.py b/tests/unit_tests/provider/test_litellmchat.py
index 2878d827..abe0cf49 100644
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
@@ -115,6 +115,15 @@ class TestExtractUsage:
         assert result['prompt_tokens'] == 0
         assert result['completion_tokens'] == 0
 
+    def test_extract_usage_without_provider_usage(self):
+        """Missing provider usage is not treated as authoritative zero usage."""
+        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
+
+        response = Mock()
+        response.usage = None
+
+        assert requester._extract_usage(response) is None
+
 
 class TestNormalizeUsage:
     """Test _normalize_usage helper covering real-world usage shapes"""
@@ -131,6 +140,22 @@ class TestNormalizeUsage:
         )
         assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
 
+    def test_preserves_token_details(self):
+        """Provider token details such as cache counters are preserved."""
+        result = litellmchat.LiteLLMRequester._normalize_usage(
+            {
+                'prompt_tokens': 12,
+                'completion_tokens': 8,
+                'total_tokens': 20,
+                'prompt_tokens_details': {'cached_tokens': 7},
+                'completion_tokens_details': {'reasoning_tokens': 3},
+            }
+        )
+
+        assert result['prompt_tokens'] == 12
+        assert result['prompt_tokens_details'] == {'cached_tokens': 7}
+        assert result['completion_tokens_details'] == {'reasoning_tokens': 3}
+
     def test_missing_total_is_derived(self):
         """When total_tokens is absent/zero it is derived from prompt + completion"""
         usage = Mock()
@@ -166,9 +191,7 @@ class TestInvokeLLMStreamUsage:
         if has_choice:
             choice = Mock()
             delta = Mock()
-            delta.model_dump = Mock(
-                return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
-            )
+            delta.model_dump = Mock(return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls})
             choice.delta = delta
             choice.finish_reason = finish_reason
             chunk.choices = [choice]
@@ -313,7 +336,8 @@ class TestInvokeLLMStreamUsage:
 
         with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
             collected = [
-                chunk async for chunk in requester.invoke_llm_stream(
+                chunk
+                async for chunk in requester.invoke_llm_stream(
                     query=query,
                     model=model,
                     messages=messages,
@@ -788,7 +812,9 @@ class TestInvokeRerank:
         with patch('httpx.AsyncClient', return_value=mock_client):
             # arerank must NOT be called on the openai-compatible path
             with patch.object(
-                litellmchat, 'arerank', new_callable=AsyncMock,
+                litellmchat,
+                'arerank',
+                new_callable=AsyncMock,
                 side_effect=AssertionError('arerank must not be used for openai-compatible provider'),
             ):
                 results = await requester.invoke_rerank(
@@ -1068,8 +1094,7 @@ class TestScanModels:
 
         with patch.object(litellmchat.litellm, 'supports_function_calling') as mock_supports_function_calling:
             mock_supports_function_calling.side_effect = (
-                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6'
-                and custom_llm_provider is None
+                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6' and custom_llm_provider is None
             )
 
             assert requester._supports_function_calling('kimi-k2.6') is True

From e9fe2f2d432d1566bf3403ca64c7aa16712f8706 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sun, 14 Jun 2026 11:29:57 +0800
Subject: [PATCH 08/12] feat(agent-runner): support host tool lookup (#2244)

---
 src/langbot/pkg/provider/tools/errors.py      |  6 ++++++
 src/langbot/pkg/provider/tools/loader.py      | 10 ++++++++++
 src/langbot/pkg/provider/tools/loaders/mcp.py |  7 +++++++
 .../pkg/provider/tools/loaders/native.py      |  3 ++-
 .../pkg/provider/tools/loaders/plugin.py      |  3 ++-
 src/langbot/pkg/provider/tools/toolmgr.py     | 19 ++++++++++++++++++-
 .../unit_tests/provider/test_tool_manager.py  |  4 ++--
 7 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 src/langbot/pkg/provider/tools/errors.py

diff --git a/src/langbot/pkg/provider/tools/errors.py b/src/langbot/pkg/provider/tools/errors.py
new file mode 100644
index 00000000..d44b39ba
--- /dev/null
+++ b/src/langbot/pkg/provider/tools/errors.py
@@ -0,0 +1,6 @@
+class ToolNotFoundError(ValueError):
+    """Raised when a requested tool cannot be found in any active loader."""
+
+    def __init__(self, name: str):
+        self.name = name
+        super().__init__(f'Tool not found: {name}')
diff --git a/src/langbot/pkg/provider/tools/loader.py b/src/langbot/pkg/provider/tools/loader.py
index e90f07b3..f97e8216 100644
--- a/src/langbot/pkg/provider/tools/loader.py
+++ b/src/langbot/pkg/provider/tools/loader.py
@@ -4,12 +4,15 @@ import abc
 import typing
 from typing import TYPE_CHECKING
 
+from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 
 if TYPE_CHECKING:
     from ...core import app
 
+ToolLookupResult = resource_tool.LLMTool | ComponentManifest
+
 
 preregistered_loaders: list[typing.Type[ToolLoader]] = []
 
@@ -43,6 +46,13 @@ class ToolLoader(abc.ABC):
         """获取所有工具"""
         pass
 
+    async def get_tool(self, name: str) -> ToolLookupResult | None:
+        """Get one tool by name."""
+        for tool in await self.get_tools():
+            if tool.name == name:
+                return tool
+        return None
+
     @abc.abstractmethod
     async def has_tool(self, name: str) -> bool:
         """检查工具是否存在"""
diff --git a/src/langbot/pkg/provider/tools/loaders/mcp.py b/src/langbot/pkg/provider/tools/loaders/mcp.py
index 117f29cd..8049b185 100644
--- a/src/langbot/pkg/provider/tools/loaders/mcp.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp.py
@@ -567,6 +567,13 @@ class MCPLoader(loader.ToolLoader):
                     return True
         return False
 
+    async def get_tool(self, name: str) -> resource_tool.LLMTool | None:
+        for session in self.sessions.values():
+            for function in session.get_tools():
+                if function.name == name:
+                    return function
+        return None
+
     async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
         """执行工具调用"""
         for session in self.sessions.values():
diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py
index d6ef11d1..83390049 100644
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -7,6 +7,7 @@ import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query
 
 from .. import loader
+from ..errors import ToolNotFoundError
 from . import skill as skill_loader
 
 EXEC_TOOL_NAME = 'exec'
@@ -90,7 +91,7 @@ class NativeToolLoader(loader.ToolLoader):
             return await self._invoke_glob(parameters, query)
         if name == GREP_TOOL_NAME:
             return await self._invoke_grep(parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)
 
     async def shutdown(self):
         pass
diff --git a/src/langbot/pkg/provider/tools/loaders/plugin.py b/src/langbot/pkg/provider/tools/loaders/plugin.py
index 5b741848..544882d3 100644
--- a/src/langbot/pkg/provider/tools/loaders/plugin.py
+++ b/src/langbot/pkg/provider/tools/loaders/plugin.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import typing
 import traceback
 
+from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query
 
 from .. import loader
@@ -39,7 +40,7 @@ class PluginToolLoader(loader.ToolLoader):
                 return True
         return False
 
-    async def _get_tool(self, name: str) -> resource_tool.LLMTool:
+    async def get_tool(self, name: str) -> ComponentManifest | None:
         for tool in await self.ap.plugin_connector.list_tools():
             if tool.metadata.name == name:
                 return tool
diff --git a/src/langbot/pkg/provider/tools/toolmgr.py b/src/langbot/pkg/provider/tools/toolmgr.py
index fd03b303..38b08aa1 100644
--- a/src/langbot/pkg/provider/tools/toolmgr.py
+++ b/src/langbot/pkg/provider/tools/toolmgr.py
@@ -6,6 +6,9 @@ from typing import TYPE_CHECKING
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query
 
+from . import loader as tool_loader
+from .errors import ToolNotFoundError
+
 if TYPE_CHECKING:
     from ...core import app
     from langbot.pkg.provider.tools.loaders import (
@@ -67,6 +70,20 @@ class ToolManager:
 
         return all_functions
 
+    async def get_tool_by_name(self, name: str) -> tool_loader.ToolLookupResult | None:
+        """Get tool by name from any active loader."""
+        for active_loader in (
+            self.native_tool_loader,
+            self.plugin_tool_loader,
+            self.mcp_tool_loader,
+            self.skill_tool_loader,
+        ):
+            tool = await active_loader.get_tool(name)
+            if tool:
+                return tool
+
+        return None
+
     async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list:
         tools = []
 
@@ -98,7 +115,7 @@ class ToolManager:
         if await self.skill_tool_loader.has_tool(name):
             telemetry_features.increment(query, 'tool_calls', 'skill')
             return await self.skill_tool_loader.invoke_tool(name, parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)
 
     async def shutdown(self):
         await self.native_tool_loader.shutdown()
diff --git a/tests/unit_tests/provider/test_tool_manager.py b/tests/unit_tests/provider/test_tool_manager.py
index fbfcb13f..2fcf25fb 100644
--- a/tests/unit_tests/provider/test_tool_manager.py
+++ b/tests/unit_tests/provider/test_tool_manager.py
@@ -226,7 +226,7 @@ class TestToolManagerExecuteFuncCall:
 
     @pytest.mark.asyncio
     async def test_execute_raises_when_tool_not_found(self, mock_app_with_loaders, sample_query):
-        """Test that execute_func_call raises ValueError when tool not found."""
+        """Test that execute_func_call raises ToolNotFoundError when tool not found."""
         toolmgr = get_toolmgr_module()
 
         mock_app, mock_plugin_loader, mock_mcp_loader = mock_app_with_loaders
@@ -236,7 +236,7 @@ class TestToolManagerExecuteFuncCall:
         manager = toolmgr.ToolManager(mock_app)
         self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader)
 
-        with pytest.raises(ValueError, match='未找到工具'):
+        with pytest.raises(toolmgr.ToolNotFoundError, match='Tool not found: unknown_tool'):
             await manager.execute_func_call('unknown_tool', {}, sample_query)
 
     @pytest.mark.asyncio

From a60827f2218f198c0993167a09604d7a50a434a4 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <60681390+huanghuoguoguo@users.noreply.github.com>
Date: Sun, 14 Jun 2026 11:07:46 +0800
Subject: [PATCH 09/12] fix(tools): harden agent runner tool runtimes

---
 .../provider/tools/loaders/availability.py    |  18 +
 .../pkg/provider/tools/loaders/mcp_stdio.py   | 104 +++-
 .../pkg/provider/tools/loaders/native.py      | 537 ++++++++++++++++--
 .../pkg/provider/tools/loaders/skill.py       | 111 ++++
 .../provider/tools/loaders/skill_authoring.py |  75 +--
 .../provider/test_mcp_box_integration.py      |  48 +-
 tests/unit_tests/provider/test_skill_tools.py |   6 +-
 7 files changed, 745 insertions(+), 154 deletions(-)
 create mode 100644 src/langbot/pkg/provider/tools/loaders/availability.py

diff --git a/src/langbot/pkg/provider/tools/loaders/availability.py b/src/langbot/pkg/provider/tools/loaders/availability.py
new file mode 100644
index 00000000..58d79586
--- /dev/null
+++ b/src/langbot/pkg/provider/tools/loaders/availability.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+async def is_box_backend_available(ap: Any) -> bool:
+    """Return whether the configured Box backend is ready for tool execution."""
+    box_service = getattr(ap, 'box_service', None)
+    if box_service is None:
+        return False
+    if not getattr(box_service, 'available', False):
+        return False
+    try:
+        status = await box_service.get_status()
+        backend_info = status.get('backend', {})
+        return bool(backend_info.get('available', False))
+    except Exception:
+        return False
diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
index ff607e66..736dacea 100644
--- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
@@ -5,6 +5,7 @@ import asyncio
 import os
 import shutil
 import shlex
+import threading
 from typing import TYPE_CHECKING, Any
 
 import pydantic
@@ -18,12 +19,26 @@ from ....box.workspace import (
     rewrite_mounted_path,
     rewrite_venv_command,
     unwrap_venv_path,
+    wrap_python_command_with_env,
 )
 
 if TYPE_CHECKING:
     from .mcp import RuntimeMCPSession
 
 
+_WORKSPACE_COPY_LOCKS: dict[str, threading.Lock] = {}
+_WORKSPACE_COPY_LOCKS_GUARD = threading.Lock()
+
+
+def _workspace_copy_lock(path: str) -> threading.Lock:
+    with _WORKSPACE_COPY_LOCKS_GUARD:
+        lock = _WORKSPACE_COPY_LOCKS.get(path)
+        if lock is None:
+            lock = threading.Lock()
+            _WORKSPACE_COPY_LOCKS[path] = lock
+        return lock
+
+
 class MCPSessionErrorPhase(enum.Enum):
     """Which phase of the MCP lifecycle failed."""
 
@@ -49,7 +64,7 @@ class MCPServerBoxConfig(pydantic.BaseModel):
     host_path: str | None = None
     host_path_mode: str = 'ro'  # MCP servers default to read-write mount only when explicitly requested
     env: dict[str, str] = pydantic.Field(default_factory=dict)
-    startup_timeout_sec: int = 120  # Longer default to allow dependency bootstrap
+    startup_timeout_sec: int = 300  # First Docker bootstrap may need to build a venv and install MCP deps.
     cpus: float | None = None
     memory_mb: int | None = None
     pids_limit: int | None = None
@@ -128,6 +143,7 @@ class BoxStdioSessionRuntime:
         workspace = self._build_workspace(host_path=None)
         host_path = self.resolve_host_path()
         process_cwd = '/workspace'
+        install_cmd: str | None = None
 
         try:
             await workspace.create_session()
@@ -168,6 +184,8 @@ class BoxStdioSessionRuntime:
                 env=self.server_config.get('env', {}),
                 cwd=process_cwd,
             )
+            if install_cmd:
+                payload = self._wrap_process_payload_with_python_env(payload, process_cwd)
             payload['process_id'] = self.process_id
             await workspace.box_service.start_managed_process(workspace.session_id, payload)
         except Exception:
@@ -253,14 +271,44 @@ class BoxStdioSessionRuntime:
 
     @staticmethod
     def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
-        shutil.rmtree(process_host_root, ignore_errors=True)
-        os.makedirs(process_host_root, exist_ok=True)
-        shutil.copytree(
-            source_path,
-            process_host_workspace,
-            symlinks=True,
-            ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'),
-        )
+        # Docker-backed bootstrap writes root-owned runtime directories such as
+        # .venv/.tmp into the staged workspace. The host process may not be able
+        # to delete them, so refresh source files in place and preserve runtime
+        # directories instead of rmtree'ing the whole staging root.
+        with _workspace_copy_lock(process_host_root):
+            preserved_names = {'.venv', 'venv', 'env', '.env', '.cache', '.tmp', '.langbot'}
+            os.makedirs(process_host_workspace, exist_ok=True)
+            for name in os.listdir(process_host_workspace):
+                if name in preserved_names:
+                    continue
+                path = os.path.join(process_host_workspace, name)
+                if os.path.isdir(path) and not os.path.islink(path):
+                    shutil.rmtree(path, ignore_errors=True)
+                else:
+                    try:
+                        os.unlink(path)
+                    except FileNotFoundError:
+                        pass
+            shutil.copytree(
+                source_path,
+                process_host_workspace,
+                symlinks=True,
+                dirs_exist_ok=True,
+                ignore=shutil.ignore_patterns(
+                    '.git',
+                    '__pycache__',
+                    '.pytest_cache',
+                    '.mypy_cache',
+                    '.ruff_cache',
+                    '.venv',
+                    'venv',
+                    'env',
+                    '.env',
+                    '.cache',
+                    '.tmp',
+                    '.langbot',
+                ),
+            )
 
     async def _cleanup_staged_workspace(self) -> None:
         if not self.resolve_host_path():
@@ -343,23 +391,31 @@ class BoxStdioSessionRuntime:
     @staticmethod
     def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
         workspace_kind = classify_python_workspace(host_path)
-        quoted_workspace_path = shlex.quote(workspace_path)
-        if workspace_kind == 'package':
-            return (
-                'mkdir -p /opt/_lb_src'
-                f' && tar -C {quoted_workspace_path}'
-                ' --exclude=.venv --exclude=.git --exclude=__pycache__'
-                ' --exclude=node_modules --exclude=.tox --exclude=.nox'
-                ' --exclude="*.egg-info" --exclude=.uv-cache'
-                ' -cf - .'
-                ' | tar -C /opt/_lb_src -xf -'
-                ' && pip install --no-cache-dir /opt/_lb_src'
-                ' && rm -rf /opt/_lb_src'
-            )
-        if workspace_kind == 'requirements':
-            return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
+        if workspace_kind in {'package', 'requirements'}:
+            return wrap_python_command_with_env('python -c "pass"', mount_path=workspace_path).rstrip()
         return None
 
+    @staticmethod
+    def _wrap_process_payload_with_python_env(payload: dict[str, Any], workspace_path: str) -> dict[str, Any]:
+        """Start a prepared Python workspace without writing bootstrap output to MCP stdio."""
+        workspace_root = workspace_path.rstrip('/') or '/workspace'
+        venv_dir = f'{workspace_root}/.venv'
+        venv_bin = f'{venv_dir}/bin'
+        command = ' '.join(
+            [shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]]
+        )
+        wrapped = dict(payload)
+        wrapped['command'] = 'sh'
+        wrapped['args'] = [
+            '-lc',
+            (
+                f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; '
+                f'export PATH={shlex.quote(venv_bin)}:$PATH; '
+                f'exec {command}'
+            ),
+        ]
+        return wrapped
+
     def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
         workspace = self._build_workspace()
         workspace.session_id = session_id
diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py
index 83390049..bf9f357f 100644
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import mimetypes
 import os
 
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
@@ -8,6 +9,7 @@ from langbot_plugin.api.entities.events import pipeline_query
 
 from .. import loader
 from ..errors import ToolNotFoundError
+from .availability import is_box_backend_available
 from . import skill as skill_loader
 
 EXEC_TOOL_NAME = 'exec'
@@ -22,6 +24,15 @@ _ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NA
 # Skip these dirs during grep walk to avoid noise
 _SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'}
 
+_DEFAULT_READ_MAX_LINES = 2000
+_MAX_READ_MAX_LINES = 10000
+_DEFAULT_TOOL_RESULT_MAX_BYTES = 50 * 1024
+_BOX_FILE_SCRIPT_MAX_BYTES = 2048
+_GLOB_MAX_MATCHES = 100
+_GREP_MAX_MATCHES = 200
+_GREP_MAX_FILES = 5000
+_GREP_MAX_LINE_CHARS = 500
+
 
 class NativeToolLoader(loader.ToolLoader):
     def __init__(self, ap):
@@ -43,18 +54,7 @@ class NativeToolLoader(loader.ToolLoader):
 
     async def _check_backend_available(self) -> bool:
         """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
-        if box_service is None:
-            return False
-        if not getattr(box_service, 'available', False):
-            return False
-        # Check if backend is truly available via get_status
-        try:
-            status = await box_service.get_status()
-            backend_info = status.get('backend', {})
-            return backend_info.get('available', False)
-        except Exception:
-            return False
+        return await is_box_backend_available(self.ap)
 
     async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
         if not self._is_sandbox_available():
@@ -139,6 +139,7 @@ class NativeToolLoader(loader.ToolLoader):
         # via execute_tool. Skills are mounted at /workspace/.skills/{name}/
         # via extra_mounts built by BoxService.
         result = await self.ap.box_service.execute_tool(parameters, query)
+        result = self._normalize_exec_result(result)
 
         if selected_skill is not None:
             self._refresh_skill_from_disk(selected_skill)
@@ -227,19 +228,65 @@ class NativeToolLoader(loader.ToolLoader):
         except Exception:
             return {'ok': False, 'error': stdout or 'Box file operation returned no result'}
 
-    async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
+    async def _read_workspace_via_box(self, path: str, parameters: dict, query: pipeline_query.Query) -> dict:
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        # Box file fallback returns through exec stdout, which is already capped
+        # by BoxService. Keep this payload small enough to remain valid JSON.
+        max_bytes = min(
+            self._positive_int(parameters.get('max_bytes'), default=_DEFAULT_TOOL_RESULT_MAX_BYTES),
+            _BOX_FILE_SCRIPT_MAX_BYTES,
+        )
         script = f"""
 import json, os
 path = {json.dumps(path)}
+offset = {offset}
+max_lines = {max_lines}
+max_bytes = {max_bytes}
 if not path.startswith('/workspace'):
     print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
 elif not os.path.exists(path):
     print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
 elif os.path.isdir(path):
-    print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
+    entries = sorted(os.listdir(path))
+    content = '\\n'.join(entries)
+    print(json.dumps({{'ok': True, 'content': content, 'is_directory': True, 'total': len(entries), 'truncated': False}}))
 else:
+    lines = []
+    output_bytes = 0
+    end_line = offset - 1
+    truncated = False
+    next_offset = None
     with open(path, 'r', encoding='utf-8', errors='replace') as f:
-        print(json.dumps({{'ok': True, 'content': f.read()}}))
+        for line_number, line in enumerate(f, 1):
+            if line_number < offset:
+                continue
+            if len(lines) >= max_lines:
+                truncated = True
+                next_offset = line_number
+                break
+            line_bytes = len(line.encode('utf-8'))
+            if output_bytes + line_bytes > max_bytes:
+                truncated = True
+                next_offset = line_number
+                break
+            lines.append(line.rstrip('\\n'))
+            output_bytes += line_bytes
+            end_line = line_number
+    print(json.dumps({{
+        'ok': True,
+        'content': '\\n'.join(lines),
+        'truncated': truncated,
+        'start_line': offset,
+        'end_line': end_line,
+        'next_offset': next_offset,
+        'max_lines': max_lines,
+        'max_bytes': max_bytes,
+    }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -307,12 +354,27 @@ else:
         if not any(part in skip_dirs for part in item.parts)
     ]
     hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
-    shown = hits[:100]
+    shown = hits[:{_GLOB_MAX_MATCHES}]
     matches = []
+    output_bytes = 0
+    truncated_by_bytes = False
     for item in shown:
         rel = os.path.relpath(str(item), path)
-        matches.append(os.path.join(path, rel).replace(os.sep, '/'))
-    print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
+        sandbox_path = os.path.join(path, rel).replace(os.sep, '/')
+        entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if matches else 0)
+        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
+            truncated_by_bytes = True
+            break
+        matches.append(sandbox_path)
+        output_bytes += entry_bytes
+    print(json.dumps({{
+        'ok': True,
+        'matches': matches,
+        'preview': '\\n'.join(matches),
+        'total': len(hits),
+        'truncated': len(hits) > len(matches) or truncated_by_bytes,
+        'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if len(hits) > len(matches) else None),
+    }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -350,29 +412,54 @@ else:
                     continue
                 if item.is_file():
                     files.append(item)
-                if len(files) >= 5000:
+                if len(files) >= {_GREP_MAX_FILES}:
                     break
 
         matches = []
+        output_bytes = 0
+        truncated_by = None
         for fp in files:
             try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
             except OSError:
                 continue
-            for lineno, line in enumerate(text.splitlines(), 1):
-                if regex.search(line):
-                    if base.is_file():
-                        file_path = path
-                    else:
-                        rel = os.path.relpath(str(fp), path)
-                        file_path = os.path.join(path, rel).replace(os.sep, '/')
-                    matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
-                    if len(matches) >= 200:
-                        break
-            if len(matches) >= 200:
+            with handle:
+                for lineno, line in enumerate(handle, 1):
+                    if regex.search(line):
+                        if base.is_file():
+                            file_path = path
+                        else:
+                            rel = os.path.relpath(str(fp), path)
+                            file_path = os.path.join(path, rel).replace(os.sep, '/')
+                        content = line.rstrip()
+                        line_truncated = False
+                        if len(content) > {_GREP_MAX_LINE_CHARS}:
+                            content = content[:{_GREP_MAX_LINE_CHARS}] + '... [truncated]'
+                            line_truncated = True
+                        entry = {{'file': file_path, 'line': lineno, 'content': content}}
+                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
+                        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
+                            truncated_by = 'bytes'
+                            break
+                        if line_truncated and truncated_by is None:
+                            truncated_by = 'line'
+                        matches.append(entry)
+                        output_bytes += entry_bytes
+                        if len(matches) >= {_GREP_MAX_MATCHES}:
+                            truncated_by = truncated_by or 'matches'
+                            break
+                if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
+                    break
+            if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
                 break
 
-        print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
+        print(json.dumps({{
+            'ok': True,
+            'matches': matches,
+            'total': len(matches),
+            'truncated': truncated_by is not None,
+            'truncated_by': truncated_by,
+        }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -387,14 +474,22 @@ else:
         )
         if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
             selected_skill, relative = skill_request
+            host_path = self._resolve_skill_host_path(selected_skill, relative)
+            if host_path and os.path.exists(host_path):
+                if os.path.isdir(host_path):
+                    return self._build_directory_result(os.listdir(host_path))
+                result = self._read_text_file_preview(host_path, parameters)
+                host_root = str(selected_skill.get('package_root', '') or '')
+                return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
+
             try:
                 result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
-                return {'ok': True, 'content': result.get('content', '')}
+                return self._build_read_result_from_text(str(result.get('content', '')), parameters)
             except Exception:
                 try:
                     result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
                     entries = [entry['name'] for entry in result.get('entries', [])]
-                    return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+                    return self._build_directory_result(entries)
                 except Exception as exc:
                     return {'ok': False, 'error': str(exc)}
 
@@ -405,15 +500,15 @@ else:
             include_activated=True,
         )
         if self._should_use_box_workspace_files(selected_skill):
-            return await self._read_workspace_via_box(path, query)
+            return await self._read_workspace_via_box(path, parameters, query)
         if not os.path.exists(host_path):
             return {'ok': False, 'error': f'File not found: {path}'}
         if os.path.isdir(host_path):
             entries = os.listdir(host_path)
-            return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
-        with open(host_path, 'r', errors='replace') as f:
-            content = f.read()
-        return {'ok': True, 'content': content}
+            return self._build_directory_result(entries)
+        result = self._read_text_file_preview(host_path, parameters)
+        host_root = self._get_host_root(selected_skill)
+        return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
 
     async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
         path = parameters['path']
@@ -584,6 +679,29 @@ else:
                         'type': 'string',
                         'description': 'Absolute path to the file (must be under /workspace).',
                     },
+                    'offset': {
+                        'type': 'integer',
+                        'description': '1-indexed line number to start reading from. Defaults to 1.',
+                        'default': 1,
+                        'minimum': 1,
+                    },
+                    'limit': {
+                        'type': 'integer',
+                        'description': f'Maximum number of lines to return. Defaults to {_DEFAULT_READ_MAX_LINES}.',
+                        'default': _DEFAULT_READ_MAX_LINES,
+                        'minimum': 1,
+                        'maximum': _MAX_READ_MAX_LINES,
+                    },
+                    'max_bytes': {
+                        'type': 'integer',
+                        'description': (
+                            'Maximum bytes of file content to return. '
+                            f'Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
+                        ),
+                        'default': _DEFAULT_TOOL_RESULT_MAX_BYTES,
+                        'minimum': 1,
+                        'maximum': _DEFAULT_TOOL_RESULT_MAX_BYTES,
+                    },
                 },
                 'required': ['path'],
                 'additionalProperties': False,
@@ -740,22 +858,30 @@ else:
         hits.sort(key=lambda p: p.stat().st_mtime if p.exists() else 0, reverse=True)
 
         total = len(hits)
-        shown = hits[:100]
+        shown = hits[:_GLOB_MAX_MATCHES]
 
         # Convert back to sandbox paths
         sandbox_paths = []
+        output_bytes = 0
+        truncated_by_bytes = False
         for h in shown:
             rel = os.path.relpath(str(h), host_path)
             sandbox_path = os.path.join(path, rel)
+            entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if sandbox_paths else 0)
+            if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
+                truncated_by_bytes = True
+                break
             sandbox_paths.append(sandbox_path)
+            output_bytes += entry_bytes
 
-        result_lines = sandbox_paths
-        result = '\n'.join(result_lines)
-
-        if total > 100:
-            result += f'\n... ({total} matches, showing first 100)'
-
-        return {'ok': True, 'matches': result_lines, 'total': total, 'truncated': total > 100}
+        return {
+            'ok': True,
+            'matches': sandbox_paths,
+            'preview': '\n'.join(sandbox_paths),
+            'total': total,
+            'truncated': total > len(sandbox_paths) or truncated_by_bytes,
+            'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if total > len(sandbox_paths) else None),
+        }
 
     async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
         pattern = parameters['pattern']
@@ -791,32 +917,46 @@ else:
             files = self._grep_walk(base, include)
 
         matches = []
+        output_bytes = 0
+        truncated_by = None
         for fp in files:
             try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
             except OSError:
                 continue
-            for lineno, line in enumerate(text.splitlines(), 1):
-                if regex.search(line):
-                    rel = os.path.relpath(str(fp), host_path)
-                    sandbox_path = os.path.join(path, rel)
-                    matches.append(
-                        {
+            with handle:
+                for lineno, line in enumerate(handle, 1):
+                    if regex.search(line):
+                        rel = os.path.relpath(str(fp), host_path)
+                        sandbox_path = os.path.join(path, rel)
+                        content, line_truncated = self._truncate_grep_line(line.rstrip())
+                        entry = {
                             'file': sandbox_path,
                             'line': lineno,
-                            'content': line.rstrip(),
+                            'content': content,
                         }
-                    )
-                    if len(matches) >= 200:
-                        break
-            if len(matches) >= 200:
+                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
+                        if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
+                            truncated_by = 'bytes'
+                            break
+                        if line_truncated and truncated_by is None:
+                            truncated_by = 'line'
+                        matches.append(entry)
+                        output_bytes += entry_bytes
+                        if len(matches) >= _GREP_MAX_MATCHES:
+                            truncated_by = truncated_by or 'matches'
+                            break
+                if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
+                    break
+            if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
                 break
 
         return {
             'ok': True,
             'matches': matches,
             'total': len(matches),
-            'truncated': len(matches) >= 200,
+            'truncated': truncated_by is not None,
+            'truncated_by': truncated_by,
         }
 
     @staticmethod
@@ -828,10 +968,285 @@ else:
                 continue
             if item.is_file():
                 results.append(item)
-            if len(results) >= 5000:
+            if len(results) >= _GREP_MAX_FILES:
                 break
         return results
 
+    @staticmethod
+    def _resolve_skill_host_path(selected_skill: dict, relative: str) -> str | None:
+        package_root = str(selected_skill.get('package_root', '') or '').strip()
+        if not package_root:
+            return None
+
+        host_root = os.path.realpath(package_root)
+        host_path = os.path.realpath(os.path.join(host_root, relative))
+        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
+            raise ValueError('Path escapes the skill package boundary.')
+        return host_path
+
+    def _get_host_root(self, selected_skill: dict | None) -> str:
+        if selected_skill is not None:
+            return str(selected_skill.get('package_root', '') or '')
+        return str(getattr(self.ap.box_service, 'default_workspace', '') or '')
+
+    async def _attach_file_artifact_ref(
+        self,
+        result: dict,
+        host_path: str,
+        host_root: str,
+        sandbox_path: str,
+        query: pipeline_query.Query,
+    ) -> dict:
+        if not result.get('ok') or not result.get('truncated') or result.get('artifact_refs'):
+            return result
+        if not host_root or not os.path.isfile(host_path):
+            return result
+
+        run_session = self._get_agent_run_session(query)
+        if not run_session:
+            return result
+
+        persistence_mgr = getattr(self.ap, 'persistence_mgr', None)
+        get_db_engine = getattr(persistence_mgr, 'get_db_engine', None)
+        if not callable(get_db_engine):
+            return result
+
+        try:
+            from langbot.pkg.agent.runner.artifact_store import ArtifactStore
+
+            authorization = run_session.get('authorization', {}) if isinstance(run_session, dict) else {}
+            mime_type = mimetypes.guess_type(host_path)[0] or 'text/plain'
+            size_bytes = os.path.getsize(host_path)
+            metadata = {
+                'tool_name': READ_TOOL_NAME,
+                'sandbox_path': sandbox_path,
+                'truncated_by': result.get('truncated_by'),
+                'start_line': result.get('start_line'),
+                'end_line': result.get('end_line'),
+                'next_offset': result.get('next_offset'),
+            }
+            artifact_id = await ArtifactStore(get_db_engine()).register_file_artifact(
+                artifact_id=None,
+                host_path=host_path,
+                host_root=host_root,
+                artifact_type='file',
+                source='tool',
+                mime_type=mime_type,
+                name=os.path.basename(host_path),
+                size_bytes=size_bytes,
+                conversation_id=authorization.get('conversation_id'),
+                run_id=run_session.get('run_id') if isinstance(run_session, dict) else None,
+                runner_id=run_session.get('runner_id') if isinstance(run_session, dict) else None,
+                bot_id=getattr(query, 'bot_uuid', None),
+                workspace_id=authorization.get('workspace_id'),
+                thread_id=authorization.get('thread_id'),
+                metadata=metadata,
+            )
+            artifact_ref = {
+                'artifact_id': artifact_id,
+                'artifact_type': 'file',
+                'mime_type': mime_type,
+                'name': os.path.basename(host_path),
+                'size_bytes': size_bytes,
+            }
+            enriched = dict(result)
+            enriched['preview'] = str(result.get('content') or '')
+            enriched['artifact_refs'] = [artifact_ref]
+            return enriched
+        except Exception as exc:
+            self.ap.logger.warning(f'Failed to register read artifact for {sandbox_path}: {exc}')
+            return result
+
+    @staticmethod
+    def _get_agent_run_session(query: pipeline_query.Query) -> dict | None:
+        session = getattr(query, '_agent_run_session', None)
+        return session if isinstance(session, dict) else None
+
+    def _normalize_exec_result(self, result: dict) -> dict:
+        normalized = dict(result)
+        stdout = str(normalized.get('stdout') or '')
+        stderr = str(normalized.get('stderr') or '')
+        stdout, stdout_capped = self._truncate_text_to_bytes_with_flag(stdout, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        stderr, stderr_capped = self._truncate_text_to_bytes_with_flag(stderr, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        normalized['stdout'] = stdout
+        normalized['stderr'] = stderr
+        normalized['stdout_truncated'] = bool(normalized.get('stdout_truncated') or stdout_capped)
+        normalized['stderr_truncated'] = bool(normalized.get('stderr_truncated') or stderr_capped)
+
+        if stdout and stderr:
+            preview_raw = f'stdout:\n{stdout}\n\nstderr:\n{stderr}'
+        else:
+            preview_raw = stdout or stderr
+        preview, preview_capped = self._truncate_text_to_bytes_with_flag(preview_raw, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        normalized['preview'] = preview
+        normalized['truncated'] = bool(
+            normalized['stdout_truncated'] or normalized['stderr_truncated'] or preview_capped
+        )
+        if preview_capped and not normalized.get('truncated_by'):
+            normalized['truncated_by'] = 'bytes'
+        return normalized
+
+    def _build_directory_result(self, entries: list[str]) -> dict:
+        sorted_entries = sorted(str(entry) for entry in entries)
+        content = '\n'.join(sorted_entries)
+        preview = self._truncate_text_to_bytes(content, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        truncated = preview != content
+        return {
+            'ok': True,
+            'content': preview,
+            'is_directory': True,
+            'total': len(sorted_entries),
+            'truncated': truncated,
+            'truncated_by': 'bytes' if truncated else None,
+        }
+
+    def _read_text_file_preview(self, host_path: str, parameters: dict) -> dict:
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        max_bytes = self._positive_int(
+            parameters.get('max_bytes'),
+            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+        )
+        lines: list[str] = []
+        output_bytes = 0
+        end_line = offset - 1
+        truncated = False
+        truncated_by: str | None = None
+        next_offset: int | None = None
+
+        with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
+            for line_number, line in enumerate(f, 1):
+                if line_number < offset:
+                    continue
+                if len(lines) >= max_lines:
+                    truncated = True
+                    truncated_by = 'lines'
+                    next_offset = line_number
+                    break
+
+                line_bytes = len(line.encode('utf-8'))
+                if output_bytes + line_bytes > max_bytes:
+                    truncated = True
+                    truncated_by = 'bytes'
+                    next_offset = line_number
+                    break
+
+                lines.append(line.rstrip('\n'))
+                output_bytes += line_bytes
+                end_line = line_number
+
+        if not lines and truncated_by == 'bytes':
+            content = (
+                f'[Line {next_offset or offset} exceeds the {self._format_size(max_bytes)} read limit. '
+                'Use exec with a byte-range command for this line, or read a different offset.]'
+            )
+        else:
+            content = '\n'.join(lines)
+
+        return {
+            'ok': True,
+            'content': content,
+            'truncated': truncated,
+            'truncated_by': truncated_by,
+            'start_line': offset,
+            'end_line': end_line,
+            'next_offset': next_offset,
+            'max_lines': max_lines,
+            'max_bytes': max_bytes,
+        }
+
+    def _build_read_result_from_text(self, content: str, parameters: dict) -> dict:
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        max_bytes = self._positive_int(
+            parameters.get('max_bytes'),
+            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+        )
+        all_lines = content.splitlines()
+        start_index = offset - 1
+        if start_index >= len(all_lines) and all_lines:
+            return {'ok': False, 'error': f'Offset {offset} is beyond end of file ({len(all_lines)} lines total)'}
+        output_lines: list[str] = []
+        output_bytes = 0
+        truncated = False
+        truncated_by: str | None = None
+        next_offset: int | None = None
+        for index, line in enumerate(all_lines[start_index:], start_index + 1):
+            if len(output_lines) >= max_lines:
+                truncated = True
+                truncated_by = 'lines'
+                next_offset = index
+                break
+            line_bytes = len(line.encode('utf-8')) + (1 if output_lines else 0)
+            if output_bytes + line_bytes > max_bytes:
+                truncated = True
+                truncated_by = 'bytes'
+                next_offset = index
+                break
+            output_lines.append(line)
+            output_bytes += line_bytes
+
+        end_line = offset + len(output_lines) - 1
+        return {
+            'ok': True,
+            'content': '\n'.join(output_lines),
+            'truncated': truncated,
+            'truncated_by': truncated_by,
+            'start_line': offset,
+            'end_line': end_line,
+            'next_offset': next_offset,
+            'max_lines': max_lines,
+            'max_bytes': max_bytes,
+        }
+
+    @staticmethod
+    def _positive_int(value, *, default: int, max_value: int | None = None) -> int:
+        try:
+            parsed = int(value)
+        except (TypeError, ValueError):
+            parsed = default
+        if parsed <= 0:
+            parsed = default
+        if max_value is not None:
+            parsed = min(parsed, max_value)
+        return parsed
+
+    @staticmethod
+    def _truncate_grep_line(line: str) -> tuple[str, bool]:
+        if len(line) <= _GREP_MAX_LINE_CHARS:
+            return line, False
+        return f'{line[:_GREP_MAX_LINE_CHARS]}... [truncated]', True
+
+    @staticmethod
+    def _truncate_text_to_bytes(text: str, max_bytes: int) -> str:
+        return NativeToolLoader._truncate_text_to_bytes_with_flag(text, max_bytes)[0]
+
+    @staticmethod
+    def _truncate_text_to_bytes_with_flag(text: str, max_bytes: int) -> tuple[str, bool]:
+        data = text.encode('utf-8')
+        if len(data) <= max_bytes:
+            return text, False
+        truncated = data[:max_bytes]
+        while truncated and (truncated[-1] & 0xC0) == 0x80:
+            truncated = truncated[:-1]
+        return truncated.decode('utf-8', errors='ignore'), True
+
+    @staticmethod
+    def _format_size(bytes_count: int) -> str:
+        if bytes_count < 1024:
+            return f'{bytes_count}B'
+        return f'{bytes_count / 1024:.1f}KB'
+
     def _summarize_parameters(self, parameters: dict) -> dict:
         summary = dict(parameters)
         cmd = str(summary.get('command', '')).strip()
diff --git a/src/langbot/pkg/provider/tools/loaders/skill.py b/src/langbot/pkg/provider/tools/loaders/skill.py
index 9df94fd2..fbfd72b3 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill.py
@@ -10,6 +10,7 @@ if typing.TYPE_CHECKING:
     from langbot_plugin.api.entities.events import pipeline_query
 
 ACTIVATED_SKILLS_KEY = '_activated_skills'
+ACTIVATED_SKILL_NAMES_STATE_KEY = 'host.activated_skills'
 PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
 SKILL_MOUNT_PREFIX = '/workspace/.skills'
 _SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
@@ -72,6 +73,116 @@ def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> N
         activated[skill_name] = skill_data
 
 
+def _normalize_skill_names(value: typing.Any) -> list[str]:
+    if not isinstance(value, list):
+        return []
+
+    names: list[str] = []
+    for item in value:
+        skill_name = str(item or '').strip()
+        if skill_name and skill_name not in names:
+            names.append(skill_name)
+    return names
+
+
+def restore_activated_skills_from_state(
+    ap: app.Application,
+    query: pipeline_query.Query,
+    state: dict[str, dict[str, typing.Any]],
+) -> list[str]:
+    """Restore persisted activated skill names into Query variables.
+
+    The state value stores names only. Full skill metadata is rebuilt from the
+    current pipeline-visible skill cache so removed or unbound skills remain
+    unavailable to native exec/write/edit.
+    """
+    conversation_state = state.get('conversation', {}) if isinstance(state, dict) else {}
+    skill_names = _normalize_skill_names(conversation_state.get(ACTIVATED_SKILL_NAMES_STATE_KEY))
+    restored: list[str] = []
+    for skill_name in skill_names:
+        skill_data = get_visible_skill(ap, query, skill_name)
+        if skill_data is None:
+            continue
+        register_activated_skill(query, skill_data)
+        restored.append(skill_name)
+    return restored
+
+
+def _get_agent_run_authorization(query: pipeline_query.Query) -> dict[str, typing.Any] | None:
+    session = getattr(query, '_agent_run_session', None)
+    if not isinstance(session, dict):
+        return None
+    authorization = session.get('authorization')
+    return authorization if isinstance(authorization, dict) else None
+
+
+def _get_conversation_state_target(query: pipeline_query.Query) -> tuple[str, str, str, dict[str, typing.Any]] | None:
+    session = getattr(query, '_agent_run_session', None)
+    if not isinstance(session, dict):
+        return None
+
+    authorization = _get_agent_run_authorization(query)
+    if authorization is None:
+        return None
+
+    state_policy = authorization.get('state_policy') or {}
+    if not state_policy.get('enable_state', True):
+        return None
+    state_scopes = state_policy.get('state_scopes', ['conversation', 'actor'])
+    if 'conversation' not in state_scopes:
+        return None
+
+    state_context = authorization.get('state_context') or {}
+    scope_keys = state_context.get('scope_keys') or {}
+    scope_key = scope_keys.get('conversation')
+    if not scope_key:
+        return None
+
+    runner_id = str(session.get('runner_id') or 'unknown')
+    binding_identity = str(state_context.get('binding_identity') or 'unknown')
+    return scope_key, runner_id, binding_identity, state_context
+
+
+async def persist_activated_skill(ap: app.Application, query: pipeline_query.Query, skill_name: str) -> bool:
+    """Persist activated skill names for the current AgentRunner conversation.
+
+    Returns False when the call is outside an AgentRunner run or state policy
+    does not expose a conversation scope. The in-memory Query activation still
+    remains valid for the current turn.
+    """
+    target = _get_conversation_state_target(query)
+    if target is None:
+        return False
+
+    persistence_mgr = getattr(ap, 'persistence_mgr', None)
+    if persistence_mgr is None or not hasattr(persistence_mgr, 'get_db_engine'):
+        return False
+
+    from ....agent.runner.persistent_state_store import get_persistent_state_store
+
+    scope_key, runner_id, binding_identity, state_context = target
+    store = get_persistent_state_store(persistence_mgr.get_db_engine())
+    existing_names = _normalize_skill_names(await store.state_get(scope_key, ACTIVATED_SKILL_NAMES_STATE_KEY))
+    if skill_name not in existing_names:
+        existing_names.append(skill_name)
+
+    success, error = await store.state_set(
+        scope_key=scope_key,
+        state_key=ACTIVATED_SKILL_NAMES_STATE_KEY,
+        value=existing_names,
+        runner_id=runner_id,
+        binding_identity=binding_identity,
+        scope='conversation',
+        context=state_context,
+        logger=getattr(ap, 'logger', None),
+    )
+    if not success:
+        logger = getattr(ap, 'logger', None)
+        if logger is not None:
+            logger.warning(f'Failed to persist activated skill "{skill_name}": {error}')
+    return success
+
+
 def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
     normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace'
     if normalized_path == SKILL_MOUNT_PREFIX:
diff --git a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
index 9d0fe6e9..cde4314b 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
@@ -6,6 +6,7 @@ import typing
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 
 from .. import loader
+from .availability import is_box_backend_available
 
 # Align with Claude Code's Skill tool design:
 # - activate: Activate a skill via Tool Call, returns SKILL.md content
@@ -45,18 +46,7 @@ class SkillToolLoader(loader.ToolLoader):
 
     async def _check_sandbox_available(self) -> bool:
         """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
-        if box_service is None:
-            return False
-        if not getattr(box_service, 'available', False):
-            return False
-        # Check if backend is truly available via get_status
-        try:
-            status = await box_service.get_status()
-            backend_info = status.get('backend', {})
-            return backend_info.get('available', False)
-        except Exception:
-            return False
+        return await is_box_backend_available(self.ap)
 
     async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
         if not self._is_available():
@@ -92,17 +82,17 @@ class SkillToolLoader(loader.ToolLoader):
         if not skill_name:
             raise ValueError('skill_name is required')
 
-        skill_mgr = self.ap.skill_mgr
-        skill_data = skill_mgr.get_skill_by_name(skill_name)
+        from . import skill as skill_loader
+
+        skill_data = skill_loader.get_visible_skill(self.ap, query, skill_name)
         if skill_data is None:
-            visible_skills = getattr(skill_mgr, 'skills', {})
+            visible_skills = skill_loader.get_visible_skills(self.ap, query)
             available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
             raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')
 
         # Register activated skill for sandbox mount path resolution
-        from . import skill as skill_loader
-
         skill_loader.register_activated_skill(query, skill_data)
+        await skill_loader.persist_activated_skill(self.ap, query, skill_name)
 
         # Return SKILL.md content as Tool Result (injects into context)
         instructions = skill_data.get('instructions', '')
@@ -201,13 +191,13 @@ class SkillToolLoader(loader.ToolLoader):
         return resource_tool.LLMTool(
             name=ACTIVATE_SKILL_TOOL_NAME,
             human_desc='Activate a skill',
-            description=self._build_activate_tool_description(),
+            description='Activate a pipeline-visible skill by name and return its instructions as a tool result.',
             parameters={
                 'type': 'object',
                 'properties': {
                     'skill_name': {
                         'type': 'string',
-                        'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
+                        'description': 'The skill name to activate.',
                     },
                 },
                 'required': ['skill_name'],
@@ -255,50 +245,3 @@ class SkillToolLoader(loader.ToolLoader):
             },
             func=lambda parameters: parameters,
         )
-
-    def _build_activate_tool_description(self) -> str:
-        """Build tool description with embedded available_skills list."""
-        skill_mgr = getattr(self.ap, 'skill_mgr', None)
-        if skill_mgr is None:
-            return 'Activate a skill. No skills are currently available.'
-
-        skills = getattr(skill_mgr, 'skills', {})
-        if not skills:
-            return 'Activate a skill. No skills are currently available.'
-
-        # Build <available_skills> section
-        available_skills_lines = ['<available_skills>']
-        for skill_name, skill_data in sorted(skills.items()):
-            description = skill_data.get('description', '')
-            available_skills_lines.append('<skill>')
-            available_skills_lines.append(f'<name>{skill_name}</name>')
-            available_skills_lines.append(f'<description>{description}</description>')
-            available_skills_lines.append('</skill>')
-        available_skills_lines.append('</available_skills>')
-
-        available_skills_block = '\n'.join(available_skills_lines)
-
-        return f"""Activate a skill within the main conversation.
-
-<skills_instructions>
-When users ask you to perform tasks, check if any of the available skills
-below can help complete the task more effectively. Skills provide specialized
-capabilities and domain knowledge.
-
-How to use skills:
-- Invoke skills using this tool with the skill name only (no arguments)
-- When you invoke a skill, you will see <command-message>
-The skill is activated
-</command-message>
-- The skill's instructions will be provided in the tool result
-- Examples:
-  - skill_name: "pdf" - invoke the pdf skill
-  - skill_name: "data-analysis" - invoke the data-analysis skill
-
-Important:
-- Only use skills listed in <available_skills> below
-- Do not invoke a skill that is already running
-- To create a new skill: prepare it in /workspace, then use register_skill tool
-</skills_instructions>
-
-{available_skills_block}"""
diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py
index 3e3a7a4d..db1aa542 100644
--- a/tests/unit_tests/provider/test_mcp_box_integration.py
+++ b/tests/unit_tests/provider/test_mcp_box_integration.py
@@ -180,7 +180,7 @@ class TestMCPServerBoxConfig:
         assert cfg.host_path is None
         assert cfg.host_path_mode == 'ro'
         assert cfg.env == {}
-        assert cfg.startup_timeout_sec == 120
+        assert cfg.startup_timeout_sec == 300
         assert cfg.cpus is None
         assert cfg.memory_mb is None
         assert cfg.pids_limit is None
@@ -494,6 +494,52 @@ class TestBuildBoxProcessPayload:
         assert payload['args'] == ['/opt/other/server.py', '--flag']
 
 
+# ── Python Workspace Preparation ────────────────────────────────────
+
+
+class TestPythonWorkspacePreparation:
+    def test_requirements_workspace_uses_venv_bootstrap(self, mcp_module, tmp_path):
+        host_path = tmp_path / 'mcp-source'
+        host_path.mkdir()
+        (host_path / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+
+        command = mcp_module.BoxStdioSessionRuntime.detect_install_command(
+            str(host_path),
+            '/workspace/.mcp/u1/workspace',
+        )
+
+        assert command is not None
+        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
+        assert 'pip install --no-cache-dir -r' not in command
+
+    def test_staging_refresh_removes_stale_source_files_but_preserves_runtime_dirs(self, mcp_module, tmp_path):
+        source = tmp_path / 'source'
+        source.mkdir()
+        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
+        (source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+
+        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
+        workspace = process_root / 'workspace'
+        (workspace / '.venv' / 'bin').mkdir(parents=True)
+        (workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
+        (workspace / '.langbot').mkdir()
+        (workspace / '.langbot' / 'python-env.lock').mkdir()
+        (workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
+        (workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
+        (workspace / 'removed_dir').mkdir()
+        (workspace / 'removed_dir' / 'old.txt').write_text('stale\n', encoding='utf-8')
+
+        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
+
+        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
+        assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
+        assert not (workspace / 'removed.py').exists()
+        assert not (workspace / 'removed_dir').exists()
+        assert (workspace / '.venv' / 'bin' / 'python').exists()
+        assert (workspace / '.langbot' / 'python-env.lock').is_dir()
+
+
 # ── get_runtime_info_dict ───────────────────────────────────────────
 
 
diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py
index 847480c1..96316ee5 100644
--- a/tests/unit_tests/provider/test_skill_tools.py
+++ b/tests/unit_tests/provider/test_skill_tools.py
@@ -456,7 +456,9 @@ class TestNativeToolLoaderSkillPaths:
                 SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}),
             )
 
-            assert result == {'ok': True, 'content': 'demo instructions'}
+            assert result['ok'] is True
+            assert result['content'] == 'demo instructions'
+            assert result['truncated'] is False
 
     @pytest.mark.asyncio
     async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self):
@@ -485,7 +487,7 @@ class TestNativeToolLoaderSkillPaths:
                 query,
             )
 
-            assert result == {'ok': True}
+            assert result['ok'] is True
             tool_parameters = ap.box_service.execute_tool.await_args.args[0]
             assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py'
             assert tool_parameters['workdir'] == '/workspace/.skills/demo'

From 7b67dcc302b62cb5329719903566d6ff03496282 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <60681390+huanghuoguoguo@users.noreply.github.com>
Date: Sun, 14 Jun 2026 11:23:09 +0800
Subject: [PATCH 10/12] fix(tools): bootstrap Python workspaces with available
 interpreter

---
 src/langbot/pkg/box/workspace.py                      | 10 ++++++++--
 tests/unit_tests/provider/test_mcp_box_integration.py |  3 ++-
 tests/unit_tests/provider/test_skill_tools.py         |  3 ++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py
index 948622ef..06df6b22 100644
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -151,8 +151,14 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
         export TMP="$_LB_TMP_DIR"
         export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
 
+        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
+        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
+          echo "No Python interpreter found for workspace bootstrap" >&2
+          exit 1
+        fi
+
         _lb_python_meta() {{
-          python - <<'PY'
+          "$_LB_SYSTEM_PYTHON" - <<'PY'
         import hashlib
         import json
         import os
@@ -225,7 +231,7 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
 
           if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
             rm -rf "$_LB_VENV_DIR"
-            python -m venv "$_LB_VENV_DIR"
+            "$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
             . "$_LB_VENV_DIR/bin/activate"
             python -m pip install --upgrade pip setuptools wheel
             if [ -f "{mount_path}/requirements.txt" ]; then
diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py
index db1aa542..c7e080c0 100644
--- a/tests/unit_tests/provider/test_mcp_box_integration.py
+++ b/tests/unit_tests/provider/test_mcp_box_integration.py
@@ -509,7 +509,8 @@ class TestPythonWorkspacePreparation:
         )
 
         assert command is not None
-        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
         assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
         assert 'pip install --no-cache-dir -r' not in command
 
diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py
index 96316ee5..7a9cde66 100644
--- a/tests/unit_tests/provider/test_skill_tools.py
+++ b/tests/unit_tests/provider/test_skill_tools.py
@@ -245,7 +245,8 @@ class TestSkillPathHelpers:
 
         command = wrap_skill_command_with_python_env('python scripts/run.py')
 
-        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
         assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
         assert command.rstrip().endswith('python scripts/run.py')
 

From 64b7e9c509a6aa7981fe0421c096163345721b5c Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <60681390+huanghuoguoguo@users.noreply.github.com>
Date: Sun, 14 Jun 2026 11:25:09 +0800
Subject: [PATCH 11/12] fix(tools): clear stale Python workspace env locks

---
 src/langbot/pkg/box/workspace.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py
index 06df6b22..41fca039 100644
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -146,17 +146,17 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
         _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
 
         mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
+        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
+        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
+          echo "python3 or python is required to prepare the workspace Python environment" >&2
+          exit 127
+        fi
+
         export TMPDIR="$_LB_TMP_DIR"
         export TEMP="$_LB_TMP_DIR"
         export TMP="$_LB_TMP_DIR"
         export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
 
-        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
-        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
-          echo "No Python interpreter found for workspace bootstrap" >&2
-          exit 1
-        fi
-
         _lb_python_meta() {{
           "$_LB_SYSTEM_PYTHON" - <<'PY'
         import hashlib
@@ -204,18 +204,29 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
         fi
 
         if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
+          if [ -d "$_LB_LOCK_DIR" ] && [ ! -f "$_LB_LOCK_DIR/pid" ]; then
+            echo "Clearing stale Python environment lock without owner: $_LB_LOCK_DIR" >&2
+            rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
+          fi
+
           _LB_LOCK_WAIT=0
           while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
             if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
+              echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
+              rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
+              if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
+                break
+              fi
               echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
               exit 1
             fi
             sleep 1
             _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
           done
+          printf '%s\\n' "$$" > "$_LB_LOCK_DIR/pid" 2>/dev/null || true
 
           _lb_cleanup_lock() {{
-            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
+            rm -rf "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
           }}
           trap _lb_cleanup_lock EXIT INT TERM
 

From 9fa3251f3d3a58067753fd9ed34b8152d2e722bd Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <60681390+huanghuoguoguo@users.noreply.github.com>
Date: Sun, 14 Jun 2026 21:15:21 +0800
Subject: [PATCH 12/12] fix(tools): decouple runtime from agent runner

---
 src/langbot/pkg/box/workspace.py              |  10 +-
 .../pkg/provider/tools/loaders/mcp_stdio.py   |  14 +--
 .../pkg/provider/tools/loaders/native.py      |  90 +---------------
 .../pkg/provider/tools/loaders/skill.py       | 100 +++---------------
 .../provider/tools/loaders/skill_authoring.py |   2 +-
 tests/unit_tests/box/test_workspace.py        |   4 +-
 .../provider/test_mcp_box_integration.py      |   3 +
 tests/unit_tests/provider/test_skill_tools.py |  24 +++++
 8 files changed, 57 insertions(+), 190 deletions(-)

diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py
index 41fca039..26d1a41e 100644
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -204,14 +204,14 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
         fi
 
         if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
-          if [ -d "$_LB_LOCK_DIR" ] && [ ! -f "$_LB_LOCK_DIR/pid" ]; then
-            echo "Clearing stale Python environment lock without owner: $_LB_LOCK_DIR" >&2
-            rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
-          fi
-
           _LB_LOCK_WAIT=0
           while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
             if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
+              _LB_LOCK_OWNER="$(cat "$_LB_LOCK_DIR/pid" 2>/dev/null || true)"
+              if [ -n "$_LB_LOCK_OWNER" ] && kill -0 "$_LB_LOCK_OWNER" 2>/dev/null; then
+                echo "Timed out waiting for active Python environment lock: $_LB_LOCK_DIR" >&2
+                exit 1
+              fi
               echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
               rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
               if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
index 736dacea..b74b077b 100644
--- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
@@ -276,7 +276,7 @@ class BoxStdioSessionRuntime:
         # to delete them, so refresh source files in place and preserve runtime
         # directories instead of rmtree'ing the whole staging root.
         with _workspace_copy_lock(process_host_root):
-            preserved_names = {'.venv', 'venv', 'env', '.env', '.cache', '.tmp', '.langbot'}
+            preserved_names = {'.venv', 'venv', 'env', '.cache', '.tmp', '.langbot'}
             os.makedirs(process_host_workspace, exist_ok=True)
             for name in os.listdir(process_host_workspace):
                 if name in preserved_names:
@@ -288,6 +288,7 @@ class BoxStdioSessionRuntime:
                     try:
                         os.unlink(path)
                     except FileNotFoundError:
+                        # The entry may disappear between listdir and unlink if cleanup races us.
                         pass
             shutil.copytree(
                 source_path,
@@ -303,7 +304,6 @@ class BoxStdioSessionRuntime:
                     '.venv',
                     'venv',
                     'env',
-                    '.env',
                     '.cache',
                     '.tmp',
                     '.langbot',
@@ -401,18 +401,12 @@ class BoxStdioSessionRuntime:
         workspace_root = workspace_path.rstrip('/') or '/workspace'
         venv_dir = f'{workspace_root}/.venv'
         venv_bin = f'{venv_dir}/bin'
-        command = ' '.join(
-            [shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]]
-        )
+        command = ' '.join([shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]])
         wrapped = dict(payload)
         wrapped['command'] = 'sh'
         wrapped['args'] = [
             '-lc',
-            (
-                f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; '
-                f'export PATH={shlex.quote(venv_bin)}:$PATH; '
-                f'exec {command}'
-            ),
+            (f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; export PATH={shlex.quote(venv_bin)}:$PATH; exec {command}'),
         ]
         return wrapped
 
diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py
index bf9f357f..f10335e6 100644
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import json
-import mimetypes
 import os
 
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
@@ -478,9 +477,7 @@ else:
             if host_path and os.path.exists(host_path):
                 if os.path.isdir(host_path):
                     return self._build_directory_result(os.listdir(host_path))
-                result = self._read_text_file_preview(host_path, parameters)
-                host_root = str(selected_skill.get('package_root', '') or '')
-                return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
+                return self._read_text_file_preview(host_path, parameters)
 
             try:
                 result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
@@ -506,9 +503,7 @@ else:
         if os.path.isdir(host_path):
             entries = os.listdir(host_path)
             return self._build_directory_result(entries)
-        result = self._read_text_file_preview(host_path, parameters)
-        host_root = self._get_host_root(selected_skill)
-        return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
+        return self._read_text_file_preview(host_path, parameters)
 
     async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
         path = parameters['path']
@@ -695,8 +690,7 @@ else:
                     'max_bytes': {
                         'type': 'integer',
                         'description': (
-                            'Maximum bytes of file content to return. '
-                            f'Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
+                            f'Maximum bytes of file content to return. Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
                         ),
                         'default': _DEFAULT_TOOL_RESULT_MAX_BYTES,
                         'minimum': 1,
@@ -984,84 +978,6 @@ else:
             raise ValueError('Path escapes the skill package boundary.')
         return host_path
 
-    def _get_host_root(self, selected_skill: dict | None) -> str:
-        if selected_skill is not None:
-            return str(selected_skill.get('package_root', '') or '')
-        return str(getattr(self.ap.box_service, 'default_workspace', '') or '')
-
-    async def _attach_file_artifact_ref(
-        self,
-        result: dict,
-        host_path: str,
-        host_root: str,
-        sandbox_path: str,
-        query: pipeline_query.Query,
-    ) -> dict:
-        if not result.get('ok') or not result.get('truncated') or result.get('artifact_refs'):
-            return result
-        if not host_root or not os.path.isfile(host_path):
-            return result
-
-        run_session = self._get_agent_run_session(query)
-        if not run_session:
-            return result
-
-        persistence_mgr = getattr(self.ap, 'persistence_mgr', None)
-        get_db_engine = getattr(persistence_mgr, 'get_db_engine', None)
-        if not callable(get_db_engine):
-            return result
-
-        try:
-            from langbot.pkg.agent.runner.artifact_store import ArtifactStore
-
-            authorization = run_session.get('authorization', {}) if isinstance(run_session, dict) else {}
-            mime_type = mimetypes.guess_type(host_path)[0] or 'text/plain'
-            size_bytes = os.path.getsize(host_path)
-            metadata = {
-                'tool_name': READ_TOOL_NAME,
-                'sandbox_path': sandbox_path,
-                'truncated_by': result.get('truncated_by'),
-                'start_line': result.get('start_line'),
-                'end_line': result.get('end_line'),
-                'next_offset': result.get('next_offset'),
-            }
-            artifact_id = await ArtifactStore(get_db_engine()).register_file_artifact(
-                artifact_id=None,
-                host_path=host_path,
-                host_root=host_root,
-                artifact_type='file',
-                source='tool',
-                mime_type=mime_type,
-                name=os.path.basename(host_path),
-                size_bytes=size_bytes,
-                conversation_id=authorization.get('conversation_id'),
-                run_id=run_session.get('run_id') if isinstance(run_session, dict) else None,
-                runner_id=run_session.get('runner_id') if isinstance(run_session, dict) else None,
-                bot_id=getattr(query, 'bot_uuid', None),
-                workspace_id=authorization.get('workspace_id'),
-                thread_id=authorization.get('thread_id'),
-                metadata=metadata,
-            )
-            artifact_ref = {
-                'artifact_id': artifact_id,
-                'artifact_type': 'file',
-                'mime_type': mime_type,
-                'name': os.path.basename(host_path),
-                'size_bytes': size_bytes,
-            }
-            enriched = dict(result)
-            enriched['preview'] = str(result.get('content') or '')
-            enriched['artifact_refs'] = [artifact_ref]
-            return enriched
-        except Exception as exc:
-            self.ap.logger.warning(f'Failed to register read artifact for {sandbox_path}: {exc}')
-            return result
-
-    @staticmethod
-    def _get_agent_run_session(query: pipeline_query.Query) -> dict | None:
-        session = getattr(query, '_agent_run_session', None)
-        return session if isinstance(session, dict) else None
-
     def _normalize_exec_result(self, result: dict) -> dict:
         normalized = dict(result)
         stdout = str(normalized.get('stdout') or '')
diff --git a/src/langbot/pkg/provider/tools/loaders/skill.py b/src/langbot/pkg/provider/tools/loaders/skill.py
index fbfd72b3..b62f3e7d 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill.py
@@ -10,7 +10,6 @@ if typing.TYPE_CHECKING:
     from langbot_plugin.api.entities.events import pipeline_query
 
 ACTIVATED_SKILLS_KEY = '_activated_skills'
-ACTIVATED_SKILL_NAMES_STATE_KEY = 'host.activated_skills'
 PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
 SKILL_MOUNT_PREFIX = '/workspace/.skills'
 _SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
@@ -73,7 +72,8 @@ def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> N
         activated[skill_name] = skill_data
 
 
-def _normalize_skill_names(value: typing.Any) -> list[str]:
+def normalize_skill_names(value: typing.Any) -> list[str]:
+    """Return a de-duplicated list of non-empty skill names."""
     if not isinstance(value, list):
         return []
 
@@ -85,21 +85,24 @@ def _normalize_skill_names(value: typing.Any) -> list[str]:
     return names
 
 
-def restore_activated_skills_from_state(
+def get_activated_skill_names(query: pipeline_query.Query) -> list[str]:
+    """Return activated skill names for callers that own persistence policy."""
+    return normalize_skill_names(list(get_activated_skills(query).keys()))
+
+
+def restore_activated_skills(
     ap: app.Application,
     query: pipeline_query.Query,
-    state: dict[str, dict[str, typing.Any]],
+    skill_names: typing.Any,
 ) -> list[str]:
-    """Restore persisted activated skill names into Query variables.
+    """Restore caller-provided activated skill names into Query variables.
 
-    The state value stores names only. Full skill metadata is rebuilt from the
-    current pipeline-visible skill cache so removed or unbound skills remain
-    unavailable to native exec/write/edit.
+    Persistence and state scope ownership belong to higher-level flows. This
+    helper only rebuilds current Query state from pipeline-visible skills, so
+    removed or unbound skills stay unavailable to native exec/write/edit.
     """
-    conversation_state = state.get('conversation', {}) if isinstance(state, dict) else {}
-    skill_names = _normalize_skill_names(conversation_state.get(ACTIVATED_SKILL_NAMES_STATE_KEY))
     restored: list[str] = []
-    for skill_name in skill_names:
+    for skill_name in normalize_skill_names(skill_names):
         skill_data = get_visible_skill(ap, query, skill_name)
         if skill_data is None:
             continue
@@ -108,81 +111,6 @@ def restore_activated_skills_from_state(
     return restored
 
 
-def _get_agent_run_authorization(query: pipeline_query.Query) -> dict[str, typing.Any] | None:
-    session = getattr(query, '_agent_run_session', None)
-    if not isinstance(session, dict):
-        return None
-    authorization = session.get('authorization')
-    return authorization if isinstance(authorization, dict) else None
-
-
-def _get_conversation_state_target(query: pipeline_query.Query) -> tuple[str, str, str, dict[str, typing.Any]] | None:
-    session = getattr(query, '_agent_run_session', None)
-    if not isinstance(session, dict):
-        return None
-
-    authorization = _get_agent_run_authorization(query)
-    if authorization is None:
-        return None
-
-    state_policy = authorization.get('state_policy') or {}
-    if not state_policy.get('enable_state', True):
-        return None
-    state_scopes = state_policy.get('state_scopes', ['conversation', 'actor'])
-    if 'conversation' not in state_scopes:
-        return None
-
-    state_context = authorization.get('state_context') or {}
-    scope_keys = state_context.get('scope_keys') or {}
-    scope_key = scope_keys.get('conversation')
-    if not scope_key:
-        return None
-
-    runner_id = str(session.get('runner_id') or 'unknown')
-    binding_identity = str(state_context.get('binding_identity') or 'unknown')
-    return scope_key, runner_id, binding_identity, state_context
-
-
-async def persist_activated_skill(ap: app.Application, query: pipeline_query.Query, skill_name: str) -> bool:
-    """Persist activated skill names for the current AgentRunner conversation.
-
-    Returns False when the call is outside an AgentRunner run or state policy
-    does not expose a conversation scope. The in-memory Query activation still
-    remains valid for the current turn.
-    """
-    target = _get_conversation_state_target(query)
-    if target is None:
-        return False
-
-    persistence_mgr = getattr(ap, 'persistence_mgr', None)
-    if persistence_mgr is None or not hasattr(persistence_mgr, 'get_db_engine'):
-        return False
-
-    from ....agent.runner.persistent_state_store import get_persistent_state_store
-
-    scope_key, runner_id, binding_identity, state_context = target
-    store = get_persistent_state_store(persistence_mgr.get_db_engine())
-    existing_names = _normalize_skill_names(await store.state_get(scope_key, ACTIVATED_SKILL_NAMES_STATE_KEY))
-    if skill_name not in existing_names:
-        existing_names.append(skill_name)
-
-    success, error = await store.state_set(
-        scope_key=scope_key,
-        state_key=ACTIVATED_SKILL_NAMES_STATE_KEY,
-        value=existing_names,
-        runner_id=runner_id,
-        binding_identity=binding_identity,
-        scope='conversation',
-        context=state_context,
-        logger=getattr(ap, 'logger', None),
-    )
-    if not success:
-        logger = getattr(ap, 'logger', None)
-        if logger is not None:
-            logger.warning(f'Failed to persist activated skill "{skill_name}": {error}')
-    return success
-
-
 def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
     normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace'
     if normalized_path == SKILL_MOUNT_PREFIX:
diff --git a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
index cde4314b..d5372178 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
@@ -92,7 +92,6 @@ class SkillToolLoader(loader.ToolLoader):
 
         # Register activated skill for sandbox mount path resolution
         skill_loader.register_activated_skill(query, skill_data)
-        await skill_loader.persist_activated_skill(self.ap, query, skill_name)
 
         # Return SKILL.md content as Tool Result (injects into context)
         instructions = skill_data.get('instructions', '')
@@ -117,6 +116,7 @@ class SkillToolLoader(loader.ToolLoader):
             'activated': True,
             'skill_name': skill_name,
             'mount_path': mount_path,
+            'activated_skill_names': skill_loader.get_activated_skill_names(query),
             'content': result_content,
         }
 
diff --git a/tests/unit_tests/box/test_workspace.py b/tests/unit_tests/box/test_workspace.py
index 809347e5..e4620ad3 100644
--- a/tests/unit_tests/box/test_workspace.py
+++ b/tests/unit_tests/box/test_workspace.py
@@ -54,7 +54,9 @@ def test_classify_python_workspace_detects_package_and_requirements():
 def test_wrap_python_command_with_env_contains_bootstrap_and_command():
     command = wrap_python_command_with_env('python script.py')
 
-    assert 'python -m venv "$_LB_VENV_DIR"' in command
+    assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+    assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
+    assert 'kill -0 "$_LB_LOCK_OWNER"' in command
     assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
     assert command.rstrip().endswith('python script.py')
 
diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py
index c7e080c0..598f7dce 100644
--- a/tests/unit_tests/provider/test_mcp_box_integration.py
+++ b/tests/unit_tests/provider/test_mcp_box_integration.py
@@ -519,6 +519,7 @@ class TestPythonWorkspacePreparation:
         source.mkdir()
         (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
         (source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+        (source / '.env').write_text('TOKEN=new\n', encoding='utf-8')
 
         process_root = tmp_path / 'shared' / '.mcp' / 'u1'
         workspace = process_root / 'workspace'
@@ -526,6 +527,7 @@ class TestPythonWorkspacePreparation:
         (workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
         (workspace / '.langbot').mkdir()
         (workspace / '.langbot' / 'python-env.lock').mkdir()
+        (workspace / '.env').write_text('TOKEN=old\n', encoding='utf-8')
         (workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
         (workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
         (workspace / 'removed_dir').mkdir()
@@ -535,6 +537,7 @@ class TestPythonWorkspacePreparation:
 
         assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
         assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
+        assert (workspace / '.env').read_text(encoding='utf-8') == 'TOKEN=new\n'
         assert not (workspace / 'removed.py').exists()
         assert not (workspace / 'removed_dir').exists()
         assert (workspace / '.venv' / 'bin' / 'python').exists()
diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py
index 7a9cde66..9db7b945 100644
--- a/tests/unit_tests/provider/test_skill_tools.py
+++ b/tests/unit_tests/provider/test_skill_tools.py
@@ -193,6 +193,29 @@ class TestSkillPathHelpers:
 
         assert list(result.keys()) == ['visible']
 
+    def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
+        from langbot.pkg.provider.tools.loaders.skill import (
+            ACTIVATED_SKILLS_KEY,
+            PIPELINE_BOUND_SKILLS_KEY,
+            get_activated_skill_names,
+            restore_activated_skills,
+        )
+
+        ap = _make_ap()
+        ap.skill_mgr = SimpleNamespace(
+            skills={
+                'visible': _make_skill_data(name='visible'),
+                'hidden': _make_skill_data(name='hidden'),
+            }
+        )
+        query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
+
+        restored = restore_activated_skills(ap, query, ['visible', 'hidden', 'visible', ''])
+
+        assert restored == ['visible']
+        assert list(query.variables[ACTIVATED_SKILLS_KEY].keys()) == ['visible']
+        assert get_activated_skill_names(query) == ['visible']
+
     def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
         from langbot.pkg.provider.tools.loaders.skill import (
             PIPELINE_BOUND_SKILLS_KEY,
@@ -282,6 +305,7 @@ class TestSkillToolLoader:
         assert result['activated'] is True
         assert result['skill_name'] == 'demo'
         assert result['mount_path'] == '/workspace/.skills/demo'
+        assert result['activated_skill_names'] == ['demo']
         assert 'Step 1' in result['content']
         assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}