mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-27 07:54:19 +00:00
feat(box): bidirectional attachment transfer for sandbox (#2257)
* feat(box): bidirectional attachment transfer for sandbox Materialize inbound attachments into the sandbox workspace so agents can process user-sent files, and collect agent-produced files from the outbox to attach them back to the reply. - box(service): add materialize_inbound_attachments / collect_outbound attachments. Prefer direct host-filesystem read/write on the bind-mounted workspace (no size limit), falling back to chunked exec only for non-shared backends (e2b/remote). Clear per-query inbox/outbox dirs at turn start to avoid query_id-reuse collisions. - provider(localagent): inject inbound attachment descriptors into the sandbox and append a system note telling the agent the inbox/outbox paths. - pipeline(wrapper): collect outbox files on the final stream chunk and append them as attachment components to the response chain. - web(debug-dialog): render File components with a download link when base64/url is present; add base64/path fields to the File entity. - tests: cover inbound/outbound, large-file transfer without truncation, and stale-dir clearing (86 passing). * feat(box): support voice/file attachment round-trip end-to-end Extends the bidirectional attachment transfer to audio and arbitrary files through the real webchat UI, and fixes the model-payload errors that non-image attachments triggered. - platform(websocket_adapter): resolve Voice/File component storage keys to base64 (previously only Image), so audio/documents reach the sandbox inbox. - web(debug-dialog): accept audio/* and any file in the uploader (was image-only), classify by mimetype, upload Voice/File via the documents endpoint, and render non-image staged attachments as a chip. - provider(litellmchat): drop non-image file parts (file_base64 / file_url) when building the OpenAI/LiteLLM payload. These come from Voice/File attachments — including ones replayed from conversation history — and the agent reads their bytes from the sandbox, not the model. Without this the provider rejects the request: 'invalid content type=file_base64'. - provider(localagent): also strip those parts from the current user message alongside the sandbox-path note (model-facing clarity; the requester is the real safety net for history). - tests: cover the requester strip/keep behavior (file dropped, image kept and reshaped to image_url, mixed history, plain-string content). * test(box): cover inbound/outbound attachment helpers; fix ruff format - ruff format localagent.py (CI ruff format --check was failing) - add unit tests for ResponseWrapper outbound-attachment helpers (wrapper.py 78%->98%) - add unit tests for LocalAgentRunner._inject_inbound_attachments - add unit tests for WebSocketAdapter._process_image_components (0%->covered) Lifts PR patch coverage from 68.97% to ~88% (>75% target).
This commit is contained in:
@@ -312,12 +312,18 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)
|
||||
|
||||
async def _process_image_components(self, message_chain_obj: list):
|
||||
"""
|
||||
处理消息链中的图片和文件组件,将path转换为base64
|
||||
处理消息链中的图片、语音和文件组件,将 path 转换为 base64
|
||||
|
||||
Image / Voice / File components uploaded from the web client carry a
|
||||
storage key in ``path``. Resolve it to a base64 data URI so downstream
|
||||
stages (multimodal LLM input and the Box sandbox inbox) have a usable
|
||||
payload, then drop the now-consumed storage object.
|
||||
|
||||
Args:
|
||||
message_chain_obj: 消息链对象列表
|
||||
"""
|
||||
import base64
|
||||
import mimetypes
|
||||
|
||||
storage_mgr = self.ap.storage_mgr
|
||||
|
||||
@@ -325,31 +331,33 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)
|
||||
comp_type = component.get('type', '')
|
||||
comp_path = component.get('path', '')
|
||||
|
||||
if not comp_path:
|
||||
if not comp_path or comp_type not in ('Image', 'Voice', 'File'):
|
||||
continue
|
||||
|
||||
if comp_type == 'Image':
|
||||
try:
|
||||
file_content = await storage_mgr.storage_provider.load(comp_path)
|
||||
base64_str = base64.b64encode(file_content).decode('utf-8')
|
||||
try:
|
||||
file_content = await storage_mgr.storage_provider.load(comp_path)
|
||||
base64_str = base64.b64encode(file_content).decode('utf-8')
|
||||
|
||||
file_key = comp_path
|
||||
if file_key.lower().endswith(('.jpg', '.jpeg')):
|
||||
lowered = comp_path.lower()
|
||||
if comp_type == 'Image':
|
||||
if lowered.endswith(('.jpg', '.jpeg')):
|
||||
mime_type = 'image/jpeg'
|
||||
elif file_key.lower().endswith('.png'):
|
||||
mime_type = 'image/png'
|
||||
elif file_key.lower().endswith('.gif'):
|
||||
elif lowered.endswith('.gif'):
|
||||
mime_type = 'image/gif'
|
||||
elif file_key.lower().endswith('.webp'):
|
||||
elif lowered.endswith('.webp'):
|
||||
mime_type = 'image/webp'
|
||||
else:
|
||||
mime_type = 'image/png'
|
||||
elif comp_type == 'Voice':
|
||||
mime_type = mimetypes.guess_type(comp_path)[0] or 'audio/wav'
|
||||
else: # File
|
||||
mime_type = mimetypes.guess_type(comp_path)[0] or 'application/octet-stream'
|
||||
|
||||
component['base64'] = f'data:{mime_type};base64,{base64_str}'
|
||||
await storage_mgr.storage_provider.delete(comp_path)
|
||||
component['path'] = ''
|
||||
except Exception as e:
|
||||
await self.logger.error(f'Failed to load image file {comp_path}: {e}')
|
||||
component['base64'] = f'data:{mime_type};base64,{base64_str}'
|
||||
await storage_mgr.storage_provider.delete(comp_path)
|
||||
component['path'] = ''
|
||||
except Exception as e:
|
||||
await self.logger.error(f'Failed to load {comp_type} file {comp_path}: {e}')
|
||||
|
||||
async def handle_websocket_message(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user