mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-18 11:44:18 +00:00
75e5af26d0
Extends the bidirectional attachment transfer to audio and arbitrary files through the real webchat UI, and fixes the model-payload errors that non-image attachments triggered. - platform(websocket_adapter): resolve Voice/File component storage keys to base64 (previously only Image), so audio/documents reach the sandbox inbox. - web(debug-dialog): accept audio/* and any file in the uploader (was image-only), classify by mimetype, upload Voice/File via the documents endpoint, and render non-image staged attachments as a chip. - provider(litellmchat): drop non-image file parts (file_base64 / file_url) when building the OpenAI/LiteLLM payload. These come from Voice/File attachments — including ones replayed from conversation history — and the agent reads their bytes from the sandbox, not the model. Without this the provider rejects the request: 'invalid content type=file_base64'. - provider(localagent): also strip those parts from the current user message alongside the sandbox-path note (model-facing clarity; the requester is the real safety net for history). - tests: cover the requester strip/keep behavior (file dropped, image kept and reshaped to image_url, mixed history, plain-string content).
94 lines
3.5 KiB
Python
94 lines
3.5 KiB
Python
"""Unit tests for LiteLLMRequester._convert_messages.

Focus: the content-part normalization that (a) converts image_base64 parts to
the OpenAI image_url shape and (b) drops non-image file parts (file_base64 /
file_url) which OpenAI-compatible chat models reject. The latter is essential
for Voice/File attachments — including ones replayed from conversation history —
since the agent consumes their bytes via the sandbox, not the model payload.
"""
|
|
|
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
|
|
from langbot.pkg.provider.modelmgr.requesters.litellmchat import LiteLLMRequester
|
|
|
|
|
|
def _make_requester() -> LiteLLMRequester:
    """Return a bare ``LiteLLMRequester`` created without running ``__init__``.

    ``_convert_messages`` does not touch instance config, so the constructor
    can be bypassed for these tests.
    """
    requester = LiteLLMRequester.__new__(LiteLLMRequester)
    return requester
|
|
|
|
|
|
def test_convert_messages_drops_file_base64_part():
    """A ``file_base64`` part is dropped and only the text part remains."""
    requester = _make_requester()

    text_element = provider_message.ContentElement.from_text('analyze this audio')
    audio_element = provider_message.ContentElement.from_file_base64(
        'data:audio/wav;base64,AAAA',
        'voice.wav',
    )
    user_message = provider_message.Message(
        role='user',
        content=[text_element, audio_element],
    )

    converted = requester._convert_messages([user_message])
    content_parts = converted[0]['content']
    part_types = [part.get('type') for part in content_parts]

    # The file part must be gone, leaving exactly the original text part.
    assert 'file_base64' not in part_types
    assert part_types == ['text']
    assert content_parts[0]['text'] == 'analyze this audio'
|
|
|
|
|
|
def test_convert_messages_drops_file_url_part():
    """A ``file_url`` part is dropped and only the text part remains."""
    requester = _make_requester()

    text_element = provider_message.ContentElement.from_text('here is a doc')
    doc_element = provider_message.ContentElement.from_file_url(
        'http://example.com/report.xlsx',
        'report.xlsx',
    )
    user_message = provider_message.Message(
        role='user',
        content=[text_element, doc_element],
    )

    converted = requester._convert_messages([user_message])
    content_parts = converted[0]['content']
    part_types = [part.get('type') for part in content_parts]

    assert part_types == ['text']
|
|
|
|
|
|
def test_convert_messages_keeps_image_and_converts_to_image_url():
    """An ``image_base64`` part is kept and emitted in the ``image_url`` shape."""
    requester = _make_requester()

    text_element = provider_message.ContentElement.from_text('look')
    image_element = provider_message.ContentElement.from_image_base64('data:image/png;base64,AAAA')
    user_message = provider_message.Message(
        role='user',
        content=[text_element, image_element],
    )

    converted = requester._convert_messages([user_message])
    content_parts = converted[0]['content']
    part_types = [part.get('type') for part in content_parts]

    # The image survives, reshaped into the OpenAI image_url form.
    assert part_types == ['text', 'image_url']

    image_part = content_parts[1]
    assert image_part['image_url'] == {'url': 'data:image/png;base64,AAAA'}
    assert 'image_base64' not in image_part
|
|
|
|
|
|
def test_convert_messages_mixed_history_strips_only_files():
    """File parts in an earlier turn are stripped; text parts in both turns stay."""
    requester = _make_requester()

    # Replayed history: an earlier voice turn followed by the current text turn.
    old_text = provider_message.ContentElement.from_text('old audio turn')
    old_audio = provider_message.ContentElement.from_file_base64(
        'data:audio/wav;base64,BBBB',
        'voice.wav',
    )
    history_voice = provider_message.Message(
        role='user',
        content=[old_text, old_audio],
    )

    current_text = provider_message.ContentElement.from_text('now do the csv')
    current = provider_message.Message(role='user', content=[current_text])

    converted = requester._convert_messages([history_voice, current])

    history_types = [part.get('type') for part in converted[0]['content']]
    assert history_types == ['text']

    current_types = [part.get('type') for part in converted[1]['content']]
    assert current_types == ['text']
|
|
|
|
|
|
def test_convert_messages_plain_string_content_untouched():
    """Plain string content comes back unchanged after conversion."""
    requester = _make_requester()
    plain_message = provider_message.Message(role='user', content='just text')

    converted = requester._convert_messages([plain_message])
    first_content = converted[0]['content']

    assert first_content == 'just text'
|