From ed02ac4710993e916fc8e118e0019507e9c586c5 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sat, 16 May 2026 11:28:34 +0800
Subject: [PATCH] fix(utils): classify runner URLs safely (#2191)

* fix(utils): classify runner URLs safely

* fix(utils): keep runner parse failures unknown
---
 src/langbot/pkg/utils/runner.py       | 45 ++++++++++++++++++++++++--
 tests/unit_tests/utils/test_runner.py | 46 +++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 tests/unit_tests/utils/test_runner.py

diff --git a/src/langbot/pkg/utils/runner.py b/src/langbot/pkg/utils/runner.py
index 43aecc06..16d42f20 100644
--- a/src/langbot/pkg/utils/runner.py
+++ b/src/langbot/pkg/utils/runner.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import ipaddress
+import re
 from urllib.parse import urlparse
 
 
@@ -44,6 +46,40 @@ LOCAL_PATTERNS = [
     '172.31.',
 ]
 
+HOST_LABEL_PATTERN = re.compile(r'^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$')
+
+
+def _is_valid_hostname(host: str) -> bool:
+    if host == 'localhost':
+        return True
+
+    try:
+        ipaddress.ip_address(host)
+        return True
+    except ValueError:
+        pass
+
+    if not host or len(host) > 253 or any(char.isspace() for char in host):
+        return False
+
+    host = host.rstrip('.')
+    if not host:
+        return False
+
+    return all(HOST_LABEL_PATTERN.match(label) for label in host.split('.'))
+
+
+def _is_local_host(host: str) -> bool:
+    if host == 'localhost':
+        return True
+
+    try:
+        ip_address = ipaddress.ip_address(host)
+    except ValueError:
+        return False
+
+    return ip_address.is_private or ip_address.is_loopback or ip_address.is_unspecified
+
 
 def get_runner_category(runner_name: str, runner_url: str) -> str:
     if not runner_url:
@@ -52,12 +88,15 @@ def get_runner_category(runner_name: str, runner_url: str) -> str:
     try:
         parsed_url = urlparse(runner_url)
         host = parsed_url.hostname.lower() if parsed_url.hostname else ''
+        _ = parsed_url.port
     except Exception:
         return RunnerCategory.UNKNOWN
 
-    for pattern in LOCAL_PATTERNS:
-        if host.startswith(pattern):
-            return RunnerCategory.LOCAL
+    if not parsed_url.scheme or not host or not _is_valid_hostname(host):
+        return RunnerCategory.UNKNOWN
+
+    if _is_local_host(host):
+        return RunnerCategory.LOCAL
 
     for domain in CLOUD_DOMAINS:
         if host.endswith(domain):
diff --git a/tests/unit_tests/utils/test_runner.py b/tests/unit_tests/utils/test_runner.py
new file mode 100644
index 00000000..5c7a9dda
--- /dev/null
+++ b/tests/unit_tests/utils/test_runner.py
@@ -0,0 +1,46 @@
+import pytest
+
+from langbot.pkg.utils.runner import RunnerCategory, get_runner_category
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'api.dify.ai/v1',
+        'localhost:7860',
+        'https:///v1',
+        'https://',
+        'https://exa mple.com',
+        'http://[::1',
+        'http://localhost:bad',
+    ],
+)
+def test_get_runner_category_returns_unknown_for_invalid_urls(runner_url):
+    assert get_runner_category('dify-service-api', runner_url) == RunnerCategory.UNKNOWN
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'http://localhost:7860',
+        'http://127.0.0.1:7860',
+        'http://10.0.0.1:7860',
+        'http://172.16.0.1:7860',
+        'http://172.31.255.255:7860',
+        'http://192.168.1.20:7860',
+        'http://[::1]:7860',
+    ],
+)
+def test_get_runner_category_detects_local_hosts_with_ipaddress(runner_url):
+    assert get_runner_category('langflow-api', runner_url) == RunnerCategory.LOCAL
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'http://10.evil.com',
+        'http://192.168.example.com',
+    ],
+)
+def test_get_runner_category_does_not_treat_private_ip_prefix_domains_as_local(runner_url):
+    assert get_runner_category('langflow-api', runner_url) == RunnerCategory.CLOUD