From d43cbf0243f3ef2eec8154ab7bbea0f4f78d7b51 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Sat, 16 May 2026 10:53:24 +0800
Subject: [PATCH] fix(utils): classify runner URLs safely

---
Review notes (free text in this position is ignored by git-am):

 * Local detection no longer prefix-matches the host against the removed
   LOCAL_PATTERNS list. A host is LOCAL only if it is the literal
   'localhost' or an IP literal for which ipaddress reports is_private,
   is_loopback or is_unspecified. Names such as '10.evil.com' therefore
   fall through to the CLOUD_DOMAINS check (the new tests expect CLOUD).
 * get_runner_category now returns UNKNOWN when the URL has no scheme, no
   host, or a host rejected by _is_valid_hostname.
 * '_ = parsed_url.port' discards its value; presumably it is there so a
   malformed port (see the 'http://localhost:bad' test case) raises
   ValueError inside the try block. The handler is narrowed from
   Exception to (TypeError, ValueError).
 * NOTE(review): 'localhost.' (trailing dot) is accepted by
   _is_valid_hostname but is not classified LOCAL, because
   _is_local_host compares against 'localhost' exactly. Confirm whether
   this is intended.
 * NOTE(review): _is_valid_hostname applies the 253-character limit
   before stripping the trailing dot, so a 253-character name written
   with a trailing dot is rejected. Confirm whether this is intended.

 src/langbot/pkg/utils/runner.py       | 69 ++++++++++++++++-----------
 tests/unit_tests/utils/__init__.py    |  1 +
 tests/unit_tests/utils/test_runner.py | 46 ++++++++++++++++++
 3 files changed, 89 insertions(+), 27 deletions(-)
 create mode 100644 tests/unit_tests/utils/__init__.py
 create mode 100644 tests/unit_tests/utils/test_runner.py

diff --git a/src/langbot/pkg/utils/runner.py b/src/langbot/pkg/utils/runner.py
index 43aecc06..bcad66f0 100644
--- a/src/langbot/pkg/utils/runner.py
+++ b/src/langbot/pkg/utils/runner.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import ipaddress
+import re
 from urllib.parse import urlparse
 
 
@@ -20,29 +22,39 @@ CLOUD_DOMAINS = [
     '.langflow.org',
 ]
 
-LOCAL_PATTERNS = [
-    'localhost',
-    '127.0.0.1',
-    '0.0.0.0',
-    '192.168.',
-    '10.',
-    '172.16.',
-    '172.17.',
-    '172.18.',
-    '172.19.',
-    '172.20.',
-    '172.21.',
-    '172.22.',
-    '172.23.',
-    '172.24.',
-    '172.25.',
-    '172.26.',
-    '172.27.',
-    '172.28.',
-    '172.29.',
-    '172.30.',
-    '172.31.',
-]
+HOST_LABEL_PATTERN = re.compile(r'^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$')
+
+
+def _is_valid_hostname(host: str) -> bool:
+    if host == 'localhost':
+        return True
+
+    try:
+        ipaddress.ip_address(host)
+        return True
+    except ValueError:
+        pass
+
+    if not host or len(host) > 253 or any(char.isspace() for char in host):
+        return False
+
+    host = host.rstrip('.')
+    if not host:
+        return False
+
+    return all(HOST_LABEL_PATTERN.match(label) for label in host.split('.'))
+
+
+def _is_local_host(host: str) -> bool:
+    if host == 'localhost':
+        return True
+
+    try:
+        ip_address = ipaddress.ip_address(host)
+    except ValueError:
+        return False
+
+    return ip_address.is_private or ip_address.is_loopback or ip_address.is_unspecified
 
 
 def get_runner_category(runner_name: str, runner_url: str) -> str:
@@ -52,12 +64,15 @@ def get_runner_category(runner_name: str, runner_url: str) -> str:
     try:
         parsed_url = urlparse(runner_url)
         host = parsed_url.hostname.lower() if parsed_url.hostname else ''
-    except Exception:
+        _ = parsed_url.port
+    except (TypeError, ValueError):
         return RunnerCategory.UNKNOWN
 
-    for pattern in LOCAL_PATTERNS:
-        if host.startswith(pattern):
-            return RunnerCategory.LOCAL
+    if not parsed_url.scheme or not host or not _is_valid_hostname(host):
+        return RunnerCategory.UNKNOWN
+
+    if _is_local_host(host):
+        return RunnerCategory.LOCAL
 
     for domain in CLOUD_DOMAINS:
         if host.endswith(domain):
diff --git a/tests/unit_tests/utils/__init__.py b/tests/unit_tests/utils/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/tests/unit_tests/utils/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/unit_tests/utils/test_runner.py b/tests/unit_tests/utils/test_runner.py
new file mode 100644
index 00000000..5c7a9dda
--- /dev/null
+++ b/tests/unit_tests/utils/test_runner.py
@@ -0,0 +1,46 @@
+import pytest
+
+from langbot.pkg.utils.runner import RunnerCategory, get_runner_category
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'api.dify.ai/v1',
+        'localhost:7860',
+        'https:///v1',
+        'https://',
+        'https://exa mple.com',
+        'http://[::1',
+        'http://localhost:bad',
+    ],
+)
+def test_get_runner_category_returns_unknown_for_invalid_urls(runner_url):
+    assert get_runner_category('dify-service-api', runner_url) == RunnerCategory.UNKNOWN
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'http://localhost:7860',
+        'http://127.0.0.1:7860',
+        'http://10.0.0.1:7860',
+        'http://172.16.0.1:7860',
+        'http://172.31.255.255:7860',
+        'http://192.168.1.20:7860',
+        'http://[::1]:7860',
+    ],
+)
+def test_get_runner_category_detects_local_hosts_with_ipaddress(runner_url):
+    assert get_runner_category('langflow-api', runner_url) == RunnerCategory.LOCAL
+
+
+@pytest.mark.parametrize(
+    'runner_url',
+    [
+        'http://10.evil.com',
+        'http://192.168.example.com',
+    ],
+)
+def test_get_runner_category_does_not_treat_private_ip_prefix_domains_as_local(runner_url):
+    assert get_runner_category('langflow-api', runner_url) == RunnerCategory.CLOUD