feat(xray): add tunnel health monitor (#5480)

* feat(xray): add tunnel health monitor

* fix(tunnelmonitor): reuse netproxy client and init logger in tests

Replace the duplicated newHTTPClient/dialContextWithProxy with netproxy.NewHTTPClient, which centralises the http/https/socks5 handling and avoids the dial-goroutine connection leak on context cancellation. Cap failures at the threshold during cooldown so the counter stays a true consecutive-failure count. Add TestMain to initialise the logger and fix the nil-pointer panic in the success-after-failure path.

* fix(tunnelmonitor): observable recovery, signal headroom, and hardening

Address the remaining review findings on the tunnel health monitor:

- Recovery is now synchronous and observable: the callback calls
  server.RestartXray() directly and returns its error instead of just
  enqueuing SIGUSR1, so a failed restart no longer masks as success and
  arms the cooldown while the tunnel is still down.
- Give the OS signal channel headroom (buffer 8) so producers cannot
  starve a SIGTERM/SIGINT out of the single slot.
- Warn at startup when the monitor is enabled without a proxy, since the
  probe then measures host connectivity rather than the xray tunnel.
- Cap failures at the threshold in the nil-recover branch too, matching
  the cooldown cap.
- Document the XUI_TUNNEL_HEALTH_* vars in .env.example and the README.
- Add tests for status-code classification, Normalize bounds, New proxy
  scheme errors, the recovery-error and nil-recover paths, the cooldown
  cap, and Run context cancellation (coverage 90%).

---------

Co-authored-by: Sanaei <ho3ein.sanaei@gmail.com>
This commit is contained in:
Rick Sanchez
2026-06-24 23:31:37 +03:30
committed by GitHub
parent 3ba43bd86d
commit fe025e8af3
5 changed files with 773 additions and 1 deletions
+13
View File
@@ -4,3 +4,16 @@ XUI_LOG_FOLDER=x-ui
XUI_BIN_FOLDER=x-ui
XUI_INIT_WEB_BASE_PATH=/
# XUI_PORT=8080
# Optional tunnel health monitor (disabled by default). It periodically probes a
# URL and restarts xray-core after repeated failures. Point XUI_TUNNEL_HEALTH_PROXY
# at a local xray inbound so the probe tests the tunnel; without it the probe only
# checks host connectivity and a restart will not fix host network issues. A restart
# drops every connected client.
# XUI_TUNNEL_HEALTH_MONITOR=true
# XUI_TUNNEL_HEALTH_PROXY=socks5://127.0.0.1:1080
# XUI_TUNNEL_HEALTH_URL=https://www.cloudflare.com/cdn-cgi/trace
# XUI_TUNNEL_HEALTH_INTERVAL=30s
# XUI_TUNNEL_HEALTH_TIMEOUT=10s
# XUI_TUNNEL_HEALTH_FAILURES=3
# XUI_TUNNEL_HEALTH_COOLDOWN=5m
+7
View File
@@ -146,6 +146,13 @@ docker run -d --cap-add=NET_ADMIN --cap-add=NET_RAW ... ghcr.io/mhsanaei/3x-ui
| `XUI_ENABLE_FAIL2BAN` | Enable Fail2ban-based IP-limit enforcement | `true` |
| `XUI_LOG_LEVEL` | Log verbosity (`debug`, `info`, `warning`, `error`) | `info` |
| `XUI_DEBUG` | Enable debug mode | `false` |
| `XUI_TUNNEL_HEALTH_MONITOR` | Enable the tunnel health monitor (probes a URL and restarts xray after repeated failures; a restart drops all clients) | `false` |
| `XUI_TUNNEL_HEALTH_PROXY` | Proxy the probe is sent through; point it at a local xray inbound so the probe tests the tunnel (e.g. `socks5://127.0.0.1:1080`). Empty means the probe only checks host connectivity | — |
| `XUI_TUNNEL_HEALTH_URL` | URL probed for tunnel health | `https://www.cloudflare.com/cdn-cgi/trace` |
| `XUI_TUNNEL_HEALTH_INTERVAL` | Interval between probes | `30s` |
| `XUI_TUNNEL_HEALTH_TIMEOUT` | Per-probe timeout | `10s` |
| `XUI_TUNNEL_HEALTH_FAILURES` | Consecutive failures before a restart is triggered | `3` |
| `XUI_TUNNEL_HEALTH_COOLDOWN` | Minimum delay between consecutive restarts | `5m` |
## Supported Languages
+271
View File
@@ -0,0 +1,271 @@
package tunnelmonitor
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/mhsanaei/3x-ui/v3/internal/logger"
"github.com/mhsanaei/3x-ui/v3/internal/util/netproxy"
)
const (
defaultHealthURL = "https://www.cloudflare.com/cdn-cgi/trace"
defaultInterval = 30 * time.Second
defaultTimeout = 10 * time.Second
defaultFailureThreshold = 3
defaultCooldown = 5 * time.Minute
)
// Config controls the optional tunnel health monitor.
type Config struct {
Enabled bool
URL string
ProxyURL string
Interval time.Duration
Timeout time.Duration
FailureThreshold int
Cooldown time.Duration
}
// RecoveryFunc performs recovery after the monitor reaches the configured
// failure threshold. The panel wires this to an Xray core restart.
type RecoveryFunc func(context.Context) error
// Monitor periodically probes a URL and triggers recovery after repeated
// failures. It is intentionally independent from panel settings/UI so it can be
// enabled safely through service environment variables first.
type Monitor struct {
cfg Config
client *http.Client
recover RecoveryFunc
failures int
lastRecovery time.Time
now func() time.Time
}
// DefaultConfig returns disabled-by-default monitor settings.
func DefaultConfig() Config {
return Config{
Enabled: false,
URL: defaultHealthURL,
Interval: defaultInterval,
Timeout: defaultTimeout,
FailureThreshold: defaultFailureThreshold,
Cooldown: defaultCooldown,
}
}
// ConfigFromEnv builds Config from XUI_TUNNEL_HEALTH_* environment variables.
//
// Supported variables:
// - XUI_TUNNEL_HEALTH_MONITOR=true
// - XUI_TUNNEL_HEALTH_URL=https://www.cloudflare.com/cdn-cgi/trace
// - XUI_TUNNEL_HEALTH_PROXY=socks5://127.0.0.1:1080
// - XUI_TUNNEL_HEALTH_INTERVAL=30s
// - XUI_TUNNEL_HEALTH_TIMEOUT=10s
// - XUI_TUNNEL_HEALTH_FAILURES=3
// - XUI_TUNNEL_HEALTH_COOLDOWN=5m
func ConfigFromEnv() Config {
cfg := DefaultConfig()
cfg.Enabled = parseBool(os.Getenv("XUI_TUNNEL_HEALTH_MONITOR"))
cfg.URL = firstNonEmpty(os.Getenv("XUI_TUNNEL_HEALTH_URL"), cfg.URL)
cfg.ProxyURL = strings.TrimSpace(os.Getenv("XUI_TUNNEL_HEALTH_PROXY"))
cfg.Interval = parseDurationEnv("XUI_TUNNEL_HEALTH_INTERVAL", cfg.Interval)
cfg.Timeout = parseDurationEnv("XUI_TUNNEL_HEALTH_TIMEOUT", cfg.Timeout)
cfg.Cooldown = parseDurationEnv("XUI_TUNNEL_HEALTH_COOLDOWN", cfg.Cooldown)
cfg.FailureThreshold = parseIntEnv("XUI_TUNNEL_HEALTH_FAILURES", cfg.FailureThreshold)
return cfg.Normalize()
}
// Normalize applies safe bounds and defaults.
func (c Config) Normalize() Config {
if strings.TrimSpace(c.URL) == "" {
c.URL = defaultHealthURL
}
c.URL = strings.TrimSpace(c.URL)
c.ProxyURL = strings.TrimSpace(c.ProxyURL)
if c.Interval < time.Second {
c.Interval = defaultInterval
}
if c.Timeout < time.Second {
c.Timeout = defaultTimeout
}
if c.FailureThreshold < 1 {
c.FailureThreshold = defaultFailureThreshold
}
if c.Cooldown < time.Second {
c.Cooldown = defaultCooldown
}
return c
}
// New creates a monitor with an HTTP client based on cfg.
func New(cfg Config, recover RecoveryFunc) (*Monitor, error) {
cfg = cfg.Normalize()
client, err := netproxy.NewHTTPClient(cfg.ProxyURL, cfg.Timeout)
if err != nil {
return nil, err
}
return newWithClient(cfg, client, recover), nil
}
func newWithClient(cfg Config, client *http.Client, recover RecoveryFunc) *Monitor {
cfg = cfg.Normalize()
if client == nil {
client = &http.Client{Timeout: cfg.Timeout}
}
return &Monitor{
cfg: cfg,
client: client,
recover: recover,
now: time.Now,
}
}
// Run starts the monitor loop until ctx is cancelled.
func (m *Monitor) Run(ctx context.Context) {
if m == nil || !m.cfg.Enabled {
return
}
logger.Info("Tunnel health monitor enabled: ", m.cfg.URL)
ticker := time.NewTicker(m.cfg.Interval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
logger.Info("Tunnel health monitor stopped")
return
case <-ticker.C:
recovered, err := m.Step(ctx)
if err != nil {
logger.Warning("Tunnel health monitor check failed: ", err)
}
if recovered {
logger.Warning("Tunnel health monitor triggered Xray restart")
}
}
}
}
// Step performs one probe and maybe triggers recovery.
func (m *Monitor) Step(ctx context.Context) (bool, error) {
if m == nil {
return false, errors.New("nil monitor")
}
if err := m.probe(ctx); err != nil {
m.failures++
if m.failures < m.cfg.FailureThreshold {
return false, fmt.Errorf("probe failed %d/%d: %w", m.failures, m.cfg.FailureThreshold, err)
}
now := m.now()
if !m.lastRecovery.IsZero() && now.Sub(m.lastRecovery) < m.cfg.Cooldown {
m.failures = m.cfg.FailureThreshold
return false, fmt.Errorf("probe failed %d/%d; recovery cooldown active: %w", m.failures, m.cfg.FailureThreshold, err)
}
if m.recover == nil {
m.failures = m.cfg.FailureThreshold
return false, errors.New("recovery function is not configured")
}
if recErr := m.recover(ctx); recErr != nil {
return false, fmt.Errorf("recovery failed after probe error %v: %w", err, recErr)
}
m.lastRecovery = now
m.failures = 0
return true, err
}
if m.failures > 0 {
logger.Info("Tunnel health monitor recovered after successful probe")
}
m.failures = 0
return false, nil
}
func (m *Monitor) probe(ctx context.Context) error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, m.cfg.URL, nil)
if err != nil {
return err
}
resp, err := m.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
return fmt.Errorf("unexpected HTTP status %d", resp.StatusCode)
}
return nil
}
func parseBool(value string) bool {
switch strings.ToLower(strings.TrimSpace(value)) {
case "1", "true", "yes", "y", "on", "enable", "enabled":
return true
default:
return false
}
}
func parseDurationEnv(name string, fallback time.Duration) time.Duration {
value := strings.TrimSpace(os.Getenv(name))
if value == "" {
return fallback
}
d, err := time.ParseDuration(value)
if err != nil {
return fallback
}
return d
}
func parseIntEnv(name string, fallback int) int {
value := strings.TrimSpace(os.Getenv(name))
if value == "" {
return fallback
}
n, err := strconv.Atoi(value)
if err != nil {
return fallback
}
return n
}
func firstNonEmpty(value string, fallback string) string {
value = strings.TrimSpace(value)
if value == "" {
return fallback
}
return value
}
+454
View File
@@ -0,0 +1,454 @@
package tunnelmonitor
import (
"context"
"errors"
"net/http"
"strings"
"sync"
"testing"
"time"
"github.com/mhsanaei/3x-ui/v3/internal/logger"
"github.com/op/go-logging"
)
func TestMain(m *testing.M) {
logger.InitLogger(logging.ERROR)
m.Run()
}
type roundTripFunc func(*http.Request) (*http.Response, error)
func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return f(req)
}
func TestMonitorRestartsAfterFailureThreshold(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 2,
Cooldown: time.Minute,
}
client := &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
return nil, errors.New("tunnel down")
}),
}
restarts := 0
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
restarts++
return nil
})
monitor.now = func() time.Time {
return time.Unix(100, 0)
}
if recovered, _ := monitor.Step(context.Background()); recovered {
t.Fatal("first failure must not trigger recovery")
}
if recovered, _ := monitor.Step(context.Background()); !recovered {
t.Fatal("second consecutive failure should trigger recovery")
}
if restarts != 1 {
t.Fatalf("expected 1 recovery, got %d", restarts)
}
}
func TestMonitorRespectsRecoveryCooldown(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 1,
Cooldown: time.Minute,
}
client := &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
return nil, errors.New("tunnel down")
}),
}
now := time.Unix(100, 0)
restarts := 0
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
restarts++
return nil
})
monitor.now = func() time.Time {
return now
}
recovered, _ := monitor.Step(context.Background())
if !recovered {
t.Fatal("first failure should trigger recovery when threshold is 1")
}
recovered, _ = monitor.Step(context.Background())
if recovered {
t.Fatal("cooldown should suppress immediate second recovery")
}
if restarts != 1 {
t.Fatalf("expected 1 recovery during cooldown, got %d", restarts)
}
now = now.Add(time.Minute + time.Second)
recovered, _ = monitor.Step(context.Background())
if !recovered {
t.Fatal("recovery should be allowed after cooldown")
}
if restarts != 2 {
t.Fatalf("expected 2 recoveries after cooldown, got %d", restarts)
}
}
func TestMonitorSuccessResetsFailures(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 2,
Cooldown: time.Minute,
}
fail := true
client := &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
if fail {
return nil, errors.New("tunnel down")
}
return &http.Response{
StatusCode: http.StatusOK,
Body: http.NoBody,
}, nil
}),
}
restarts := 0
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
restarts++
return nil
})
_, _ = monitor.Step(context.Background())
fail = false
if recovered, err := monitor.Step(context.Background()); recovered || err != nil {
t.Fatalf("successful probe should not recover or fail, recovered=%v err=%v", recovered, err)
}
fail = true
if recovered, _ := monitor.Step(context.Background()); recovered {
t.Fatal("failure after success should be counted as first failure again")
}
if restarts != 0 {
t.Fatalf("expected no recovery, got %d", restarts)
}
}
func TestConfigFromEnvParsesValues(t *testing.T) {
t.Setenv("XUI_TUNNEL_HEALTH_MONITOR", "true")
t.Setenv("XUI_TUNNEL_HEALTH_URL", "https://example.com/health")
t.Setenv("XUI_TUNNEL_HEALTH_PROXY", "socks5://127.0.0.1:1080")
t.Setenv("XUI_TUNNEL_HEALTH_INTERVAL", "15s")
t.Setenv("XUI_TUNNEL_HEALTH_TIMEOUT", "3s")
t.Setenv("XUI_TUNNEL_HEALTH_FAILURES", "4")
t.Setenv("XUI_TUNNEL_HEALTH_COOLDOWN", "2m")
cfg := ConfigFromEnv()
if !cfg.Enabled {
t.Fatal("expected monitor to be enabled")
}
if cfg.URL != "https://example.com/health" {
t.Fatalf("unexpected URL: %s", cfg.URL)
}
if !strings.HasPrefix(cfg.ProxyURL, "socks5://") {
t.Fatalf("unexpected proxy URL: %s", cfg.ProxyURL)
}
if cfg.Interval != 15*time.Second {
t.Fatalf("unexpected interval: %s", cfg.Interval)
}
if cfg.Timeout != 3*time.Second {
t.Fatalf("unexpected timeout: %s", cfg.Timeout)
}
if cfg.FailureThreshold != 4 {
t.Fatalf("unexpected threshold: %d", cfg.FailureThreshold)
}
if cfg.Cooldown != 2*time.Minute {
t.Fatalf("unexpected cooldown: %s", cfg.Cooldown)
}
}
func failingClient() *http.Client {
return &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
return nil, errors.New("tunnel down")
}),
}
}
func statusClient(code int) *http.Client {
return &http.Client{
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
return &http.Response{StatusCode: code, Body: http.NoBody}, nil
}),
}
}
func TestProbeStatusCodeClassification(t *testing.T) {
cases := []struct {
status int
healthy bool
}{
{199, false},
{200, true},
{204, true},
{301, true},
{399, true},
{400, false},
{404, false},
{500, false},
}
for _, tc := range cases {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 100,
Cooldown: time.Minute,
}
monitor := newWithClient(cfg, statusClient(tc.status), func(ctx context.Context) error {
return nil
})
recovered, err := monitor.Step(context.Background())
if recovered {
t.Fatalf("status %d: unexpected recovery", tc.status)
}
if tc.healthy && err != nil {
t.Fatalf("status %d: expected healthy probe, got error %v", tc.status, err)
}
if !tc.healthy && err == nil {
t.Fatalf("status %d: expected failure, got nil error", tc.status)
}
}
}
func TestNormalizeClampsBounds(t *testing.T) {
got := Config{
URL: " ",
Interval: 0,
Timeout: 500 * time.Millisecond,
FailureThreshold: 0,
Cooldown: 0,
}.Normalize()
if got.URL != defaultHealthURL {
t.Fatalf("URL not defaulted: %q", got.URL)
}
if got.Interval != defaultInterval {
t.Fatalf("Interval not clamped: %s", got.Interval)
}
if got.Timeout != defaultTimeout {
t.Fatalf("Timeout not clamped: %s", got.Timeout)
}
if got.FailureThreshold != defaultFailureThreshold {
t.Fatalf("FailureThreshold not clamped: %d", got.FailureThreshold)
}
if got.Cooldown != defaultCooldown {
t.Fatalf("Cooldown not clamped: %s", got.Cooldown)
}
valid := Config{
URL: "https://example.com/health",
Interval: 15 * time.Second,
Timeout: 3 * time.Second,
FailureThreshold: 5,
Cooldown: 2 * time.Minute,
}.Normalize()
if valid.URL != "https://example.com/health" ||
valid.Interval != 15*time.Second ||
valid.Timeout != 3*time.Second ||
valid.FailureThreshold != 5 ||
valid.Cooldown != 2*time.Minute {
t.Fatalf("valid config was mutated: %+v", valid)
}
}
func TestNewRejectsUnsupportedProxyScheme(t *testing.T) {
m, err := New(Config{ProxyURL: "ftp://127.0.0.1:21"}, func(ctx context.Context) error {
return nil
})
if err == nil || m != nil {
t.Fatalf("expected error and nil monitor for bad scheme, got m=%v err=%v", m, err)
}
m, err = New(Config{}, func(ctx context.Context) error {
return nil
})
if err != nil || m == nil {
t.Fatalf("expected a valid monitor for empty proxy, got m=%v err=%v", m, err)
}
}
func TestMonitorRecoveryErrorDoesNotArmCooldown(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 1,
Cooldown: time.Minute,
}
attempts := 0
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
attempts++
return errors.New("restart failed")
})
monitor.now = func() time.Time {
return time.Unix(100, 0)
}
recovered, err := monitor.Step(context.Background())
if recovered || err == nil {
t.Fatalf("failed recovery must report recovered=false with an error, got recovered=%v err=%v", recovered, err)
}
if !monitor.lastRecovery.IsZero() {
t.Fatal("a failed recovery must not arm the cooldown")
}
if _, err := monitor.Step(context.Background()); err == nil {
t.Fatal("expected error on the second failing step")
}
if attempts != 2 {
t.Fatalf("recovery should be retried (no cooldown) after a failure, attempts=%d", attempts)
}
}
func TestMonitorNilRecoverStaysBounded(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 2,
Cooldown: time.Minute,
}
monitor := newWithClient(cfg, failingClient(), nil)
for i := 0; i < 5; i++ {
recovered, _ := monitor.Step(context.Background())
if recovered {
t.Fatal("a nil recovery func must never report recovery")
}
if monitor.failures > cfg.FailureThreshold {
t.Fatalf("failures must stay capped at threshold %d, got %d", cfg.FailureThreshold, monitor.failures)
}
}
}
func TestMonitorFailuresCappedDuringCooldown(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Interval: time.Minute,
Timeout: time.Second,
FailureThreshold: 2,
Cooldown: time.Minute,
}
restarts := 0
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
restarts++
return nil
})
monitor.now = func() time.Time {
return time.Unix(100, 0)
}
monitor.Step(context.Background())
if recovered, _ := monitor.Step(context.Background()); !recovered {
t.Fatal("expected recovery once the threshold is reached")
}
for i := 0; i < 6; i++ {
monitor.Step(context.Background())
if monitor.failures > cfg.FailureThreshold {
t.Fatalf("failures must never exceed threshold %d during cooldown, got %d", cfg.FailureThreshold, monitor.failures)
}
}
if restarts != 1 {
t.Fatalf("cooldown should suppress further recoveries, restarts=%d", restarts)
}
}
func TestMonitorRunStopsOnContextCancel(t *testing.T) {
cfg := Config{
Enabled: true,
URL: "http://example.test",
Timeout: time.Second,
FailureThreshold: 1,
Cooldown: time.Hour,
}
recovered := make(chan struct{})
var once sync.Once
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
once.Do(func() { close(recovered) })
return nil
})
monitor.cfg.Interval = 5 * time.Millisecond
ctx, cancel := context.WithCancel(context.Background())
done := make(chan struct{})
go func() {
monitor.Run(ctx)
close(done)
}()
select {
case <-recovered:
case <-time.After(2 * time.Second):
cancel()
t.Fatal("Run did not trigger recovery within the deadline")
}
cancel()
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatal("Run did not return after context cancellation")
}
}
+28 -1
View File
@@ -3,6 +3,7 @@
package main
import (
"context"
"flag"
"fmt"
"log"
@@ -18,6 +19,7 @@ import (
"github.com/mhsanaei/3x-ui/v3/internal/database"
"github.com/mhsanaei/3x-ui/v3/internal/logger"
"github.com/mhsanaei/3x-ui/v3/internal/sub"
"github.com/mhsanaei/3x-ui/v3/internal/tunnelmonitor"
"github.com/mhsanaei/3x-ui/v3/internal/util/crypto"
"github.com/mhsanaei/3x-ui/v3/internal/util/sys"
"github.com/mhsanaei/3x-ui/v3/internal/web"
@@ -91,7 +93,7 @@ func runWebServer() {
return
}
sigCh := make(chan os.Signal, 1)
sigCh := make(chan os.Signal, 8)
// Trap shutdown signals
signal.Notify(sigCh, syscall.SIGHUP, syscall.SIGTERM, sys.SIGUSR1, os.Interrupt)
global.SetRestartHook(func() {
@@ -100,6 +102,27 @@ func runWebServer() {
default:
}
})
var stopTunnelHealthMonitor context.CancelFunc
monitorCfg := tunnelmonitor.ConfigFromEnv()
if monitorCfg.Enabled {
if monitorCfg.ProxyURL == "" {
logger.Warning("Tunnel health monitor enabled without XUI_TUNNEL_HEALTH_PROXY: the probe measures host connectivity, not the xray tunnel, so failures will restart xray without fixing host network issues")
}
monitorCtx, cancel := context.WithCancel(context.Background())
stopTunnelHealthMonitor = cancel
monitor, err := tunnelmonitor.New(monitorCfg, func(_ context.Context) error {
logger.Warning("Tunnel health monitor threshold reached, restarting xray-core")
return server.RestartXray()
})
if err != nil {
logger.Warning("Tunnel health monitor disabled: ", err)
} else {
go monitor.Run(monitorCtx)
}
}
for {
sig := <-sigCh
@@ -142,6 +165,10 @@ func runWebServer() {
}
default:
if stopTunnelHealthMonitor != nil {
stopTunnelHealthMonitor()
}
// --- FIX FOR TELEGRAM BOT CONFLICT (409) on full shutdown ---
tgbot.StopBot()
// ------------------------------------------------------------