fix(jobs): isolate per-node background goroutines from panics (#5397)

A panic in a goroutine without a recover takes the whole panel down. The
per-node heartbeat and traffic-sync goroutines run remote network I/O for
each node with no panic isolation, so one misbehaving node could crash the
master.

Add common.GoRecover(name, fn), which runs fn in a goroutine guarded by a
recover that logs the panic with a stack trace instead of crashing, and use
it for the per-node heartbeat, traffic-sync and global-push goroutines. The
deferred WaitGroup/semaphore releases still run during panic unwind, so the
group never stalls. Other background goroutines can adopt the same helper.
This commit is contained in:
n0ctal
2026-06-20 03:38:25 +05:00
committed by GitHub
parent bedbe04bf1
commit f63ed9f510
4 changed files with 67 additions and 7 deletions
+15
View File
@@ -4,6 +4,7 @@ package common
import (
"errors"
"fmt"
"runtime/debug"
"github.com/mhsanaei/3x-ui/v3/internal/logger"
)
@@ -30,3 +31,17 @@ func Recover(msg string) any {
}
return panicErr
}
// GoRecover launches fn on its own goroutine and returns without waiting for
// it. The goroutine is wrapped in a deferred recover: if fn panics, the panic
// value is reported through logger.Error together with name and the stack
// captured by debug.Stack, and the panic does not propagate any further.
//
// name is only used to label the log entry so the failing goroutine can be
// identified.
func GoRecover(name string, fn func()) {
	guarded := func() {
		// recover must be called directly by the deferred function to stop
		// the panic, so the handler stays an inline closure.
		defer func() {
			cause := recover()
			if cause == nil {
				return
			}
			trace := "\n" + string(debug.Stack())
			logger.Error("panic in goroutine", name, ":", cause, trace)
		}()
		fn()
	}
	go guarded()
}
+41
View File
@@ -0,0 +1,41 @@
package common
import (
"os"
"testing"
"time"
"github.com/mhsanaei/3x-ui/v3/internal/logger"
"github.com/op/go-logging"
)
// TestMain calls logger.InitLogger with logging.ERROR before running the
// package's tests, then exits the process with the code returned by m.Run.
func TestMain(m *testing.M) {
	logger.InitLogger(logging.ERROR)
	exitCode := m.Run()
	os.Exit(exitCode)
}
// TestGoRecover_RunsFn checks that the function handed to GoRecover is
// executed: the callback closes a channel, and the test fails if that close
// is not observed within two seconds.
func TestGoRecover_RunsFn(t *testing.T) {
	ran := make(chan struct{})
	signal := func() { close(ran) }
	GoRecover("test-run", signal)

	timeout := time.After(2 * time.Second)
	select {
	case <-timeout:
		t.Fatal("fn did not run")
	case <-ran:
		// fn executed; nothing more to verify.
	}
}
// TestGoRecover_RecoversPanic runs a function that panics under GoRecover and
// waits for it to finish. An unrecovered panic in that goroutine would crash
// the test binary, so reaching the end of the test shows the panic was
// contained.
func TestGoRecover_RecoversPanic(t *testing.T) {
	finished := make(chan struct{})
	panicking := func() {
		// The deferred close runs while the panic unwinds, so the test is
		// signalled even though the function never returns normally.
		defer close(finished)
		panic("boom")
	}
	GoRecover("test-panic", panicking)

	deadline := time.After(2 * time.Second)
	select {
	case <-deadline:
		t.Fatal("goroutine did not complete")
	case <-finished:
	}

	// finished is closed before GoRecover's own deferred recover and log call
	// run, so give them a moment to complete before the test returns.
	time.Sleep(50 * time.Millisecond)
}