Files
LangBot/docker/kubernetes.yaml
RockChinQ 47fe9bde03 docs(docker): move k8s deployment docs to wiki, drop README_K8S.md
The Kubernetes deployment guide now lives only in the wiki
(docs.langbot.app -> Installation -> Kubernetes). Remove the in-repo
docker/README_K8S.md, repoint the README language variants and the
docker-compose / kubernetes.yaml header comments to the wiki, and keep
kubernetes.yaml self-describing via inline comments.
2026-06-07 11:36:39 -04:00

572 lines
16 KiB
YAML

# Kubernetes Deployment for LangBot
# This file provides Kubernetes deployment manifests for LangBot based on docker-compose.yaml
#
# Full deployment guide (zh/en/ja): https://docs.langbot.app -> Installation -> Kubernetes
#
# Usage:
#   kubectl apply -f kubernetes.yaml
#
# Prerequisites:
#   - A Kubernetes cluster (1.19+)
#   - kubectl configured to communicate with your cluster
#   - (Optional) A StorageClass for dynamic volume provisioning
#   - For the Box sandbox runtime: a node with a reachable Docker daemon
#     (the box mounts the node's /var/run/docker.sock). See the deployment guide.
#
# Components:
#   - Namespace: langbot
#   - PersistentVolumeClaims for data persistence
#   - Deployments for langbot, langbot-plugin-runtime, and langbot-box (sandbox)
#   - Services for network access
#   - ConfigMap for timezone + runtime endpoints
---
# Namespace that scopes every LangBot resource defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: langbot
  labels:
    app: langbot
---
# Persistent storage for LangBot data; the langbot Deployment mounts this
# claim at /app/data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: langbot-data
  namespace: langbot
spec:
  # To bind to a specific StorageClass, uncomment and adjust the next line.
  # storageClassName: your-storage-class
  resources:
    requests:
      storage: 10Gi
  accessModes:
    - ReadWriteOnce
---
# Persistent storage for LangBot plugins; the langbot Deployment mounts this
# claim at /app/plugins.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: langbot-plugins
  namespace: langbot
spec:
  # To bind to a specific StorageClass, uncomment and adjust the next line.
  # storageClassName: your-storage-class
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
---
# Persistent storage for the Plugin Runtime; the langbot-plugin-runtime
# Deployment mounts this claim at /app/data/plugins.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: langbot-plugin-runtime-data
  namespace: langbot
spec:
  # To bind to a specific StorageClass, uncomment and adjust the next line.
  # storageClassName: your-storage-class
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
---
# Shared, non-secret environment values. The Deployments in this file read
# individual keys from it through configMapKeyRef.
apiVersion: v1
kind: ConfigMap
metadata:
  name: langbot-config
  namespace: langbot
data:
  # Timezone passed to every container as TZ.
  TZ: "Asia/Shanghai"
  # Plugin runtime control endpoint; host and port match the
  # langbot-plugin-runtime Service.
  PLUGIN__RUNTIME_WS_URL: "ws://langbot-plugin-runtime:5400/control/ws"
  # Box sandbox runtime endpoint, reached by LangBot over WebSocket. The
  # hostname MUST equal the langbot-box Service name. The in-container
  # default ("langbot_box") contains an underscore, which is not a valid
  # Kubernetes DNS name, so the endpoint is always set explicitly here.
  BOX__RUNTIME__ENDPOINT: "ws://langbot-box:5410"
---
# Deployment for LangBot Plugin Runtime
#
# Runs the plugin runtime from the same image as LangBot by overriding the
# container command, listens on TCP 5400, and persists /app/data/plugins on
# the langbot-plugin-runtime-data PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: langbot-plugin-runtime
  namespace: langbot
  labels:
    app: langbot-plugin-runtime
spec:
  replicas: 1
  # The plugin-data volume is a ReadWriteOnce PVC. With the default
  # RollingUpdate strategy the replacement pod is started while the old pod
  # still holds the volume; if it is scheduled onto a different node the
  # volume cannot be attached and the rollout never completes. Recreate
  # terminates the old pod first, so the claim is free for the new one.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: langbot-plugin-runtime
  template:
    metadata:
      labels:
        app: langbot-plugin-runtime
    spec:
      containers:
        - name: langbot-plugin-runtime
          image: rockchin/langbot:latest
          imagePullPolicy: Always
          command: ["uv", "run", "-m", "langbot_plugin.cli.__init__", "rt"]
          ports:
            - containerPort: 5400
              name: runtime
              protocol: TCP
          env:
            - name: TZ
              valueFrom:
                configMapKeyRef:
                  name: langbot-config
                  key: TZ
          volumeMounts:
            - name: plugin-data
              mountPath: /app/data/plugins
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Liveness probe to restart container if it becomes unresponsive
          livenessProbe:
            tcpSocket:
              port: 5400
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness probe to know when container is ready to accept traffic
          readinessProbe:
            tcpSocket:
              port: 5400
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: plugin-data
          persistentVolumeClaim:
            claimName: langbot-plugin-runtime-data
      restartPolicy: Always
---
# Cluster-internal Service in front of the plugin runtime pods. Its name is
# the hostname used in PLUGIN__RUNTIME_WS_URL in the langbot-config ConfigMap.
apiVersion: v1
kind: Service
metadata:
  name: langbot-plugin-runtime
  namespace: langbot
  labels:
    app: langbot-plugin-runtime
spec:
  type: ClusterIP
  selector:
    app: langbot-plugin-runtime
  ports:
    - name: runtime
      protocol: TCP
      port: 5400
      targetPort: 5400
---
# Deployment for LangBot Box (sandbox) runtime
#
# The Box runtime backs LangBot's sandbox tools (exec / read / write / edit /
# glob / grep), the `activate` skill tool, skill add/edit, and stdio-mode MCP
# servers. It is OPTIONAL: if you do not deploy it, set `BOX__ENABLED=false` on
# the langbot Deployment (or `box.enabled: false` in config.yaml) so the
# dashboard renders cleanly with sandbox features disabled.
#
# IMPORTANT — how the sandbox actually runs:
# The bundled image ships only the Docker CLI (no dockerd, no nsjail). The Box
# runtime therefore creates sandbox containers by talking to a Docker daemon
# over the mounted socket (`/var/run/docker.sock`). Because that daemon
# resolves bind-mount paths on the NODE filesystem, the Box workspace root
# must be the SAME absolute path inside the box container, inside every
# sandbox container it spawns, AND on the node. That is why this manifest uses
# a hostPath at a fixed absolute path (/app/data/box) and pins langbot + box
# to the same node via podAffinity. A normal PVC will NOT work for the box
# workspace, because the node's dockerd cannot see paths that exist only
# inside the pod's mount namespace.
#
# Security note: mounting the host Docker socket grants the Box runtime (and any
# code executed in the sandbox) effective root on the node. Only deploy Box on
# nodes you trust for this workload, ideally a dedicated node pool. For a
# stronger isolation boundary, switch box.backend to 'e2b' (set E2B_API_KEY) and
# drop the docker.sock mount + hostPath entirely.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: langbot-box
  namespace: langbot
  labels:
    app: langbot-box
spec:
  replicas: 1
  selector:
    matchLabels:
      app: langbot-box
  template:
    metadata:
      labels:
        app: langbot-box
    spec:
      restartPolicy: Always
      # Schedule onto the node that runs the langbot pod, so both pods see the
      # same hostPath box root.
      affinity:
        podAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - topologyKey: kubernetes.io/hostname
              labelSelector:
                matchLabels:
                  app: langbot
      volumes:
        - name: box-root
          hostPath:
            path: /app/data/box
            type: DirectoryOrCreate
        - name: docker-sock
          hostPath:
            path: /var/run/docker.sock
            type: Socket
      containers:
        - name: langbot-box
          image: rockchin/langbot:latest
          imagePullPolicy: Always
          # Same CLI entry point as the plugin runtime, with the `box`
          # sub-command. No flag => WebSocket control transport (default),
          # listening on 5410.
          command:
            - "uv"
            - "run"
            - "--no-sync"
            - "-m"
            - "langbot_plugin.cli.__init__"
            - "box"
          ports:
            - name: box-rpc
              containerPort: 5410
              protocol: TCP
          env:
            - name: TZ
              valueFrom:
                configMapKeyRef:
                  name: langbot-config
                  key: TZ
            # The Box runtime does NOT read box.local.* / BOX__* from its own
            # env; it receives its configuration from LangBot via the INIT
            # RPC action. Do not add BOX__* here — they would be silently
            # ignored.
          volumeMounts:
            # Box workspace root — the path must be identical on the node, in
            # this container, and in the sandbox containers.
            - name: box-root
              mountPath: /app/data/box
            # Host Docker socket — the sandbox backend uses it to create
            # containers.
            - name: docker-sock
              mountPath: /var/run/docker.sock
          resources:
            requests:
              cpu: "100m"
              memory: "256Mi"
            limits:
              cpu: "1000m"
              memory: "1Gi"
          # Restart the container when the RPC port stops accepting
          # connections.
          livenessProbe:
            tcpSocket:
              port: 5410
            initialDelaySeconds: 20
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Only route Service traffic once the RPC port accepts connections.
          readinessProbe:
            tcpSocket:
              port: 5410
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
---
# Cluster-internal Service in front of the Box runtime pod. Its name is the
# hostname used in BOX__RUNTIME__ENDPOINT in the langbot-config ConfigMap.
apiVersion: v1
kind: Service
metadata:
  name: langbot-box
  namespace: langbot
  labels:
    app: langbot-box
spec:
  type: ClusterIP
  selector:
    app: langbot-box
  ports:
    - name: box-rpc
      protocol: TCP
      port: 5410
      targetPort: 5410
---
# Deployment for LangBot
#
# Runs the main LangBot container (port 5300 plus the webhook ports), with
# /app/data and /app/plugins on PVCs and the Box workspace root on a hostPath
# shared with the langbot-box Deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: langbot
  namespace: langbot
  labels:
    app: langbot
spec:
  replicas: 1
  # The data and plugins volumes are ReadWriteOnce PVCs. With the default
  # RollingUpdate strategy the replacement pod is started while the old pod
  # still holds them; if it is scheduled onto a different node the volumes
  # cannot be attached and the rollout never completes. Recreate terminates
  # the old pod first, which also prevents two LangBot instances from briefly
  # running against the same data directory.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: langbot
  template:
    metadata:
      labels:
        app: langbot
    spec:
      containers:
        - name: langbot
          image: rockchin/langbot:latest
          imagePullPolicy: Always
          # Note: container ports cannot be declared as a range, and the list
          # below is informational only. Just the first webhook port is
          # declared here; the langbot Service maps every webhook port in the
          # range 2280-2290 to the pod.
          ports:
            - containerPort: 5300
              name: web
              protocol: TCP
            - containerPort: 2280
              name: webhook-start
              protocol: TCP
          env:
            - name: TZ
              valueFrom:
                configMapKeyRef:
                  name: langbot-config
                  key: TZ
            - name: PLUGIN__RUNTIME_WS_URL
              valueFrom:
                configMapKeyRef:
                  name: langbot-config
                  key: PLUGIN__RUNTIME_WS_URL
            # Box (sandbox) runtime endpoint. Connects LangBot to the
            # langbot-box Service over WebSocket. Remove this (and the
            # langbot-box Deployment) and set BOX__ENABLED=false if you do not
            # want the sandbox.
            - name: BOX__RUNTIME__ENDPOINT
              valueFrom:
                configMapKeyRef:
                  name: langbot-config
                  key: BOX__RUNTIME__ENDPOINT
            # box.local.* config — forwarded to the Box runtime via INIT RPC.
            # The host_root MUST match the box-root mountPath below AND the
            # box Deployment's box-root mountPath, so that skill package paths
            # resolve identically on both sides and on the node's Docker
            # daemon.
            - name: BOX__LOCAL__HOST_ROOT
              value: "/app/data/box"
            - name: BOX__LOCAL__DEFAULT_WORKSPACE
              value: "default"
            - name: BOX__LOCAL__SKILLS_ROOT
              value: "skills"
            - name: BOX__LOCAL__ALLOWED_MOUNT_ROOTS
              value: "/app/data/box"
          volumeMounts:
            - name: data
              mountPath: /app/data
            - name: plugins
              mountPath: /app/plugins
            # Same node-level box root as the langbot-box Deployment. Mounted
            # over the data PVC's /app/data/box subpath so both LangBot and
            # the Box runtime (and the node's dockerd) agree on one absolute
            # path.
            - name: box-root
              mountPath: /app/data/box
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
          # Liveness probe to restart container if it becomes unresponsive
          livenessProbe:
            httpGet:
              path: /
              port: 5300
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness probe to know when container is ready to accept traffic
          readinessProbe:
            httpGet:
              path: /
              port: 5300
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: langbot-data
        - name: plugins
          persistentVolumeClaim:
            claimName: langbot-plugins
        # Node-level box workspace root, shared with the langbot-box
        # Deployment. hostPath (not PVC) because the node's Docker daemon must
        # see the same absolute path when bind-mounting workspaces into
        # sandbox containers.
        - name: box-root
          hostPath:
            path: /app/data/box
            type: DirectoryOrCreate
      restartPolicy: Always
---
# Cluster-internal Service for LangBot: the `web` port (5300) plus one entry
# for each webhook port in the range 2280-2290.
apiVersion: v1
kind: Service
metadata:
  name: langbot
  namespace: langbot
  labels:
    app: langbot
spec:
  type: ClusterIP
  selector:
    app: langbot
  ports:
    - name: web
      protocol: TCP
      port: 5300
      targetPort: 5300
    - name: webhook-2280
      protocol: TCP
      port: 2280
      targetPort: 2280
    - name: webhook-2281
      protocol: TCP
      port: 2281
      targetPort: 2281
    - name: webhook-2282
      protocol: TCP
      port: 2282
      targetPort: 2282
    - name: webhook-2283
      protocol: TCP
      port: 2283
      targetPort: 2283
    - name: webhook-2284
      protocol: TCP
      port: 2284
      targetPort: 2284
    - name: webhook-2285
      protocol: TCP
      port: 2285
      targetPort: 2285
    - name: webhook-2286
      protocol: TCP
      port: 2286
      targetPort: 2286
    - name: webhook-2287
      protocol: TCP
      port: 2287
      targetPort: 2287
    - name: webhook-2288
      protocol: TCP
      port: 2288
      targetPort: 2288
    - name: webhook-2289
      protocol: TCP
      port: 2289
      targetPort: 2289
    - name: webhook-2290
      protocol: TCP
      port: 2290
      targetPort: 2290
---
# Ingress for external access (Optional - requires Ingress Controller)
# Uncomment and modify the following section if you want to expose LangBot via Ingress
# apiVersion: networking.k8s.io/v1
# kind: Ingress
# metadata:
#   name: langbot-ingress
#   namespace: langbot
#   annotations:
#     # Uncomment and modify based on your ingress controller
#     # nginx.ingress.kubernetes.io/rewrite-target: /
#     # cert-manager.io/cluster-issuer: letsencrypt-prod
# spec:
#   ingressClassName: nginx  # Change based on your ingress controller
#   rules:
#     - host: langbot.yourdomain.com  # Change to your domain
#       http:
#         paths:
#           - path: /
#             pathType: Prefix
#             backend:
#               service:
#                 name: langbot
#                 port:
#                   number: 5300
#   # Uncomment for TLS/HTTPS
#   # tls:
#   #   - hosts:
#   #       - langbot.yourdomain.com
#   #     secretName: langbot-tls
---
# Service for LangBot with LoadBalancer (Alternative to Ingress)
# Uncomment the following if you want to expose LangBot directly via LoadBalancer
# This is useful in cloud environments (AWS, GCP, Azure, etc.)
# apiVersion: v1
# kind: Service
# metadata:
#   name: langbot-loadbalancer
#   namespace: langbot
#   labels:
#     app: langbot
# spec:
#   type: LoadBalancer
#   selector:
#     app: langbot
#   ports:
#     - port: 80
#       targetPort: 5300
#       protocol: TCP
#       name: web
#     - port: 2280
#       targetPort: 2280
#       protocol: TCP
#       name: webhook-start
#     # Add more webhook ports as needed
---
# Service for LangBot with NodePort (Alternative for exposing service)
# Uncomment if you want to expose LangBot via NodePort
# This is useful for testing or when LoadBalancer is not available
# apiVersion: v1
# kind: Service
# metadata:
#   name: langbot-nodeport
#   namespace: langbot
#   labels:
#     app: langbot
# spec:
#   type: NodePort
#   selector:
#     app: langbot
#   ports:
#     - port: 5300
#       targetPort: 5300
#       nodePort: 30300  # Must be in range 30000-32767
#       protocol: TCP
#       name: web
#     - port: 2280
#       targetPort: 2280
#       nodePort: 30280  # Must be in range 30000-32767
#       protocol: TCP
#       name: webhook