<!--
  mirror of https://github.com/zhwei820/learn.lianglianglee.com.git
  synced 2025-09-17 08:46:40 +08:00
-->
<!DOCTYPE html>
|
||
|
||
<!-- saved from url=(0046)https://kaiiiz.github.io/hexo-theme-book-demo/ -->
|
||
|
||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||
|
||
<head>
|
||
|
||
<!-- removed duplicate <head> open tag; the document head opens at the tag above -->
|
||
|
||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
|
||
<link rel="icon" href="/static/favicon.png">
|
||
|
||
<title>23 监控实践:对 K8S 集群进行监控.md.html</title>
|
||
|
||
<!-- Spectre.css framework -->
|
||
|
||
<link rel="stylesheet" href="/static/index.css">
|
||
|
||
<!-- theme css & js -->
|
||
|
||
<meta name="generator" content="Hexo 4.2.0">
|
||
|
||
</head>
|
||
<body>
|
||
<div class="book-container">
|
||
|
||
<div class="book-sidebar">
|
||
|
||
<div class="book-brand">
|
||
|
||
<a href="/">
|
||
|
||
<img src="/static/favicon.png">
|
||
|
||
<span>技术文章摘抄</span>
|
||
|
||
</a>
|
||
|
||
</div>
|
||
|
||
<div class="book-menu uncollapsible">
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="/" class="current-tab">首页</a></li>
|
||
|
||
</ul>
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="../">上一级</a></li>
|
||
|
||
</ul>
|
||
<ul class="uncollapsible">
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/01 开篇: Kubernetes 是什么以及为什么需要它.md.html">01 开篇: Kubernetes 是什么以及为什么需要它.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/02 初步认识:Kubernetes 基础概念.md.html">02 初步认识:Kubernetes 基础概念.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/03 宏观认识:整体架构.md.html">03 宏观认识:整体架构.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/04 搭建 Kubernetes 集群 - 本地快速搭建.md.html">04 搭建 Kubernetes 集群 - 本地快速搭建.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/05 动手实践:搭建一个 Kubernetes 集群 - 生产可用.md.html">05 动手实践:搭建一个 Kubernetes 集群 - 生产可用.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/06 集群管理:初识 kubectl.md.html">06 集群管理:初识 kubectl.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/07 集群管理:以 Redis 为例-部署及访问.md.html">07 集群管理:以 Redis 为例-部署及访问.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/08 安全重点 认证和授权.md.html">08 安全重点 认证和授权.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/09 应用发布:部署实际项目.md.html">09 应用发布:部署实际项目.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/10 应用管理:初识 Helm.md.html">10 应用管理:初识 Helm.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/11 部署实践:以 Helm 部署项目.md.html">11 部署实践:以 Helm 部署项目.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/12 庖丁解牛:kube-apiserver.md.html">12 庖丁解牛:kube-apiserver.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/13 庖丁解牛:etcd.md.html">13 庖丁解牛:etcd.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/14 庖丁解牛:controller-manager.md.html">14 庖丁解牛:controller-manager.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/15 庖丁解牛:kube-scheduler.md.html">15 庖丁解牛:kube-scheduler.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/16 庖丁解牛:kubelet.md.html">16 庖丁解牛:kubelet.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/17 庖丁解牛:kube-proxy.md.html">17 庖丁解牛:kube-proxy.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/18 庖丁解牛:Container Runtime (Docker).md.html">18 庖丁解牛:Container Runtime (Docker).md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/19 Troubleshoot.md.html">19 Troubleshoot.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/20 扩展增强:Dashboard.md.html">20 扩展增强:Dashboard.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/21 扩展增强:CoreDNS.md.html">21 扩展增强:CoreDNS.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/22 服务增强:Ingress.md.html">22 服务增强:Ingress.md.html</a>
|
||
</li>
|
||
|
||
<li>
|
||
<a class="current-tab" href="/专栏/Kubernetes 从上手到实践/23 监控实践:对 K8S 集群进行监控.md.html">23 监控实践:对 K8S 集群进行监控.md.html</a>
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/24 总结.md.html">24 总结.md.html</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
</div>
|
||
<div class="sidebar-toggle" onclick="sidebar_toggle()" onmouseover="add_inner()" onmouseleave="remove_inner()">
|
||
|
||
<div class="sidebar-toggle-inner"></div>
|
||
|
||
</div>
|
||
<script>
|
||
|
||
// Show the sidebar toggle handle when the pointer hovers over it.
// (Removed stray "|" artifact lines that made this script invalid.)
function add_inner() {
  let inner = document.querySelector('.sidebar-toggle-inner')
  inner.classList.add('show')
}
|
||
// Hide the sidebar toggle handle when the pointer leaves it.
// (Removed stray "|" artifact lines that made this script invalid.)
function remove_inner() {
  let inner = document.querySelector('.sidebar-toggle-inner')
  inner.classList.remove('show')
}
|
||
// Toggle the sidebar between visible and hidden, keeping the toggle
// handle (.extend), the sidebar (.hide) and the content area (.extend)
// class states in sync. (Removed stray "|" artifact lines that made
// this script invalid.)
function sidebar_toggle() {
  let sidebar_toggle = document.querySelector('.sidebar-toggle')
  let sidebar = document.querySelector('.book-sidebar')
  let content = document.querySelector('.off-canvas-content')
  if (sidebar_toggle.classList.contains('extend')) { // show
    sidebar_toggle.classList.remove('extend')
    sidebar.classList.remove('hide')
    content.classList.remove('extend')
  } else { // hide
    sidebar_toggle.classList.add('extend')
    sidebar.classList.add('hide')
    content.classList.add('extend')
  }
}
|
||
|
||
|
||
// Open the off-canvas sidebar (responsive/mobile layout) and show the
// click-to-close overlay behind it. (Removed stray "|" artifact lines
// that made this script invalid.)
function open_sidebar() {
  let sidebar = document.querySelector('.book-sidebar')
  let overlay = document.querySelector('.off-canvas-overlay')
  sidebar.classList.add('show')
  overlay.classList.add('show')
}
|
||
|
||
// Close the off-canvas sidebar and its overlay; bound to the overlay's
// click handler. (Removed stray "|" artifact lines that made this
// script invalid.)
function hide_canvas() {
  let sidebar = document.querySelector('.book-sidebar')
  let overlay = document.querySelector('.off-canvas-overlay')
  sidebar.classList.remove('show')
  overlay.classList.remove('show')
}
|
||
</script>
|
||
<div class="off-canvas-content">
|
||
|
||
<div class="columns">
|
||
|
||
<div class="column col-12 col-lg-12">
|
||
|
||
<div class="book-navbar">
|
||
|
||
<!-- For Responsive Layout -->
|
||
|
||
<header class="navbar">
|
||
|
||
<section class="navbar-section">
|
||
|
||
<a onclick="open_sidebar()">
|
||
|
||
<i class="icon icon-menu"></i>
|
||
|
||
</a>
|
||
|
||
</section>
|
||
|
||
</header>
|
||
|
||
</div>
|
||
|
||
<div class="book-content" style="max-width: 960px; margin: 0 auto;
|
||
|
||
overflow-x: auto;
|
||
|
||
overflow-y: hidden;">
|
||
|
||
<div class="book-post">
|
||
|
||
<p id="tip" align="center"></p>
|
||
|
||
<div><h1>23 监控实践:对 K8S 集群进行监控</h1>
|
||
|
||
<h2>整体概览</h2>
|
||
|
||
<p>通过前面的学习,我们对 K8S 有了一定的了解,也具备了一定的集群管理和排错能力。但如果要应用于生产环境中,不可能随时随地的都盯着集群,我们需要扩展我们对集群的感知能力。</p>
|
||
|
||
<p>本节,我们将介绍下 K8S 集群监控相关的内容。</p>
|
||
|
||
<h2>监控什么</h2>
|
||
|
||
<p>除去 K8S 外,我们平时自己开发的系统或者负责的项目,一般都是有监控的。监控可以提升我们的感知能力,便于我们及时了解集群的变化,以及知道哪里出现了问题。</p>
|
||
|
||
<p>K8S 是一个典型的分布式系统,组件很多,那么监控的目标,就变的很重要了。</p>
|
||
|
||
<p>总体来讲,对 K8S 集群的监控的话,主要有以下方面:</p>
|
||
|
||
<ul>
|
||
|
||
<li>节点情况</li>
|
||
|
||
<li>K8S 集群自身状态</li>
|
||
|
||
<li>部署在 K8S 内的应用的状态</li>
|
||
|
||
</ul>
|
||
|
||
<h2>Prometheus</h2>
|
||
|
||
<p>对于 K8S 的监控,我们选择 CNCF 旗下继 K8S 之后毕业的项目<a href="https://prometheus.io/"> Prometheus </a>。</p>
|
||
|
||
<p>Prometheus 是一个非常灵活易于扩展的监控系统,它通过各种 <code>exporter</code> 暴露数据,并由 <code>prometheus server</code> 定时去拉数据,然后存储。</p>
|
||
|
||
<p>它自己提供了一个简单的前端界面,可在其中使用 <a href="https://prometheus.io/docs/prometheus/latest/querying/basics/">PromQL </a>的语法进行查询,并进行图形化展示。</p>
|
||
|
||
<h2>安装 Prometheus</h2>
|
||
|
||
<blockquote>
|
||
|
||
<p>这里推荐一个项目 <a href="https://github.com/coreos/prometheus-operator">Prometheus Operator</a>, 尽管该项目还处于 Beta 阶段,但是它给在 K8S 中搭建基于 Prometheus 的监控提供了很大的便利。</p>
|
||
|
||
</blockquote>
|
||
|
||
<p>我们此处选择以一般的方式进行部署,带你了解其整体的过程。</p>
|
||
|
||
<ul>
|
||
|
||
<li>
|
||
|
||
<p>创建一个独立的 <code>Namespace</code>:</p>
|
||
|
||
<pre><code>apiVersion: v1
|
||
|
||
kind: Namespace
|
||
|
||
metadata:
|
||
|
||
name: monitoring
|
||
# 将文件保存为 namespace.yaml 的文件,并执行 kubectl apply -f namespace.yaml 即可,后面不再赘述。
|
||
|
||
</code></pre>
|
||
|
||
<pre><code>master $ kubectl apply -f namespace.yaml
|
||
|
||
namespace/monitoring created
|
||
|
||
</code></pre>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>RBAC</p>
|
||
|
||
<p>我们的集群使用 <code>kubeadm</code> 创建,默认开启了 <code>RBAC</code>,所以现在需要创建相关的 Role 和 binding。</p>
|
||
|
||
<pre><code>apiVersion: rbac.authorization.k8s.io/v1
|
||
|
||
kind: ClusterRoleBinding
|
||
|
||
metadata:
|
||
|
||
name: prometheus
|
||
|
||
roleRef:
|
||
|
||
apiGroup: rbac.authorization.k8s.io
|
||
|
||
kind: ClusterRole
|
||
|
||
name: prometheus
|
||
|
||
subjects:
|
||
|
||
- kind: ServiceAccount
|
||
|
||
name: prometheus-k8s
|
||
|
||
namespace: monitoring
|
||
|
||
---
|
||
|
||
apiVersion: rbac.authorization.k8s.io/v1
|
||
|
||
kind: ClusterRole
|
||
|
||
metadata:
|
||
|
||
name: prometheus
|
||
|
||
rules:
|
||
|
||
- apiGroups: [""]
|
||
|
||
resources:
|
||
|
||
- nodes
|
||
|
||
- nodes/proxy
|
||
|
||
- services
|
||
|
||
- endpoints
|
||
|
||
- pods
|
||
|
||
verbs: ["get", "list", "watch"]
|
||
|
||
- apiGroups: [""]
|
||
|
||
resources:
|
||
|
||
- configmaps
|
||
|
||
verbs: ["get"]
|
||
|
||
- nonResourceURLs: ["/metrics"]
|
||
|
||
verbs: ["get"]
|
||
|
||
---
|
||
|
||
apiVersion: v1
|
||
|
||
kind: ServiceAccount
|
||
|
||
metadata:
|
||
|
||
name: prometheus-k8s
|
||
|
||
namespace: monitoring
|
||
|
||
</code></pre>
|
||
|
||
<p>执行创建</p>
|
||
|
||
<pre><code>master $ kubectl apply -f rbac.yaml
|
||
|
||
clusterrolebinding.rbac.authorization.k8s.io/prometheus created
|
||
|
||
clusterrole.rbac.authorization.k8s.io/prometheus created
|
||
|
||
serviceaccount/prometheus-k8s created
|
||
|
||
</code></pre>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>创建 Prometheus 的配置文件</p>
|
||
|
||
<p>其中的内容主要参考 <a href="https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml">Prometheus 官方提供的示例</a> 和 <a href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config">Prometheus 官方文档</a>。</p>
|
||
|
||
<pre><code>apiVersion: v1
|
||
|
||
kind: ConfigMap
|
||
|
||
metadata:
|
||
|
||
name: prometheus-core
|
||
|
||
namespace: monitoring
|
||
|
||
data:
|
||
|
||
prometheus.yaml: |
|
||
|
||
global:
|
||
|
||
scrape_interval: 30s
|
||
|
||
scrape_timeout: 30s
|
||
|
||
scrape_configs:
|
||
|
||
- job_name: 'kubernetes-apiservers'
|
||
kubernetes_sd_configs:
|
||
|
||
- role: endpoints
|
||
|
||
scheme: https
|
||
tls_config:
|
||
|
||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||
|
||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||
relabel_configs:
|
||
|
||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||
|
||
action: keep
|
||
|
||
regex: default;kubernetes;https
|
||
# Scrape config for nodes (kubelet).
|
||
|
||
- job_name: 'kubernetes-nodes'
|
||
|
||
scheme: https
|
||
tls_config:
|
||
|
||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||
|
||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||
kubernetes_sd_configs:
|
||
|
||
- role: node
|
||
relabel_configs:
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_node_label_(.+)
|
||
|
||
- target_label: __address__
|
||
|
||
replacement: kubernetes.default.svc:443
|
||
|
||
- source_labels: [__meta_kubernetes_node_name]
|
||
|
||
regex: (.+)
|
||
|
||
target_label: __metrics_path__
|
||
|
||
replacement: /api/v1/nodes/${1}/proxy/metrics
|
||
# Scrape config for Kubelet cAdvisor.
|
||
|
||
- job_name: 'kubernetes-cadvisor'
|
||
scheme: https
|
||
tls_config:
|
||
|
||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||
|
||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||
kubernetes_sd_configs:
|
||
|
||
- role: node
|
||
relabel_configs:
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_node_label_(.+)
|
||
|
||
- target_label: __address__
|
||
|
||
replacement: kubernetes.default.svc:443
|
||
|
||
- source_labels: [__meta_kubernetes_node_name]
|
||
|
||
regex: (.+)
|
||
|
||
target_label: __metrics_path__
|
||
|
||
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
||
- job_name: 'kubernetes-service-endpoints'
|
||
kubernetes_sd_configs:
|
||
|
||
- role: endpoints
|
||
relabel_configs:
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_service_label_(.+)
|
||
|
||
- source_labels: [__meta_kubernetes_namespace]
|
||
|
||
action: replace
|
||
|
||
target_label: kubernetes_namespace
|
||
|
||
- source_labels: [__meta_kubernetes_service_name]
|
||
|
||
action: replace
|
||
|
||
target_label: kubernetes_name
|
||
- job_name: 'kubernetes-services'
|
||
metrics_path: /probe
|
||
|
||
params:
|
||
|
||
module: [http_2xx]
|
||
kubernetes_sd_configs:
|
||
|
||
- role: service
|
||
relabel_configs:
|
||
|
||
- source_labels: [__address__]
|
||
|
||
target_label: __param_target
|
||
|
||
- target_label: __address__
|
||
|
||
replacement: blackbox-exporter.example.com:9115
|
||
|
||
- source_labels: [__param_target]
|
||
|
||
target_label: instance
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_service_label_(.+)
|
||
|
||
- source_labels: [__meta_kubernetes_namespace]
|
||
|
||
target_label: kubernetes_namespace
|
||
|
||
- source_labels: [__meta_kubernetes_service_name]
|
||
|
||
target_label: kubernetes_name
|
||
- job_name: 'kubernetes-ingresses'
|
||
metrics_path: /probe
|
||
|
||
params:
|
||
|
||
module: [http_2xx]
|
||
kubernetes_sd_configs:
|
||
|
||
- role: ingress
|
||
relabel_configs:
|
||
|
||
- source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
|
||
|
||
regex: (.+);(.+);(.+)
|
||
|
||
replacement: ${1}://${2}${3}
|
||
|
||
target_label: __param_target
|
||
|
||
- target_label: __address__
|
||
|
||
replacement: blackbox-exporter.example.com:9115
|
||
|
||
- source_labels: [__param_target]
|
||
|
||
target_label: instance
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_ingress_label_(.+)
|
||
|
||
- source_labels: [__meta_kubernetes_namespace]
|
||
|
||
target_label: kubernetes_namespace
|
||
|
||
- source_labels: [__meta_kubernetes_ingress_name]
|
||
|
||
target_label: kubernetes_name
|
||
- job_name: 'kubernetes-pods'
|
||
kubernetes_sd_configs:
|
||
|
||
- role: pod
|
||
relabel_configs:
|
||
|
||
- action: labelmap
|
||
|
||
regex: __meta_kubernetes_pod_label_(.+)
|
||
|
||
- source_labels: [__meta_kubernetes_namespace]
|
||
|
||
action: replace
|
||
|
||
target_label: kubernetes_namespace
|
||
|
||
- source_labels: [__meta_kubernetes_pod_name]
|
||
|
||
action: replace
|
||
|
||
target_label: kubernetes_pod_name
|
||
|
||
</code></pre>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>部署 Prometheus</p>
|
||
|
||
<pre><code>apiVersion: extensions/v1beta1
|
||
|
||
kind: Deployment
|
||
|
||
metadata:
|
||
|
||
name: prometheus-core
|
||
|
||
namespace: monitoring
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: core
|
||
|
||
spec:
|
||
|
||
replicas: 1
|
||
|
||
template:
|
||
|
||
metadata:
|
||
|
||
name: prometheus-main
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: core
|
||
|
||
spec:
|
||
|
||
serviceAccountName: prometheus-k8s
|
||
|
||
containers:
|
||
|
||
- name: prometheus
|
||
|
||
image: taobeier/prometheus:v2.6.0
|
||
|
||
args:
|
||
|
||
- '--storage.tsdb.retention=24h'
|
||
|
||
- '--storage.tsdb.path=/prometheus'
|
||
|
||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
||
|
||
ports:
|
||
|
||
- name: webui
|
||
|
||
containerPort: 9090
|
||
|
||
resources:
|
||
|
||
requests:
|
||
|
||
cpu: 500m
|
||
|
||
memory: 500M
|
||
|
||
limits:
|
||
|
||
cpu: 500m
|
||
|
||
memory: 500M
|
||
|
||
volumeMounts:
|
||
|
||
- name: data
|
||
|
||
mountPath: /prometheus
|
||
|
||
- name: config-volume
|
||
|
||
mountPath: /etc/prometheus
|
||
|
||
volumes:
|
||
|
||
- name: data
|
||
|
||
emptyDir: {}
|
||
|
||
- name: config-volume
|
||
|
||
configMap:
|
||
|
||
name: prometheus-core
|
||
|
||
</code></pre>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>查看部署情况</p>
|
||
|
||
<pre><code>master $ kubectl -n monitoring get all
|
||
|
||
NAME READY STATUS RESTARTS AGE
|
||
|
||
pod/prometheus-core-86b8455f76-mvrn4 1/1 Running 0 12s
|
||
NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE
|
||
|
||
deployment.apps/prometheus-core 1 1 1 1 12s
|
||
NAME DESIRED CURRENT READY AGE
|
||
|
||
replicaset.apps/prometheus-core-86b8455f76 1 1 1 12s
|
||
|
||
</code></pre>
|
||
|
||
<p>Prometheus 的主体就已经部署完成。</p>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>使用 <code>Service</code> 将 <code>Prometheus</code> 的服务暴露出来</p>
|
||
|
||
<pre><code>apiVersion: v1
|
||
|
||
kind: Service
|
||
|
||
metadata:
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: core
|
||
|
||
name: prometheus
|
||
|
||
namespace: monitoring
|
||
|
||
spec:
|
||
|
||
ports:
|
||
|
||
- protocol: TCP
|
||
|
||
port: 9090
|
||
|
||
targetPort: 9090
|
||
|
||
selector:
|
||
|
||
app: prometheus
|
||
|
||
component: core
|
||
|
||
type: NodePort
|
||
|
||
</code></pre>
|
||
|
||
<p>这里为了方便演示,直接使用了 <code>NodePort</code> 的方式暴露服务。当然你也可以参考上一节,使用 <code>Ingress</code> 的方式将服务暴露出来。</p>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>查询当前状态</p>
|
||
|
||
<p>我们使用 Prometheus 自带的 PromQL 语法,查询在当前 <code>monitoring</code> Namespace 中 up 的任务。这里对查询的结果暂不进行展开。</p>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
<p><img src="assets/167df2ab2fc5b19a" alt="img" /></p>
|
||
|
||
<h2>安装 Node exporter</h2>
|
||
|
||
<p>我们刚才在介绍时,提到过 <code>Prometheus</code> 支持多种 <code>exporter</code> 暴露指标。我们现在使用 <a href="https://github.com/prometheus/node_exporter">Node exporter</a> 完成对集群中机器的基础监控。</p>
|
||
|
||
<p>这里有一个需要考虑的点:</p>
|
||
|
||
<ul>
|
||
|
||
<li>
|
||
|
||
<p>使用什么方式部署 Node exporter ?</p>
|
||
|
||
<p>Node exporter 有已经编译好的二进制文件,可以很方便的进行部署。当我们要监控集群中所有的机器时,我们是该将它直接部署在机器上,还是部署在集群内?</p>
|
||
|
||
<p>我建议是直接部署在集群内,使用 <code>DaemonSet</code> 的方式进行部署。这里的考虑是当我们直接部署在宿主机上时,我们最起码需要保证两点:1. Prometheus 服务可与它正常通信(Prometheus 采用 Pull 的方式采集数据) ;2. 需要服务保活,如果 exporter 挂掉了,那自然就取不到数据。</p>
|
||
|
||
<p><code>DaemonSet</code> 是一种很合适的的部署方式,可直接将 Node exporter 部署至集群的每个节点上。</p>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>创建 <code>DaemonSet</code></p>
|
||
|
||
<pre><code>apiVersion: extensions/v1beta1
|
||
|
||
kind: DaemonSet
|
||
|
||
metadata:
|
||
|
||
name: prometheus-node-exporter
|
||
|
||
namespace: monitoring
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: node-exporter
|
||
|
||
spec:
|
||
|
||
template:
|
||
|
||
metadata:
|
||
|
||
name: prometheus-node-exporter
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: node-exporter
|
||
|
||
spec:
|
||
|
||
tolerations:
|
||
|
||
- key: node-role.kubernetes.io/master
|
||
|
||
effect: NoSchedule
|
||
|
||
containers:
|
||
|
||
- image: taobeier/node-exporter:v0.17.0
|
||
|
||
name: prometheus-node-exporter
|
||
|
||
ports:
|
||
|
||
- name: prom-node-exp
|
||
|
||
containerPort: 9100
|
||
|
||
hostPort: 9100
|
||
|
||
hostNetwork: true
|
||
|
||
hostPID: true
|
||
|
||
</code></pre>
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
<p>让 Prometheus 抓取数据</p>
|
||
|
||
<pre><code>apiVersion: v1
|
||
|
||
kind: Service
|
||
|
||
metadata:
|
||
|
||
annotations:
|
||
|
||
prometheus.io/scrape: 'true'
|
||
|
||
name: prometheus-node-exporter
|
||
|
||
namespace: monitoring
|
||
|
||
labels:
|
||
|
||
app: prometheus
|
||
|
||
component: node-exporter
|
||
|
||
spec:
|
||
|
||
clusterIP: None
|
||
|
||
ports:
|
||
|
||
- name: prometheus-node-exporter
|
||
|
||
port: 9100
|
||
|
||
protocol: TCP
|
||
|
||
selector:
|
||
|
||
app: prometheus
|
||
|
||
component: node-exporter
|
||
|
||
type: ClusterIP
|
||
|
||
</code></pre>
|
||
|
||
<p>这里我们直接使用了添加 <code>annotations</code> 的方式,让 Prometheus 自动的通过 Kubernetes SD 发现我们新添加的 exporter (或者说资源)</p>
|
||
|
||
<p>我们访问 Prometheus 的 web 端,进行验证。</p>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
<p><img src="assets/167df6106b58b2fb" alt="img" /></p>
|
||
|
||
<h2>总结</h2>
|
||
|
||
<p>在本节中,我们介绍了 <code>Prometheus</code> 的基本情况,也部署了 <code>Prometheus</code> 的主体服务。</p>
|
||
|
||
<p>但这是结束么?这并不是,这才刚刚开始。</p>
|
||
|
||
<p>我们提到 <code>Prometheus</code> 支持多种 <code>exporter</code> 暴露各种指标,而且我们还可以使用 <a href="https://grafana.com/">Grafana</a> 作为我们监控的展示手段。</p>
|
||
|
||
<p>如果要做 Dashboard 推荐使用 <a href="https://grafana.com/dashboards/162">Kubernetes cluster monitoring (via Prometheus)</a> 。</p>
|
||
|
||
<p>另外,监控其实涉及的内容很多,包括数据持久化方式。以及是否考虑与集群外的 Prometheus 集群做联邦模式等。这里需要考虑的实际情况较多,暂不一一展开了。</p>
|
||
|
||
<p>Prometheus 已经从 CNCF 毕业,其在云原生时代下作为标准的监控技术栈也基本确立。至于应用监控,也可使用它的 SDK 来完成。</p>
|
||
|
||
<p>下节,我们将对本小册进行一次总结。</p>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
<div>
|
||
|
||
<div style="float: left">
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/22 服务增强:Ingress.md.html">上一页</a>
|
||
|
||
</div>
|
||
|
||
<div style="float: right">
|
||
|
||
<a href="/专栏/Kubernetes 从上手到实践/24 总结.md.html">下一页</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
<a class="off-canvas-overlay" onclick="hide_canvas()"></a>
|
||
|
||
</div>
|
||
|
||
<script defer src="https://static.cloudflareinsights.com/beacon.min.js/v652eace1692a40cfa3763df669d7439c1639079717194" integrity="sha512-Gi7xpJR8tSkrpF7aordPZQlW2DLtzUlZcumS8dMQjwDHEnw9I7ZLyiOj/6tZStRBGtGgN6ceN6cMH8z7etPGlw==" data-cf-beacon='{"rayId":"709972578bae3d60","version":"2021.12.0","r":1,"token":"1f5d475227ce4f0089a7cff1ab17c0f5","si":100}' crossorigin="anonymous"></script>
|
||
|
||
</body>
|
||
|
||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||
|
||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-NPSEEVD756"></script>
|
||
|
||
<script>
|
||
|
||
// Standard Google Analytics (gtag.js) bootstrap: ensure the dataLayer
// queue exists and define gtag() to push command arguments onto it.
// (Removed stray "|" artifact lines that made this script invalid.)
window.dataLayer = window.dataLayer || [];
function gtag() {
  dataLayer.push(arguments);
}
|
||
// Initialize Google Analytics for this page view.
gtag('js', new Date());
gtag('config', 'G-NPSEEVD756');

// Reading-progress bookmark: on article pages, remember the current
// path in the "lastPath" cookie; on the home page ("/"), if a previous
// path was stored, render a "resume reading" link into the #tip element.
// (Removed stray "|" artifact lines that made this script invalid.)
var path = window.location.pathname
var cookie = getCookie("lastPath");
console.log(path)
if (path.replace("/", "") === "") {
  if (cookie.replace("/", "") !== "") {
    console.log(cookie)
    document.getElementById("tip").innerHTML = "<a href='" + cookie + "'>跳转到上次进度</a>"
  }
} else {
  setCookie("lastPath", path)
}
|
||
// Store a site-wide cookie (path=/) that expires in 180 days.
// (Removed stray "|" artifact lines that made this script invalid.)
function setCookie(cname, cvalue) {
  var d = new Date();
  d.setTime(d.getTime() + (180 * 24 * 60 * 60 * 1000));
  var expires = "expires=" + d.toGMTString();
  document.cookie = cname + "=" + cvalue + "; " + expires + ";path = /";
}
|
||
// Read a cookie by name from document.cookie; returns "" when absent.
// (Removed stray "|" artifact lines that made this script invalid.)
function getCookie(cname) {
  var name = cname + "=";
  var ca = document.cookie.split(';');
  for (var i = 0; i < ca.length; i++) {
    var c = ca[i].trim();
    if (c.indexOf(name) === 0) return c.substring(name.length, c.length);
  }
  return "";
}
|
||
</script>
|
||
</html>
|
||
|