mirror of
https://github.com/zhwei820/learn.lianglianglee.com.git
synced 2025-09-26 21:26:41 +08:00
787 lines
18 KiB
HTML
787 lines
18 KiB
HTML
<!DOCTYPE html>
|
||
|
||
<!-- saved from url=(0046)https://kaiiiz.github.io/hexo-theme-book-demo/ -->
|
||
|
||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||
|
||
<head>
|
||
|
||
<head>
|
||
|
||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1.0, user-scalable=no">
|
||
|
||
<link rel="icon" href="/static/favicon.png">
|
||
|
||
<title>21 RocketMQ 集群告警.md.html</title>
|
||
|
||
<!-- Spectre.css framework -->
|
||
|
||
<link rel="stylesheet" href="/static/index.css">
|
||
|
||
<!-- theme css & js -->
|
||
|
||
<meta name="generator" content="Hexo 4.2.0">
|
||
|
||
</head>
|
||
|
||
|
||
|
||
<body>
|
||
|
||
|
||
|
||
<div class="book-container">
|
||
|
||
<div class="book-sidebar">
|
||
|
||
<div class="book-brand">
|
||
|
||
<a href="/">
|
||
|
||
<img src="/static/favicon.png">
|
||
|
||
<span>技术文章摘抄</span>
|
||
|
||
</a>
|
||
|
||
</div>
|
||
|
||
<div class="book-menu uncollapsible">
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="/" class="current-tab">首页</a></li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="../">上一级</a></li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/01 搭建学习环境准备篇.md">01 搭建学习环境准备篇.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/02 RocketMQ 核心概念扫盲篇.md">02 RocketMQ 核心概念扫盲篇.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/03 消息发送 API 详解与版本变迁说明.md">03 消息发送 API 详解与版本变迁说明.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/04 结合实际应用场景谈消息发送.md">04 结合实际应用场景谈消息发送.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/05 消息发送核心参数与工作原理详解.md">05 消息发送核心参数与工作原理详解.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/06 消息发送常见错误与解决方案.md">06 消息发送常见错误与解决方案.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/07 事务消息使用及方案选型思考.md">07 事务消息使用及方案选型思考.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/08 消息消费 API 与版本变迁说明.md">08 消息消费 API 与版本变迁说明.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/09 DefaultMQPushConsumer 核心参数与工作原理.md">09 DefaultMQPushConsumer 核心参数与工作原理.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/10 DefaultMQPushConsumer 使用示例与注意事项.md">10 DefaultMQPushConsumer 使用示例与注意事项.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/11 DefaultLitePullConsumer 核心参数与实战.md">11 DefaultLitePullConsumer 核心参数与实战.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/12 结合实际场景再聊 DefaultLitePullConsumer 的使用.md">12 结合实际场景再聊 DefaultLitePullConsumer 的使用.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/13 结合实际场景顺序消费、消息过滤实战.md">13 结合实际场景顺序消费、消息过滤实战.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/14 消息消费积压问题排查实战.md">14 消息消费积压问题排查实战.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/15 RocketMQ 常用命令实战.md">15 RocketMQ 常用命令实战.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/16 RocketMQ 集群性能摸高.md">16 RocketMQ 集群性能摸高.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/17 RocketMQ 集群性能调优.md">17 RocketMQ 集群性能调优.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/18 RocketMQ 集群平滑运维.md">18 RocketMQ 集群平滑运维.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/19 RocketMQ 集群监控(一).md">19 RocketMQ 集群监控(一).md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/20 RocketMQ 集群监控(二).md">20 RocketMQ 集群监控(二).md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
<a class="current-tab" href="/专栏/RocketMQ 实战与进阶(完)/21 RocketMQ 集群告警.md">21 RocketMQ 集群告警.md.html</a>
|
||
|
||
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/22 RocketMQ 集群踩坑记.md">22 RocketMQ 集群踩坑记.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/23 消息轨迹、ACL 与多副本搭建.md">23 消息轨迹、ACL 与多副本搭建.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/24 RocketMQ-Console 常用页面指标获取逻辑.md">24 RocketMQ-Console 常用页面指标获取逻辑.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/25 RocketMQ Nameserver 背后的设计理念.md">25 RocketMQ Nameserver 背后的设计理念.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/26 Java 并发编程实战.md">26 Java 并发编程实战.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/27 从 RocketMQ 学基于文件的编程模式(一).md">27 从 RocketMQ 学基于文件的编程模式(一).md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/28 从 RocketMQ 学基于文件的编程模式(二).md">28 从 RocketMQ 学基于文件的编程模式(二).md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/29 从 RocketMQ 学 Netty 网络编程技巧.md">29 从 RocketMQ 学 Netty 网络编程技巧.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/30 RocketMQ 学习方法之我见.md">30 RocketMQ 学习方法之我见.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="sidebar-toggle" onclick="sidebar_toggle()" onmouseover="add_inner()" onmouseleave="remove_inner()">
|
||
|
||
<div class="sidebar-toggle-inner"></div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<script>
|
||
|
||
function add_inner() {
|
||
|
||
let inner = document.querySelector('.sidebar-toggle-inner')
|
||
|
||
inner.classList.add('show')
|
||
|
||
}
|
||
|
||
|
||
|
||
function remove_inner() {
|
||
|
||
let inner = document.querySelector('.sidebar-toggle-inner')
|
||
|
||
inner.classList.remove('show')
|
||
|
||
}
|
||
|
||
|
||
|
||
function sidebar_toggle() {
|
||
|
||
let sidebar_toggle = document.querySelector('.sidebar-toggle')
|
||
|
||
let sidebar = document.querySelector('.book-sidebar')
|
||
|
||
let content = document.querySelector('.off-canvas-content')
|
||
|
||
if (sidebar_toggle.classList.contains('extend')) { // show
|
||
|
||
sidebar_toggle.classList.remove('extend')
|
||
|
||
sidebar.classList.remove('hide')
|
||
|
||
content.classList.remove('extend')
|
||
|
||
} else { // hide
|
||
|
||
sidebar_toggle.classList.add('extend')
|
||
|
||
sidebar.classList.add('hide')
|
||
|
||
content.classList.add('extend')
|
||
|
||
}
|
||
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
function open_sidebar() {
|
||
|
||
let sidebar = document.querySelector('.book-sidebar')
|
||
|
||
let overlay = document.querySelector('.off-canvas-overlay')
|
||
|
||
sidebar.classList.add('show')
|
||
|
||
overlay.classList.add('show')
|
||
|
||
}
|
||
|
||
function hide_canvas() {
|
||
|
||
let sidebar = document.querySelector('.book-sidebar')
|
||
|
||
let overlay = document.querySelector('.off-canvas-overlay')
|
||
|
||
sidebar.classList.remove('show')
|
||
|
||
overlay.classList.remove('show')
|
||
|
||
}
|
||
|
||
|
||
|
||
</script>
|
||
|
||
|
||
|
||
<div class="off-canvas-content">
|
||
|
||
<div class="columns">
|
||
|
||
<div class="column col-12 col-lg-12">
|
||
|
||
<div class="book-navbar">
|
||
|
||
<!-- For Responsive Layout -->
|
||
|
||
<header class="navbar">
|
||
|
||
<section class="navbar-section">
|
||
|
||
<a onclick="open_sidebar()">
|
||
|
||
<i class="icon icon-menu"></i>
|
||
|
||
</a>
|
||
|
||
</section>
|
||
|
||
</header>
|
||
|
||
</div>
|
||
|
||
<div class="book-content" style="max-width: 960px; margin: 0 auto;
|
||
|
||
overflow-x: auto;
|
||
|
||
overflow-y: hidden;">
|
||
|
||
<div class="book-post">
|
||
|
||
<p id="tip" align="center"></p>
|
||
|
||
<div><h1>21 RocketMQ 集群告警</h1>
|
||
|
||
<h3>前言</h3>
|
||
|
||
<p>对集群健康状况、使用主题、消费组资源的巡检,发现达到阈值则发送告警信息给管理员或者资源申请者。监控是告警的基础,告警的巡检基于前面两篇文章中监控采集到的数据。</p>
|
||
|
||
<p>告警的重要性不必过多地赘述,RocketMQ 集群往往承载着公司核心业务流转。如果集群不可用往往影响是全公司的业务,事故责任是公司最高级别的。</p>
|
||
|
||
<p>本文从告警项的设计、告警流程、告警实战给出指导建议,在实践中以此为思路扩展完善,实现自己公司的定制化告警。</p>
|
||
|
||
<h3>告警项设计</h3>
|
||
|
||
<p>下图分别从主题、消费组、集群维度罗列了比较重要的告警项以及触发条件包括哪些方面。</p>
|
||
|
||
<p><img src="assets/20200905163231215.png" alt="img" /></p>
|
||
|
||
<h4><strong>触发条件</strong></h4>
|
||
|
||
<ul>
|
||
|
||
<li>触发阈值:超过某个特定的数值,例如:消费积压超过 10 万。</li>
|
||
|
||
<li>时间间隔:间隔多久检测,例如:5 分钟内消费积压超过 10 万。</li>
|
||
|
||
<li>触发次数:在时间间隔内满足阈值的次数,例如:5 分钟内消费积压超过 10 万,触发了 3 次。</li>
|
||
|
||
<li>告警时间段:收到告警通知的时间范围,例如:在 9:00-22:00 之间收到告警信息。</li>
|
||
|
||
</ul>
|
||
|
||
<h4><strong>主题告警</strong></h4>
|
||
|
||
<p>发送速度:当发送速度满足触发条件设定的阈值时发送告警信息。</p>
|
||
|
||
<p>例如:5 分钟内当发送速度小于阈值 10,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<h4><strong>消费告警</strong></h4>
|
||
|
||
<p>消费速度:当消费速度满足触发条件设定的阈值时发送告警信息。</p>
|
||
|
||
<p>例如:5 分钟内当消费速度小于阈值 5000,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<p>消费积压:当消费积压值满足触发条件设定的阈值时发送告警信息。</p>
|
||
|
||
<p>例如:5 分钟内当消费积压大于阈值 100000,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<h4><strong>集群告警</strong></h4>
|
||
|
||
<p>集群节点数量:当集群节点数量满足触发条件设定的阈值时触发告警。</p>
|
||
|
||
<p>例如:5 分钟内当集群节点数量小于阈值 4,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<p>集群响应时间:当集群节点发送的 RT 满足触发条件的阈值时触发告警。</p>
|
||
|
||
<p>例如:5 分钟内当节点发送的响应时间大于 1 秒,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<p>集群写入 TPS:当集群写入 TPS 满足触发条件设定的阈值时触发告警。</p>
|
||
|
||
<p>例如:5 分钟内当集群写入 TPS 大于 40000,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<p>集群节点可用性:当集群节点心跳检测结果满足触发条件设定的阈值时触发告警。</p>
|
||
|
||
<p>例如:5 分钟内当节点心跳检测结果大于 0(表示失败),触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<p>集群写入变化率:当集群写入 TPS 变化率满足触发条件设定的阈值时触发告警。</p>
|
||
|
||
<p>例如:5 分钟内当集群写入变化率大于 100%,触发 1 次,在 00:00-23:59 触发告警信息。</p>
|
||
|
||
<h4>告警开发实战</h4>
|
||
|
||
<h4><strong>告警流程</strong></h4>
|
||
|
||
<p>定时任务巡检:可以使用公司的调度平台或者自己写调度线程 ScheduledExecutorService,调度的频率可以根据不同的指标分成不同的调度任务,例如:集群告警可以采取秒级探测、对于主题和消费组的告警可以采用分钟级探测。</p>
|
||
|
||
<p>检索监控数据:数据来自于前面两节中存储的监控数据,例如:存储到了时序数据库 InfluxDB 中。</p>
|
||
|
||
<p>发送告警信息:此处可以发送到公司的统一告警系统,也可以发送到钉钉、邮箱、短信等。</p>
|
||
|
||
<p><img src="assets/20200905163427806.png" alt="img" /></p>
|
||
|
||
<h4><strong>主题/消费动态 SQL</strong></h4>
|
||
|
||
<p>我们可以通过在界面上配置不同的告警规则生成不同的检索语句,在定时调度时使用生成的语句。</p>
|
||
|
||
<p><img src="assets/202009051635346.png" alt="img" /></p>
|
||
|
||
<p>通过类似上面图示中对主题和消费组的选择,动态生成 SQL 语句,例如:当选择以下动态规则参数时,集群名称 demo_cluster、消费组名称 demo_consumer、类型 consumer、指标积压、大于、阈值 1000000、间隔 5 分钟、次数 1 次、告警开始时间 00:00、告警结束时间 23:59 时生成以下语句。</p>
|
||
|
||
<pre><code>select Count(value) FROM "consumer_monitor_info" WHERE "clusterName" =
|
||
|
||
</code></pre>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
<div>
|
||
|
||
<div style="float: left">
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/20 RocketMQ 集群监控(二).md">上一页</a>
|
||
|
||
</div>
|
||
|
||
<div style="float: right">
|
||
|
||
<a href="/专栏/RocketMQ 实战与进阶(完)/22 RocketMQ 集群踩坑记.md">下一页</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<a class="off-canvas-overlay" onclick="hide_canvas()"></a>
|
||
|
||
</div>
|
||
|
||
<script defer src="https://static.cloudflareinsights.com/beacon.min.js/v652eace1692a40cfa3763df669d7439c1639079717194" integrity="sha512-Gi7xpJR8tSkrpF7aordPZQlW2DLtzUlZcumS8dMQjwDHEnw9I7ZLyiOj/6tZStRBGtGgN6ceN6cMH8z7etPGlw==" data-cf-beacon='{"rayId":"70997441caed3d60","version":"2021.12.0","r":1,"token":"1f5d475227ce4f0089a7cff1ab17c0f5","si":100}' crossorigin="anonymous"></script>
|
||
|
||
</body>
|
||
|
||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||
|
||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-NPSEEVD756"></script>
|
||
|
||
<script>
|
||
|
||
window.dataLayer = window.dataLayer || [];
|
||
|
||
|
||
|
||
function gtag() {
|
||
|
||
dataLayer.push(arguments);
|
||
|
||
}
|
||
|
||
|
||
|
||
gtag('js', new Date());
|
||
|
||
gtag('config', 'G-NPSEEVD756');
|
||
|
||
var path = window.location.pathname
|
||
|
||
var cookie = getCookie("lastPath");
|
||
|
||
console.log(path)
|
||
|
||
if (path.replace("/", "") === "") {
|
||
|
||
if (cookie.replace("/", "") !== "") {
|
||
|
||
console.log(cookie)
|
||
|
||
document.getElementById("tip").innerHTML = "<a href='" + cookie + "'>跳转到上次进度</a>"
|
||
|
||
}
|
||
|
||
} else {
|
||
|
||
setCookie("lastPath", path)
|
||
|
||
}
|
||
|
||
|
||
|
||
function setCookie(cname, cvalue) {
|
||
|
||
var d = new Date();
|
||
|
||
d.setTime(d.getTime() + (180 * 24 * 60 * 60 * 1000));
|
||
|
||
var expires = "expires=" + d.toGMTString();
|
||
|
||
document.cookie = cname + "=" + cvalue + "; " + expires + ";path = /";
|
||
|
||
}
|
||
|
||
|
||
|
||
function getCookie(cname) {
|
||
|
||
var name = cname + "=";
|
||
|
||
var ca = document.cookie.split(';');
|
||
|
||
for (var i = 0; i < ca.length; i++) {
|
||
|
||
var c = ca[i].trim();
|
||
|
||
if (c.indexOf(name) === 0) return c.substring(name.length, c.length);
|
||
|
||
}
|
||
|
||
return "";
|
||
|
||
}
|
||
|
||
|
||
|
||
</script>
|
||
|
||
|
||
|
||
</html>
|
||
|