learn.lianglianglee.com/专栏/ElasticSearch知识体系详解/04 入门:查询和聚合的基础使用.md.html
2022-05-11 18:57:05 +08:00

1151 lines
22 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<!-- saved from url=(0046)https://kaiiiz.github.io/hexo-theme-book-demo/ -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1.0, user-scalable=no">
<link rel="icon" href="/static/favicon.png">
<title>04 入门:查询和聚合的基础使用.md.html</title>
<!-- Spectre.css framework -->
<link rel="stylesheet" href="/static/index.css">
<!-- theme css & js -->
<meta name="generator" content="Hexo 4.2.0">
</head>
<body>
<div class="book-container">
<div class="book-sidebar">
<div class="book-brand">
<a href="/">
<img src="/static/favicon.png">
<span>技术文章摘抄</span>
</a>
</div>
<div class="book-menu uncollapsible">
<ul class="uncollapsible">
<li><a href="/" class="current-tab">首页</a></li>
</ul>
<ul class="uncollapsible">
<li><a href="../">上一级</a></li>
</ul>
<ul class="uncollapsible">
<li>
<a href="/专栏/ElasticSearch知识体系详解/01 认知ElasticSearch基础概念.md.html">01 认知ElasticSearch基础概念.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/02 认知Elastic Stack生态和场景方案.md.html">02 认知Elastic Stack生态和场景方案.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/03 安装ElasticSearch和Kibana安装.md.html">03 安装ElasticSearch和Kibana安装.md.html</a>
</li>
<li>
<a class="current-tab" href="/专栏/ElasticSearch知识体系详解/04 入门:查询和聚合的基础使用.md.html">04 入门:查询和聚合的基础使用.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/05 索引:索引管理详解.md.html">05 索引:索引管理详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/06 索引:索引模板(Index Template)详解.md.html">06 索引:索引模板(Index Template)详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/07 查询DSL查询之复合查询详解.md.html">07 查询DSL查询之复合查询详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/08 查询DSL查询之全文搜索详解.md.html">08 查询DSL查询之全文搜索详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/09 查询DSL查询之Term详解.md.html">09 查询DSL查询之Term详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/10 聚合聚合查询之Bucket聚合详解.md.html">10 聚合聚合查询之Bucket聚合详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/11 聚合聚合查询之Metric聚合详解.md.html">11 聚合聚合查询之Metric聚合详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/12 聚合聚合查询之Pipline聚合详解.md.html">12 聚合聚合查询之Pipline聚合详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/13 原理从图解构筑对ES原理的初步认知.md.html">13 原理从图解构筑对ES原理的初步认知.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/14 原理ES原理知识点补充和整体结构.md.html">14 原理ES原理知识点补充和整体结构.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/15 原理ES原理之索引文档流程详解.md.html">15 原理ES原理之索引文档流程详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/16 原理ES原理之读取文档流程详解.md.html">16 原理ES原理之读取文档流程详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/17 优化ElasticSearch性能优化详解.md.html">17 优化ElasticSearch性能优化详解.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/18 大厂实践:腾讯万亿级 Elasticsearch 技术实践.md.html">18 大厂实践:腾讯万亿级 Elasticsearch 技术实践.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/19 资料Awesome Elasticsearch.md.html">19 资料Awesome Elasticsearch.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/20 WrapperQuery.md.html">20 WrapperQuery.md.html</a>
</li>
<li>
<a href="/专栏/ElasticSearch知识体系详解/21 备份和迁移.md.html">21 备份和迁移.md.html</a>
</li>
</ul>
</div>
</div>
<div class="sidebar-toggle" onclick="sidebar_toggle()" onmouseover="add_inner()" onmouseleave="remove_inner()">
<div class="sidebar-toggle-inner"></div>
</div>
<script>
// Mouse-over handler for the sidebar toggle: marks the inner handle
// element with the 'show' class.
function add_inner() {
  document.querySelector('.sidebar-toggle-inner').classList.add('show')
}
// Mouse-leave handler for the sidebar toggle: clears the 'show' class
// from the inner handle element.
function remove_inner() {
  document.querySelector('.sidebar-toggle-inner').classList.remove('show')
}
// Click handler for the sidebar toggle. The 'extend' class on the toggle
// records that the sidebar is currently hidden; each click flips that state
// on the toggle, the sidebar ('hide') and the content area ('extend').
function sidebar_toggle() {
  const toggle = document.querySelector('.sidebar-toggle')
  const panel = document.querySelector('.book-sidebar')
  const main = document.querySelector('.off-canvas-content')
  // Already extended -> show the sidebar again (remove); otherwise hide it (add).
  const action = toggle.classList.contains('extend') ? 'remove' : 'add'
  toggle.classList[action]('extend')
  panel.classList[action]('hide')
  main.classList[action]('extend')
}
// Opens the off-canvas sidebar: adds the 'show' class to both the sidebar
// and the overlay element.
function open_sidebar() {
  // Resolve both elements before mutating either one.
  const targets = ['.book-sidebar', '.off-canvas-overlay']
    .map((selector) => document.querySelector(selector))
  targets.forEach((el) => el.classList.add('show'))
}
// Closes the off-canvas sidebar: removes the 'show' class from both the
// sidebar and the overlay element.
function hide_canvas() {
  // Resolve both elements before mutating either one.
  const targets = ['.book-sidebar', '.off-canvas-overlay']
    .map((selector) => document.querySelector(selector))
  targets.forEach((el) => el.classList.remove('show'))
}
</script>
<div class="off-canvas-content">
<div class="columns">
<div class="column col-12 col-lg-12">
<div class="book-navbar">
<!-- For Responsive Layout -->
<header class="navbar">
<section class="navbar-section">
<a onclick="open_sidebar()">
<i class="icon icon-menu"></i>
</a>
</section>
</header>
</div>
<div class="book-content" style="max-width: 960px; margin: 0 auto;
overflow-x: auto;
overflow-y: hidden;">
<div class="book-post">
<p id="tip" align="center"></p>
<div><h1>04 入门:查询和聚合的基础使用</h1>
<h2>入门:从索引文档开始</h2>
<ul>
<li>索引一个文档</li>
</ul>
<pre><code class="language-bash">PUT /customer/_doc/1
{
&quot;name&quot;: &quot;John Doe&quot;
}
</code></pre>
<p>为了方便测试,我们使用kibana的dev tool来进行学习测试</p>
<p><img src="assets/es-usage-1.png" alt="img" /></p>
<p>查询刚才插入的文档</p>
<p><img src="assets/es-usage-2.png" alt="img" /></p>
<h2>学习准备:批量索引文档</h2>
<blockquote>
<p>ES 还提供了批量操作,比如这里我们可以使用批量操作来插入一些数据,供我们在后面学习使用。</p>
</blockquote>
<p>使用批量来批处理文档操作比单独提交请求要快得多,因为它减少了网络往返。</p>
<ul>
<li><strong>下载测试数据</strong></li>
</ul>
<p>测试数据对应的index为bank,数据文件为accounts.json(<a href="https://raw.githubusercontent.com/elastic/elasticsearch/master/docs/src/test/resources/accounts.json">下载地址</a>)。如果你无法下载,也可以clone ES的<a href="https://github.com/elastic/elasticsearch">官方仓库</a>,然后在/docs/src/test/resources/目录下获取accounts.json。</p>
<p>数据的格式如下</p>
<pre><code class="language-json">{
&quot;account_number&quot;: 0,
&quot;balance&quot;: 16623,
&quot;firstname&quot;: &quot;Bradshaw&quot;,
&quot;lastname&quot;: &quot;Mckenzie&quot;,
&quot;age&quot;: 29,
&quot;gender&quot;: &quot;F&quot;,
&quot;address&quot;: &quot;244 Columbus Place&quot;,
&quot;employer&quot;: &quot;Euron&quot;,
&quot;email&quot;: &quot;bradshawmckenzie@euron.com&quot;,
&quot;city&quot;: &quot;Hobucken&quot;,
&quot;state&quot;: &quot;CO&quot;
}
</code></pre>
<ul>
<li><strong>批量插入数据</strong></li>
</ul>
<p>将accounts.json拷贝至指定目录,我这里放在<code>/opt/</code>下面,</p>
<p>然后执行</p>
<pre><code class="language-bash">curl -H &quot;Content-Type: application/json&quot; -XPOST &quot;localhost:9200/bank/_bulk?pretty&amp;refresh&quot; --data-binary &quot;@/opt/accounts.json&quot;
</code></pre>
<ul>
<li><strong>查看状态</strong></li>
</ul>
<pre><code class="language-bash">[elasticsearch@VM-0-14-centos root]$ curl &quot;localhost:9200/_cat/indices?v=true&quot; | grep bank
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1524 100 1524 0 0 119k 0 --:--:-- --:--:-- --:--:-- 124k
yellow open bank yq3eSlAWRMO2Td0Sl769rQ 1 1 1000 0 379.2kb 379.2kb
[elasticsearch@VM-0-14-centos root]$
</code></pre>
<h2>查询数据</h2>
<blockquote>
<p>我们通过kibana来进行查询测试。</p>
</blockquote>
<h3>查询所有</h3>
<p><code>match_all</code>表示查询所有的数据,<code>sort</code>即按照什么字段排序</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: { &quot;match_all&quot;: {} },
&quot;sort&quot;: [
{ &quot;account_number&quot;: &quot;asc&quot; }
]
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-3.png" alt="img" /></p>
<p>相关字段解释</p>
<ul>
<li><code>took</code> – Elasticsearch运行查询所花费的时间(以毫秒为单位)</li>
<li><code>timed_out</code> –搜索请求是否超时</li>
<li><code>_shards</code> - 搜索了多少个分片,以及成功、失败或跳过了多少个分片的细目分类。</li>
<li><code>max_score</code> 找到的最相关文档的分数</li>
<li><code>hits.total.value</code> - 找到了多少个匹配的文档</li>
<li><code>hits.sort</code> - 文档的排序位置(不按相关性得分排序时)</li>
<li><code>hits._score</code> - 文档的相关性得分(使用match_all时不适用)</li>
</ul>
<h3>分页查询(from+size)</h3>
<p>本质上就是from和size两个字段</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: { &quot;match_all&quot;: {} },
&quot;sort&quot;: [
{ &quot;account_number&quot;: &quot;asc&quot; }
],
&quot;from&quot;: 10,
&quot;size&quot;: 10
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-4.png" alt="img" /></p>
<h3>指定字段查询match</h3>
<p>如果要在字段中搜索特定字词,可以使用<code>match</code>; 如下语句将查询address 字段中包含 mill 或者 lane的数据</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: { &quot;match&quot;: { &quot;address&quot;: &quot;mill lane&quot; } }
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-5.png" alt="img" /></p>
<p>由于ES底层是按照分词索引的,所以上述查询结果是address 字段中包含 mill 或者 lane的数据</p>
<h3>查询段落匹配match_phrase</h3>
<p>如果我们希望查询的条件是 address字段中包含 &quot;mill lane&quot;,则可以使用<code>match_phrase</code></p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: { &quot;match_phrase&quot;: { &quot;address&quot;: &quot;mill lane&quot; } }
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-6.png" alt="img" /></p>
<h3>多条件查询: bool</h3>
<p>如果要构造更复杂的查询,可以使用<code>bool</code>查询来组合多个查询条件。</p>
<p>例如,以下请求在bank索引中搜索40岁客户的帐户,但不包括居住在爱达荷州(ID)的任何人</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;must&quot;: [
{ &quot;match&quot;: { &quot;age&quot;: &quot;40&quot; } }
],
&quot;must_not&quot;: [
{ &quot;match&quot;: { &quot;state&quot;: &quot;ID&quot; } }
]
}
}
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-7.png" alt="img" /></p>
<p><code>must</code>, <code>should</code>, <code>must_not</code> 和 <code>filter</code> 都是<code>bool</code>查询的子句。那么<code>filter</code>和上述<code>query</code>子句有啥区别呢?</p>
<h3>查询条件query or filter</h3>
<p>先看下如下查询, 在<code>bool</code>查询的子句中同时具备query/must 和 filter</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;must&quot;: [
{
&quot;match&quot;: {
&quot;state&quot;: &quot;ND&quot;
}
}
],
&quot;filter&quot;: [
{
&quot;term&quot;: {
&quot;age&quot;: &quot;40&quot;
}
},
{
&quot;range&quot;: {
&quot;balance&quot;: {
&quot;gte&quot;: 20000,
&quot;lte&quot;: 30000
}
}
}
]
}
}
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-8.png" alt="img" /></p>
<p>两者都可以写查询条件,而且语法也类似。区别在于,<strong>query 上下文的条件是用来给文档打分的,匹配越好 _score 越高;filter 的条件只产生两种结果:符合与不符合,后者被过滤掉</strong>。</p>
<p>所以我们进一步看只包含filter的查询</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;query&quot;: {
&quot;bool&quot;: {
&quot;filter&quot;: [
{
&quot;term&quot;: {
&quot;age&quot;: &quot;40&quot;
}
},
{
&quot;range&quot;: {
&quot;balance&quot;: {
&quot;gte&quot;: 20000,
&quot;lte&quot;: 30000
}
}
}
]
}
}
}
</code></pre>
<p>结果显然不参与打分(_score 为 0)</p>
<p><img src="assets/es-usage-9.png" alt="img" /></p>
<h2>聚合查询Aggregation</h2>
<blockquote>
<p>我们知道SQL中有group by在ES中它叫Aggregation即聚合运算。</p>
</blockquote>
<h3>简单聚合</h3>
<p>比如我们希望计算出account每个州的统计数量,可以使用<code>aggs</code>关键字对<code>state</code>字段聚合;被聚合的字段无需分词统计,所以使用<code>state.keyword</code>对整个字段统计</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;size&quot;: 0,
&quot;aggs&quot;: {
&quot;group_by_state&quot;: {
&quot;terms&quot;: {
&quot;field&quot;: &quot;state.keyword&quot;
}
}
}
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-10.png" alt="img" /></p>
<p>因为无需返回条件的具体数据, 所以设置size=0返回hits为空。</p>
<p><code>doc_count</code>表示bucket中每个州的数据条数。</p>
<h3>嵌套聚合</h3>
<p>ES还可以处理多个聚合条件的嵌套。</p>
<p>比如承接上个例子, 计算每个州的平均结余。涉及到的就是在对state分组的基础上嵌套计算avg(balance):</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;size&quot;: 0,
&quot;aggs&quot;: {
&quot;group_by_state&quot;: {
&quot;terms&quot;: {
&quot;field&quot;: &quot;state.keyword&quot;
},
&quot;aggs&quot;: {
&quot;average_balance&quot;: {
&quot;avg&quot;: {
&quot;field&quot;: &quot;balance&quot;
}
}
}
}
}
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-11.png" alt="img" /></p>
<h3>对聚合结果排序</h3>
<p>可以在aggs中通过<code>order</code>对嵌套聚合的结果进行排序</p>
<p>比如承接上个例子, 对嵌套计算出的avg(balance)(这里是average_balance)进行排序</p>
<pre><code class="language-bash">GET /bank/_search
{
&quot;size&quot;: 0,
&quot;aggs&quot;: {
&quot;group_by_state&quot;: {
&quot;terms&quot;: {
&quot;field&quot;: &quot;state.keyword&quot;,
&quot;order&quot;: {
&quot;average_balance&quot;: &quot;desc&quot;
}
},
&quot;aggs&quot;: {
&quot;average_balance&quot;: {
&quot;avg&quot;: {
&quot;field&quot;: &quot;balance&quot;
}
}
}
}
}
}
</code></pre>
<p>结果</p>
<p><img src="assets/es-usage-12.png" alt="img" /></p>
</div>
</div>
<div>
<div style="float: left">
<a href="/专栏/ElasticSearch知识体系详解/03 安装ElasticSearch和Kibana安装.md.html">上一页</a>
</div>
<div style="float: right">
<a href="/专栏/ElasticSearch知识体系详解/05 索引:索引管理详解.md.html">下一页</a>
</div>
</div>
</div>
</div>
</div>
</div>
<a class="off-canvas-overlay" onclick="hide_canvas()"></a>
</div>
<script data-cfasync="false" src="/cdn-cgi/scripts/5c5dd728/cloudflare-static/email-decode.min.js"></script><script defer src="https://static.cloudflareinsights.com/beacon.min.js/v652eace1692a40cfa3763df669d7439c1639079717194" integrity="sha512-Gi7xpJR8tSkrpF7aordPZQlW2DLtzUlZcumS8dMQjwDHEnw9I7ZLyiOj/6tZStRBGtGgN6ceN6cMH8z7etPGlw==" data-cf-beacon='{"rayId":"70996f93acd53d60","version":"2021.12.0","r":1,"token":"1f5d475227ce4f0089a7cff1ab17c0f5","si":100}' crossorigin="anonymous"></script>
</body>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-NPSEEVD756"></script>
<script>
// Google Analytics (gtag.js) bootstrap.
// Reuse window.dataLayer if it is already defined, otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
// Queue one command by pushing this call's `arguments` object onto dataLayer.
// NOTE(review): the Arguments object itself is pushed, not an array copy —
// presumably the form gtag.js expects; confirm against Google's docs before refactoring.
function gtag() {
dataLayer.push(arguments);
}
// Queue the 'js' command with the current time, then the 'config' command
// for ID G-NPSEEVD756.
gtag('js', new Date());
gtag('config', 'G-NPSEEVD756');
// Reading-progress helper.
// - On any page other than the site root, remember the current path in the
//   "lastPath" cookie.
// - On the site root, if a non-root path was remembered, show a
//   "jump to last position" link inside the #tip element.
var path = window.location.pathname
var cookie = getCookie("lastPath");
console.log(path)
if (path.replace("/", "") === "") {
    if (cookie.replace("/", "") !== "") {
        console.log(cookie)
        // Build the link through the DOM instead of concatenating the cookie
        // value into an HTML string: a stored path containing a quote character
        // could otherwise break out of the href attribute and inject markup.
        const tip = document.getElementById("tip")
        if (tip) {
            const resumeLink = document.createElement("a")
            resumeLink.setAttribute("href", cookie)
            resumeLink.textContent = "跳转到上次进度"
            // Replace any existing content, as the innerHTML assignment did.
            tip.textContent = ""
            tip.appendChild(resumeLink)
        }
    }
} else {
    setCookie("lastPath", path)
}
/**
 * Write a cookie that expires 180 days from now, with its path attribute set to "/".
 *
 * @param {string} cname  Cookie name.
 * @param {string} cvalue Cookie value; written as-is, no encoding is applied.
 */
function setCookie(cname, cvalue) {
    var LIFETIME_MS = 180 * 24 * 60 * 60 * 1000;
    var expiry = new Date(Date.now() + LIFETIME_MS);
    // toUTCString() is the standard replacement for the deprecated
    // toGMTString(); the resulting cookie string has the same layout.
    var expires = "expires=" + expiry.toUTCString();
    document.cookie = cname + "=" + cvalue + "; " + expires + ";path = /";
}
/**
 * Look up a cookie by name in document.cookie.
 *
 * @param {string} cname Cookie name to search for.
 * @returns {string} The value of the first matching cookie, or "" when
 *                   no cookie with that name is present.
 */
function getCookie(cname) {
    var prefix = cname + "=";
    var match = document.cookie
        .split(';')
        .map(function (entry) { return entry.trim(); })
        .find(function (entry) { return entry.indexOf(prefix) === 0; });
    if (match === undefined) {
        return "";
    }
    return match.substring(prefix.length, match.length);
}
</script>
</html>