mirror of
https://github.com/zhwei820/learn.lianglianglee.com.git
synced 2025-09-25 04:36:41 +08:00
1649 lines
28 KiB
HTML
1649 lines
28 KiB
HTML
<!DOCTYPE html>
|
||
|
||
<!-- saved from url=(0046)https://kaiiiz.github.io/hexo-theme-book-demo/ -->
|
||
|
||
<html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN">
|
||
|
||
<head>
|
||
|
||
<head>
|
||
|
||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
|
||
<link rel="icon" href="/static/favicon.png">
|
||
|
||
<title>04 非结构存储:用好 JSON 这张牌.md.html</title>
|
||
|
||
<!-- Spectre.css framework -->
|
||
|
||
<link rel="stylesheet" href="/static/index.css">
|
||
|
||
<!-- theme css & js -->
|
||
|
||
<meta name="generator" content="Hexo 4.2.0">
|
||
|
||
</head>
|
||
|
||
|
||
|
||
<body>
|
||
|
||
|
||
|
||
<div class="book-container">
|
||
|
||
<div class="book-sidebar">
|
||
|
||
<div class="book-brand">
|
||
|
||
<a href="/">
|
||
|
||
<img src="/static/favicon.png" alt="">
|
||
|
||
<span>技术文章摘抄</span>
|
||
|
||
</a>
|
||
|
||
</div>
|
||
|
||
<div class="book-menu uncollapsible">
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="/" class="current-tab">首页</a></li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li><a href="../">上一级</a></li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
<ul class="uncollapsible">
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/00 开篇词 从业务出发,开启海量 MySQL 架构设计.md">00 开篇词 从业务出发,开启海量 MySQL 架构设计.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/01 数字类型:避免自增踩坑.md">01 数字类型:避免自增踩坑.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/02 字符串类型:不能忽略的 COLLATION.md">02 字符串类型:不能忽略的 COLLATION.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/03 日期类型:TIMESTAMP 可能是巨坑.md">03 日期类型:TIMESTAMP 可能是巨坑.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
<a class="current-tab" href="/专栏/MySQL实战宝典/04 非结构存储:用好 JSON 这张牌.md">04 非结构存储:用好 JSON 这张牌.md.html</a>
|
||
|
||
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/05 表结构设计:忘记范式准则.md">05 表结构设计:忘记范式准则.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/06 表压缩:不仅仅是空间压缩.md">06 表压缩:不仅仅是空间压缩.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/07 表的访问设计:你该选择 SQL 还是 NoSQL?.md">07 表的访问设计:你该选择 SQL 还是 NoSQL?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/08 索引:排序的艺术.md">08 索引:排序的艺术.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/09 索引组织表:万物皆索引.md">09 索引组织表:万物皆索引.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/10 组合索引:用好,性能提升 10 倍!.md">10 组合索引:用好,性能提升 10 倍!.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/11 索引出错:请理解 CBO 的工作原理.md">11 索引出错:请理解 CBO 的工作原理.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/12 JOIN 连接:到底能不能写 JOIN?.md">12 JOIN 连接:到底能不能写 JOIN?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/13 子查询:放心地使用子查询功能吧!.md">13 子查询:放心地使用子查询功能吧!.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/14 分区表:哪些场景我不建议用分区表?.md">14 分区表:哪些场景我不建议用分区表?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/15 MySQL 复制:最简单也最容易配置出错.md">15 MySQL 复制:最简单也最容易配置出错.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/16 读写分离设计:复制延迟?其实是你用错了.md">16 读写分离设计:复制延迟?其实是你用错了.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/17 高可用设计:你怎么活用三大架构方案?.md">17 高可用设计:你怎么活用三大架构方案?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/18 金融级高可用架构:必不可少的数据核对.md">18 金融级高可用架构:必不可少的数据核对.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/19 高可用套件:选择这么多,你该如何选?.md">19 高可用套件:选择这么多,你该如何选?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/20 InnoDB Cluster:改变历史的新产品.md">20 InnoDB Cluster:改变历史的新产品.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/21 数据库备份:备份文件也要检查!.md">21 数据库备份:备份文件也要检查!.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/22 分布式数据库架构:彻底理解什么叫分布式数据库.md">22 分布式数据库架构:彻底理解什么叫分布式数据库.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/23 分布式数据库表结构设计:如何正确地将数据分片?.md">23 分布式数据库表结构设计:如何正确地将数据分片?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/24 分布式数据库索引设计:二级索引、全局索引的最佳设计实践.md">24 分布式数据库索引设计:二级索引、全局索引的最佳设计实践.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/25 分布式数据库架构选型:分库分表 or 中间件 ?.md">25 分布式数据库架构选型:分库分表 or 中间件 ?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/26 分布式设计之禅:全链路的条带化设计.md">26 分布式设计之禅:全链路的条带化设计.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
<li>
|
||
|
||
|
||
|
||
|
||
|
||
<a href="/专栏/MySQL实战宝典/27 分布式事务:我们到底要不要使用 2PC?.md">27 分布式事务:我们到底要不要使用 2PC?.md.html</a>
|
||
|
||
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="sidebar-toggle" onclick="sidebar_toggle()" onmouseover="add_inner()" onmouseleave="remove_inner()">
|
||
|
||
<div class="sidebar-toggle-inner"></div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<script>
|
||
|
||
/**
 * Add the `show` class to the `.sidebar-toggle-inner` element.
 * Referenced by the sidebar toggle's `onmouseover` attribute.
 */
function add_inner() {
  document.querySelector('.sidebar-toggle-inner').classList.add('show');
}
|
||
|
||
|
||
|
||
/**
 * Remove the `show` class from the `.sidebar-toggle-inner` element.
 * Referenced by the sidebar toggle's `onmouseleave` attribute.
 */
function remove_inner() {
  document.querySelector('.sidebar-toggle-inner').classList.remove('show');
}
|
||
|
||
|
||
|
||
/**
 * Flip the sidebar between its shown and hidden states.
 *
 * The `extend` class on the `.sidebar-toggle` handle is the state flag:
 * when present, the sidebar is currently hidden and this call shows it
 * again; when absent, this call hides it. The same add/remove operation is
 * applied to all three elements so their classes stay in step.
 */
function sidebar_toggle() {
  const handle = document.querySelector('.sidebar-toggle');
  const sidebarPane = document.querySelector('.book-sidebar');
  const contentPane = document.querySelector('.off-canvas-content');

  // 'remove' restores the sidebar, 'add' hides it.
  const op = handle.classList.contains('extend') ? 'remove' : 'add';

  handle.classList[op]('extend');
  sidebarPane.classList[op]('hide');
  contentPane.classList[op]('extend');
}
|
||
|
||
|
||
|
||
|
||
|
||
/**
 * Add the `show` class to the sidebar (`.book-sidebar`) and to the
 * `.off-canvas-overlay` element. Referenced by the navbar menu link's
 * `onclick` attribute.
 */
function open_sidebar() {
  // Look up both elements first, then mutate them in the same order.
  const targets = ['.book-sidebar', '.off-canvas-overlay'].map(function (selector) {
    return document.querySelector(selector);
  });
  targets.forEach(function (el) {
    el.classList.add('show');
  });
}
|
||
|
||
/**
 * Remove the `show` class from the sidebar (`.book-sidebar`) and from the
 * `.off-canvas-overlay` element. Referenced by the overlay's `onclick`
 * attribute.
 */
function hide_canvas() {
  // Look up both elements first, then mutate them in the same order.
  const targets = ['.book-sidebar', '.off-canvas-overlay'].map(function (selector) {
    return document.querySelector(selector);
  });
  targets.forEach(function (el) {
    el.classList.remove('show');
  });
}
|
||
|
||
|
||
|
||
</script>
|
||
|
||
|
||
|
||
<div class="off-canvas-content">
|
||
|
||
<div class="columns">
|
||
|
||
<div class="column col-12 col-lg-12">
|
||
|
||
<div class="book-navbar">
|
||
|
||
<!-- For Responsive Layout -->
|
||
|
||
<header class="navbar">
|
||
|
||
<section class="navbar-section">
|
||
|
||
<a onclick="open_sidebar()">
|
||
|
||
<i class="icon icon-menu"></i>
|
||
|
||
</a>
|
||
|
||
</section>
|
||
|
||
</header>
|
||
|
||
</div>
|
||
|
||
<div class="book-content" style="max-width: 960px; margin: 0 auto;
|
||
|
||
overflow-x: auto;
|
||
|
||
overflow-y: hidden;">
|
||
|
||
<div class="book-post">
|
||
|
||
<p id="tip" align="center"></p>
|
||
|
||
<div><h1>04 非结构存储:用好 JSON 这张牌</h1>
|
||
|
||
<p>前面几讲,我已经带你了解了 MySQL 数据库中常见的 3 种类型:数字类型、字符串类型和日期类型。然而,它们都属于传统关系型设计的范畴。</p>
|
||
|
||
<p>关系型的结构化存储存在一定的弊端,因为它需要预先定义好所有的列以及列对应的类型。但是业务在发展过程中,或许需要扩展单个列的描述功能,这时,如果能用好 JSON 数据类型,那就能打通关系型和非关系型数据的存储之间的界限,为业务提供更好的架构选择。</p>
|
||
|
||
<p>当然,很多同学在用 JSON 数据类型时会遇到各种各样的问题,<strong>其中最容易陷入的误区就是将类型 JSON 简单理解成字符串类型。</strong> 但当你学完今天的内容之后,会真正认识到 JSON 数据类型的威力,从而在实际工作中更好地存储非结构化的数据。</p>
|
||
|
||
<h3>JSON 数据类型</h3>
|
||
|
||
<p>JSON(JavaScript Object Notation)主要用于互联网应用服务之间的数据交换。MySQL 支持<a href="https://tools.ietf.org/html/rfc7159?fileGuid=xxQTRXtVcqtHK6j8">RFC 7159</a>定义的 JSON 规范,主要有<strong>JSON 对象</strong>和<strong>JSON 数组</strong>两种类型。下面就是 JSON 对象,主要用来存储图片的相关信息:</p>
|
||
|
||
<pre><code>{
|
||
|
||
|
||
|
||
"Image": {
|
||
|
||
|
||
|
||
"Width": 800,
|
||
|
||
|
||
|
||
"Height": 600,
|
||
|
||
|
||
|
||
"Title": "View from 15th Floor",
|
||
|
||
|
||
|
||
"Thumbnail": {
|
||
|
||
|
||
|
||
"Url": "http://www.example.com/image/481989943",
|
||
|
||
|
||
|
||
"Height": 125,
|
||
|
||
|
||
|
||
"Width": 100
|
||
|
||
|
||
|
||
},
|
||
|
||
|
||
|
||
"IDs": [116, 943, 234, 38793]
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
|
||
}
|
||
|
||
</code></pre>
|
||
|
||
<p>从中你可以看到, JSON 类型可以很好地描述数据的相关内容,比如这张图片的宽度、高度、标题等(这里使用到的类型有整型、字符串类型)。</p>
|
||
|
||
<p>JSON对象除了支持字符串、整型、日期类型,JSON 内嵌的字段也支持数组类型,如上代码中的 IDs 字段。</p>
|
||
|
||
<p>另一种 JSON 数据类型是数组类型,如:</p>
|
||
|
||
<pre><code>[
|
||
|
||
|
||
|
||
{
|
||
|
||
|
||
|
||
"precision": "zip",
|
||
|
||
|
||
|
||
"Latitude": 37.7668,
|
||
|
||
|
||
|
||
"Longitude": -122.3959,
|
||
|
||
|
||
|
||
"Address": "",
|
||
|
||
|
||
|
||
"City": "SAN FRANCISCO",
|
||
|
||
|
||
|
||
"State": "CA",
|
||
|
||
|
||
|
||
"Zip": "94107",
|
||
|
||
|
||
|
||
"Country": "US"
|
||
|
||
|
||
|
||
},
|
||
|
||
|
||
|
||
{
|
||
|
||
|
||
|
||
"precision": "zip",
|
||
|
||
|
||
|
||
"Latitude": 37.371991,
|
||
|
||
|
||
|
||
"Longitude": -122.026020,
|
||
|
||
|
||
|
||
"Address": "",
|
||
|
||
|
||
|
||
"City": "SUNNYVALE",
|
||
|
||
|
||
|
||
"State": "CA",
|
||
|
||
|
||
|
||
"Zip": "94085",
|
||
|
||
|
||
|
||
"Country": "US"
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
|
||
]
|
||
|
||
</code></pre>
|
||
|
||
<p>上面的示例演示的是一个 JSON 数组,其中有 2 个 JSON 对象。</p>
|
||
|
||
<p>到目前为止,可能很多同学会把 JSON 当作一个很大的字符串类型,从表面上来看,没有错。但本质上,JSON 是一种新的类型,有自己的存储格式,还能在每个对应的字段上创建索引,做特定的优化,这是传统字符串无法实现的。JSON 类型的另一个好处是<strong>无须预定义字段</strong>,字段可以无限扩展。而传统关系型数据库的列都需预先定义,想要扩展需要执行 ALTER TABLE ... ADD COLUMN ... 这样比较重的操作。</p>
|
||
|
||
<p>需要注意的是,JSON 类型是从 MySQL 5.7 版本开始支持的功能,而 8.0 版本解决了更新 JSON 的日志性能瓶颈。如果要在生产环境中使用 JSON 数据类型,强烈推荐使用 MySQL 8.0 版本。</p>
|
||
|
||
<p>讲到这儿,你已经对 JSON 类型的基本概念有所了解了,接下来,我们进入实战环节:如何在业务中用好JSON类型?</p>
|
||
|
||
<h3>业务表结构设计实战</h3>
|
||
|
||
<h4>用户登录设计</h4>
|
||
|
||
<p>在数据库中,<strong>JSON 类型比较适合存储一些修改较少、相对静态的数据</strong>,比如用户登录信息的存储如下:</p>
|
||
|
||
<pre><code>DROP TABLE IF EXISTS UserLogin;
|
||
|
||
|
||
|
||
CREATE TABLE UserLogin (
|
||
|
||
|
||
|
||
userId BIGINT NOT NULL,
|
||
|
||
|
||
|
||
loginInfo JSON,
|
||
|
||
|
||
|
||
PRIMARY KEY(userId)
|
||
|
||
|
||
|
||
);
|
||
|
||
</code></pre>
|
||
|
||
<p>由于当前业务的登录方式越来越多样化,如同一账户支持手机、微信、QQ 账号登录,所以这里可以用 JSON 类型存储登录的信息。</p>
|
||
|
||
<p>接着,插入下面的数据:</p>
|
||
|
||
<pre><code>SET @a = '
|
||
|
||
|
||
|
||
{
|
||
|
||
|
||
|
||
"cellphone" : "13918888888",
|
||
|
||
|
||
|
||
"wxchat" : "破产码农",
|
||
|
||
|
||
|
||
"QQ" : "82946772"
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
|
||
';
|
||
|
||
|
||
|
||
INSERT INTO UserLogin VALUES (1,@a);
|
||
|
||
|
||
|
||
SET @b = '
|
||
|
||
|
||
|
||
{
|
||
|
||
|
||
|
||
"cellphone" : "15026888888"
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
|
||
';
|
||
|
||
|
||
|
||
INSERT INTO UserLogin VALUES (2,@b);
|
||
|
||
</code></pre>
|
||
|
||
<p>从上面的例子中可以看到,用户 1 登录有三种方式:手机验证码登录、微信登录、QQ 登录,而用户 2 只有手机验证码登录。</p>
|
||
|
||
<p>而如果不采用 JSON 数据类型,就要用下面的方式建表:</p>
|
||
|
||
<pre><code>CREATE TABLE UserLogin (
|
||
|
||
|
||
|
||
userId BIGINT NOT NULL,
|
||
|
||
|
||
|
||
cellphone VARCHAR(255),
|
||
|
||
|
||
|
||
wechat VARCHAR(255),
|
||
|
||
|
||
|
||
QQ VARCHAR(255),
|
||
|
||
|
||
|
||
PRIMARY KEY(userId)
|
||
|
||
|
||
|
||
);
|
||
|
||
</code></pre>
|
||
|
||
<p>可以看到,虽然用传统关系型的方式也可以完成相关数据的存储,但是存在两个问题。</p>
|
||
|
||
<ul>
|
||
|
||
<li>有些列可能是比较稀疏的,一些列可能大部分都是 NULL 值;</li>
|
||
|
||
<li>如果要新增一种登录类型,如微博登录,则需要添加新列,而 JSON 类型无此烦恼。</li>
|
||
|
||
</ul>
|
||
|
||
<p>因为支持了新的JSON类型,MySQL 配套提供了丰富的 JSON 字段处理函数,用于方便地操作 JSON 数据,具体可以见 MySQL 官方文档。</p>
|
||
|
||
<p>其中,最常见的就是函数 JSON_EXTRACT,它用来从 JSON 数据中提取所需要的字段内容,如下面的这条 SQL 语句就查询用户的手机和微信信息。</p>
|
||
|
||
<pre><code>SELECT
|
||
|
||
|
||
|
||
userId,
|
||
|
||
|
||
|
||
JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.cellphone")) cellphone,
|
||
|
||
|
||
|
||
JSON_UNQUOTE(JSON_EXTRACT(loginInfo,"$.wxchat")) wxchat
|
||
|
||
|
||
|
||
FROM UserLogin;
|
||
|
||
|
||
|
||
+--------+-------------+--------------+
|
||
|
||
|
||
|
||
| userId | cellphone | wxchat |
|
||
|
||
|
||
|
||
+--------+-------------+--------------+
|
||
|
||
|
||
|
||
| 1 | 13918888888 | 破产码农 |
|
||
|
||
|
||
|
||
| 2 | 15026888888 | NULL |
|
||
|
||
|
||
|
||
+--------+-------------+--------------+
|
||
|
||
|
||
|
||
2 rows in set (0.01 sec)
|
||
|
||
</code></pre>
|
||
|
||
<p>当然了,每次写 JSON_EXTRACT、JSON_UNQUOTE 非常麻烦,MySQL 还提供了 ->> 表达式,和上述 SQL 效果完全一样:</p>
|
||
|
||
<pre><code>SELECT
|
||
|
||
|
||
|
||
userId,
|
||
|
||
|
||
|
||
loginInfo->>"$.cellphone" cellphone,
|
||
|
||
|
||
|
||
loginInfo->>"$.wxchat" wxchat
|
||
|
||
|
||
|
||
FROM UserLogin;
|
||
|
||
</code></pre>
|
||
|
||
<p>当 JSON 数据量非常大,用户希望对 JSON 数据进行有效检索时,可以利用 MySQL 的<strong>函数索引</strong>功能对 JSON 中的某个字段进行索引。</p>
|
||
|
||
<p>比如在上面的用户登录示例中,假设用户必须绑定唯一手机号,且希望未来能用手机号码进行用户检索时,可以创建下面的索引:</p>
|
||
|
||
<pre><code>ALTER TABLE UserLogin ADD COLUMN cellphone VARCHAR(255) AS (loginInfo->>"$.cellphone");
|
||
|
||
|
||
|
||
ALTER TABLE UserLogin ADD UNIQUE INDEX idx_cellphone(cellphone);
|
||
|
||
</code></pre>
|
||
|
||
<p>上述 SQL 首先创建了一个虚拟列 cellphone,这个列是由函数 loginInfo->>"$.cellphone" 计算得到的。然后在这个虚拟列上创建一个唯一索引 idx_cellphone。这时再通过虚拟列 cellphone 进行查询,就可以看到优化器会使用到新创建的 idx_cellphone 索引:</p>
|
||
|
||
<pre><code>EXPLAIN SELECT * FROM UserLogin
|
||
|
||
|
||
|
||
WHERE cellphone = '13918888888'\G
|
||
|
||
|
||
|
||
*************************** 1. row ***************************
|
||
|
||
|
||
|
||
id: 1
|
||
|
||
|
||
|
||
select_type: SIMPLE
|
||
|
||
|
||
|
||
table: UserLogin
|
||
|
||
|
||
|
||
partitions: NULL
|
||
|
||
|
||
|
||
type: const
|
||
|
||
|
||
|
||
possible_keys: idx_cellphone
|
||
|
||
|
||
|
||
key: idx_cellphone
|
||
|
||
|
||
|
||
key_len: 1023
|
||
|
||
|
||
|
||
ref: const
|
||
|
||
|
||
|
||
rows: 1
|
||
|
||
|
||
|
||
filtered: 100.00
|
||
|
||
|
||
|
||
Extra: NULL
|
||
|
||
|
||
|
||
1 row in set, 1 warning (0.00 sec)
|
||
|
||
</code></pre>
|
||
|
||
<p>当然,我们可以在一开始创建表的时候,就完成虚拟列及函数索引的创建。如下表创建的列 cellphone 对应的就是 JSON 中的内容,是个虚拟列;uk_idx_cellphone 就是在虚拟列 cellphone 上所创建的索引。</p>
|
||
|
||
<pre><code>CREATE TABLE UserLogin (
|
||
|
||
|
||
|
||
userId BIGINT,
|
||
|
||
|
||
|
||
loginInfo JSON,
|
||
|
||
|
||
|
||
cellphone VARCHAR(255) AS (loginInfo->>"$.cellphone"),
|
||
|
||
|
||
|
||
PRIMARY KEY(userId),
|
||
|
||
|
||
|
||
UNIQUE KEY uk_idx_cellphone(cellphone)
|
||
|
||
|
||
|
||
);
|
||
|
||
</code></pre>
|
||
|
||
<h4>用户画像设计</h4>
|
||
|
||
<p>某些业务需要做用户画像(也就是对用户打标签),然后根据用户的标签,通过数据挖掘技术,进行相应的产品推荐。比如:</p>
|
||
|
||
<ul>
|
||
|
||
<li>在电商行业中,根据用户的穿搭喜好,推荐相应的商品;</li>
|
||
|
||
<li>在音乐行业中,根据用户喜欢的音乐风格和常听的歌手,推荐相应的歌曲;</li>
|
||
|
||
<li>在金融行业,根据用户的风险喜好和投资经验,推荐相应的理财产品。</li>
|
||
|
||
</ul>
|
||
|
||
<p>在这,我强烈推荐你用 JSON 类型在数据库中存储用户画像信息,并结合 JSON 数组类型和多值索引的特点进行高效查询。假设有张画像定义表:</p>
|
||
|
||
<pre><code>CREATE TABLE Tags (
|
||
|
||
|
||
|
||
tagId bigint auto_increment,
|
||
|
||
|
||
|
||
tagName varchar(255) NOT NULL,
|
||
|
||
|
||
|
||
primary key(tagId)
|
||
|
||
|
||
|
||
);
|
||
|
||
|
||
|
||
SELECT * FROM Tags;
|
||
|
||
|
||
|
||
+-------+--------------+
|
||
|
||
|
||
|
||
| tagId | tagName |
|
||
|
||
|
||
|
||
+-------+--------------+
|
||
|
||
|
||
|
||
| 1 | 70后 |
|
||
|
||
|
||
|
||
| 2 | 80后 |
|
||
|
||
|
||
|
||
| 3 | 90后 |
|
||
|
||
|
||
|
||
| 4 | 00后 |
|
||
|
||
|
||
|
||
| 5 | 爱运动 |
|
||
|
||
|
||
|
||
| 6 | 高学历 |
|
||
|
||
|
||
|
||
| 7 | 小资 |
|
||
|
||
|
||
|
||
| 8 | 有房 |
|
||
|
||
|
||
|
||
| 9 | 有车 |
|
||
|
||
|
||
|
||
| 10 | 常看电影 |
|
||
|
||
|
||
|
||
| 11 | 爱网购 |
|
||
|
||
|
||
|
||
| 12 | 爱外卖 |
|
||
|
||
|
||
|
||
+-------+--------------+
|
||
|
||
</code></pre>
|
||
|
||
<p>可以看到,表 Tags 是一张画像定义表,用于描述当前定义有多少个标签,接着给每个用户打标签,比如用户 David,他的标签是 80 后、高学历、小资、有房、常看电影;用户 Tom,90 后、常看电影、爱外卖。</p>
|
||
|
||
<p>若不用 JSON 数据类型进行标签存储,通常会将用户标签通过字符串,加上分割符的方式,在一个字段中存取用户所有的标签:</p>
|
||
|
||
<pre><code>+-------+---------------------------------------+
|
||
|
||
|
||
|
||
|用户 |标签 |
|
||
|
||
|
||
|
||
+-------+---------------------------------------+
|
||
|
||
|
||
|
||
|David |80后 ; 高学历 ; 小资 ; 有房 ;常看电影 |
|
||
|
||
|
||
|
||
|Tom |90后 ;常看电影 ; 爱外卖 |
|
||
|
||
|
||
|
||
+-------+---------------------------------------+
|
||
|
||
</code></pre>
|
||
|
||
<p><strong>这样做的缺点是:</strong> 不好搜索特定画像的用户,另外分隔符也是一种自我约定,在数据库中其实可以任意存储其他数据,最终产生脏数据。</p>
|
||
|
||
<p>用 JSON 数据类型就能很好解决这个问题:</p>
|
||
|
||
<pre><code>DROP TABLE IF EXISTS UserTag;
|
||
|
||
|
||
|
||
CREATE TABLE UserTag (
|
||
|
||
|
||
|
||
userId bigint NOT NULL,
|
||
|
||
|
||
|
||
userTags JSON,
|
||
|
||
|
||
|
||
PRIMARY KEY (userId)
|
||
|
||
|
||
|
||
);
|
||
|
||
|
||
|
||
INSERT INTO UserTag VALUES (1,'[2,6,8,10]');
|
||
|
||
|
||
|
||
INSERT INTO UserTag VALUES (2,'[3,10,12]');
|
||
|
||
</code></pre>
|
||
|
||
<p>其中,userTags 存储的标签就是表 Tags 已定义的那些标签值,只是使用 JSON 数组类型进行存储。</p>
|
||
|
||
<p>MySQL 8.0.17 版本开始支持 Multi-Valued Indexes,用于在 JSON 数组上创建索引,并通过函数 member of、json_contains、json_overlaps 来快速检索索引数据。所以你可以在表 UserTag 上创建 Multi-Valued Indexes:</p>
|
||
|
||
<pre><code>ALTER TABLE UserTag
|
||
|
||
|
||
|
||
ADD INDEX idx_user_tags ((cast((userTags->"$") as unsigned array)));
|
||
|
||
</code></pre>
|
||
|
||
<p>如果想要查询用户画像为常看电影的用户,可以使用函数 MEMBER OF:</p>
|
||
|
||
<pre><code>EXPLAIN SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE 10 MEMBER OF(userTags->"$")\G
|
||
|
||
|
||
|
||
*************************** 1. row ***************************
|
||
|
||
|
||
|
||
id: 1
|
||
|
||
|
||
|
||
select_type: SIMPLE
|
||
|
||
|
||
|
||
table: UserTag
|
||
|
||
|
||
|
||
partitions: NULL
|
||
|
||
|
||
|
||
type: ref
|
||
|
||
|
||
|
||
possible_keys: idx_user_tags
|
||
|
||
|
||
|
||
key: idx_user_tags
|
||
|
||
|
||
|
||
key_len: 9
|
||
|
||
|
||
|
||
ref: const
|
||
|
||
|
||
|
||
rows: 1
|
||
|
||
|
||
|
||
filtered: 100.00
|
||
|
||
|
||
|
||
Extra: Using where
|
||
|
||
|
||
|
||
1 row in set, 1 warning (0.00 sec)
|
||
|
||
|
||
|
||
SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE 10 MEMBER OF(userTags->"$");
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| userId | userTags |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| 1 | [2, 6, 8, 10] |
|
||
|
||
|
||
|
||
| 2 | [3, 10, 12] |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
2 rows in set (0.00 sec)
|
||
|
||
</code></pre>
|
||
|
||
<p>如果想要查询画像为 80 后,且常看电影的用户,可以使用函数 JSON_CONTAINS:</p>
|
||
|
||
<pre><code>EXPLAIN SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE JSON_CONTAINS(userTags->"$", '[2,10]')\G
|
||
|
||
|
||
|
||
*************************** 1. row ***************************
|
||
|
||
|
||
|
||
id: 1
|
||
|
||
|
||
|
||
select_type: SIMPLE
|
||
|
||
|
||
|
||
table: UserTag
|
||
|
||
|
||
|
||
partitions: NULL
|
||
|
||
|
||
|
||
type: range
|
||
|
||
|
||
|
||
possible_keys: idx_user_tags
|
||
|
||
|
||
|
||
key: idx_user_tags
|
||
|
||
|
||
|
||
key_len: 9
|
||
|
||
|
||
|
||
ref: NULL
|
||
|
||
|
||
|
||
rows: 3
|
||
|
||
|
||
|
||
filtered: 100.00
|
||
|
||
|
||
|
||
Extra: Using where
|
||
|
||
|
||
|
||
1 row in set, 1 warning (0.00 sec)
|
||
|
||
|
||
|
||
SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE JSON_CONTAINS(userTags->"$", '[2,10]');
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| userId | userTags |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| 1 | [2, 6, 8, 10] |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
1 row in set (0.00 sec)
|
||
|
||
</code></pre>
|
||
|
||
<p>如果想要查询画像为 80 后、90 后,或常看电影的用户(即至少具有其中一个标签),则可以使用函数 JSON_OVERLAPS:</p>
|
||
|
||
<pre><code>EXPLAIN SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE JSON_OVERLAPS(userTags->"$", '[2,3,10]')\G
|
||
|
||
|
||
|
||
*************************** 1. row ***************************
|
||
|
||
|
||
|
||
id: 1
|
||
|
||
|
||
|
||
select_type: SIMPLE
|
||
|
||
|
||
|
||
table: UserTag
|
||
|
||
|
||
|
||
partitions: NULL
|
||
|
||
|
||
|
||
type: range
|
||
|
||
|
||
|
||
possible_keys: idx_user_tags
|
||
|
||
|
||
|
||
key: idx_user_tags
|
||
|
||
|
||
|
||
key_len: 9
|
||
|
||
|
||
|
||
ref: NULL
|
||
|
||
|
||
|
||
rows: 4
|
||
|
||
|
||
|
||
filtered: 100.00
|
||
|
||
|
||
|
||
Extra: Using where
|
||
|
||
|
||
|
||
1 row in set, 1 warning (0.00 sec)
|
||
|
||
|
||
|
||
SELECT * FROM UserTag
|
||
|
||
|
||
|
||
WHERE JSON_OVERLAPS(userTags->"$", '[2,3,10]');
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| userId | userTags |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
| 1 | [2, 6, 8, 10] |
|
||
|
||
|
||
|
||
| 2 | [3, 10, 12] |
|
||
|
||
|
||
|
||
+--------+---------------+
|
||
|
||
|
||
|
||
2 rows in set (0.01 sec)
|
||
|
||
</code></pre>
|
||
|
||
<h3>总结</h3>
|
||
|
||
<p>JSON 类型是 MySQL 5.7 版本新增的数据类型,用好 JSON 数据类型可以有效解决很多业务中的实际问题。最后,我总结下今天的重点内容:</p>
|
||
|
||
<ul>
|
||
|
||
<li>使用 JSON 数据类型,推荐用 MySQL 8.0.17 以上的版本,性能更好,同时也支持 Multi-Valued Indexes;</li>
|
||
|
||
<li>JSON 数据类型的好处是无须预先定义列,数据本身就具有很好的描述性;</li>
|
||
|
||
<li>不要将有明显关系型的数据用 JSON 存储,如用户余额、用户姓名、用户身份证等,这些都是每个用户必须包含的数据;</li>
|
||
|
||
<li>JSON 数据类型推荐使用在不经常更新的静态数据存储。</li>
|
||
|
||
</ul>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
<div>
|
||
|
||
<div style="float: left">
|
||
|
||
<a href="/专栏/MySQL实战宝典/03 日期类型:TIMESTAMP 可能是巨坑.md">上一页</a>
|
||
|
||
</div>
|
||
|
||
<div style="float: right">
|
||
|
||
<a href="/专栏/MySQL实战宝典/05 表结构设计:忘记范式准则.md">下一页</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<a class="off-canvas-overlay" onclick="hide_canvas()"></a>
|
||
|
||
</div>
|
||
|
||
<script defer src="https://static.cloudflareinsights.com/beacon.min.js/v652eace1692a40cfa3763df669d7439c1639079717194" integrity="sha512-Gi7xpJR8tSkrpF7aordPZQlW2DLtzUlZcumS8dMQjwDHEnw9I7ZLyiOj/6tZStRBGtGgN6ceN6cMH8z7etPGlw==" data-cf-beacon='{"rayId":"709973021f0f3d60","version":"2021.12.0","r":1,"token":"1f5d475227ce4f0089a7cff1ab17c0f5","si":100}' crossorigin="anonymous"></script>
|
||
|
||
</body>
|
||
|
||
<!-- Global site tag (gtag.js) - Google Analytics -->
|
||
|
||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-NPSEEVD756"></script>
|
||
|
||
<script>
|
||
|
||
// Google Analytics bootstrap: make sure the shared queue exists, then
// enqueue the initial 'js' and 'config' commands.
if (!window.dataLayer) {
  window.dataLayer = [];
}

/**
 * Queue one command on `dataLayer`. The `arguments` object itself is
 * pushed (not a copied array).
 */
function gtag() {
  dataLayer.push(arguments);
}

gtag('js', new Date());
gtag('config', 'G-NPSEEVD756');
|
||
|
||
// Reading-progress helper.
// - On the site root: if a previous page was stored in the "lastPath"
//   cookie, show a "resume" link inside the #tip element.
// - On any other page: store the current path in the "lastPath" cookie.
var path = window.location.pathname
var cookie = getCookie("lastPath");

if (path.replace("/", "") === "") {
  // The cookie value comes from a URL path and may contain quotes or markup,
  // so it is never concatenated into HTML. Only a site-relative path
  // ("/..." but not "//..." or "/\...", which browsers resolve to another
  // host) is accepted as a link target.
  var isSiteRelative = cookie.charAt(0) === "/" &&
    cookie.charAt(1) !== "/" &&
    cookie.charAt(1) !== "\\";

  if (cookie.replace("/", "") !== "" && isSiteRelative) {
    var tip = document.getElementById("tip");
    if (tip) {
      var resumeLink = document.createElement("a");
      resumeLink.setAttribute("href", cookie);
      resumeLink.textContent = "跳转到上次进度";

      // Replace any previous content of the tip with the single link.
      tip.textContent = "";
      tip.appendChild(resumeLink);
    }
  }
} else {
  setCookie("lastPath", path)
}
|
||
|
||
|
||
|
||
/**
 * Store a cookie that expires 180 days from now and is valid for the
 * whole site (path "/").
 *
 * The value is written as-is (no encoding), matching what getCookie reads.
 *
 * @param {string} cname  Cookie name.
 * @param {string} cvalue Cookie value.
 */
function setCookie(cname, cvalue) {
  var lifetimeMs = 180 * 24 * 60 * 60 * 1000; // 180 days
  var expiry = new Date();
  expiry.setTime(expiry.getTime() + lifetimeMs);

  // toUTCString() replaces the deprecated toGMTString(); same output format.
  document.cookie = cname + "=" + cvalue + "; expires=" + expiry.toUTCString() + "; path=/";
}
|
||
|
||
|
||
|
||
/**
 * Look up a cookie by name in `document.cookie`.
 *
 * @param {string} cname Cookie name.
 * @returns {string} The value of the first matching cookie, or "" when no
 *   cookie with that name is present.
 */
function getCookie(cname) {
  var prefix = cname + "=";

  var match = document.cookie
    .split(';')
    .map(function (entry) { return entry.trim(); })
    .find(function (entry) { return entry.indexOf(prefix) === 0; });

  return match === undefined ? "" : match.substring(prefix.length);
}
|
||
|
||
|
||
|
||
</script>
|
||
|
||
|
||
|
||
</html>
|
||
|