Files
CoderSherlock.github.io/_site/posts/generate-word-cloud-with-chinese-fenci.html
T
haopengzhan 4859927856 Optimized UX and fixed bugs
- Added temp logo
    - Added favicon
    - Enabled Disqus (jekyll.env)
    - Changed Home layout
2021-10-25 14:53:15 -04:00

1275 lines
62 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html><html lang="en">
<head><!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-82637164-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-82637164-1');
</script><meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"><title>Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries - Stop Talking, Start Doing</title>
<meta name="description" content="Let's generate a word cloud like this. Not understanding the language is not a big deal. If your written language is based on the Latin alphabet (or another language ...">
<link rel="canonical" href="https://codersherlock.github.com//posts/generate-word-cloud-with-chinese-fenci"><link rel="alternate" type="application/rss+xml" title="Stop Talking, Start Doing" href="/feed.xml"><link rel="apple-touch-icon" sizes="180x180" href="/assets/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/assets/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/assets/favicon-16x16.png">
<link rel="manifest" href="/assets/site.webmanifest">
<link rel="mask-icon" href="/assets/safari-pinned-tab.svg" color="#5bbad5">
<link rel="shortcut icon" href="/assets/favicon.ico">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="msapplication-config" content="/assets/browserconfig.xml">
<meta name="theme-color" content="#ffffff">
<link rel="stylesheet" href="/assets/css/main.css"><link rel="stylesheet" href="https://cdn.bootcdn.net/ajax/libs/font-awesome/5.15.1/css/all.css" ><!-- start custom head snippets -->
<!-- end custom head snippets -->
<script>(function() {
// Reports whether `val` is an array, judged by the tag that
// Object.prototype.toString produces for it.
window.isArray = function(val) {
  var tag = Object.prototype.toString.call(val);
  return tag === '[object Array]';
};
// Reports whether `val` is a primitive string (its `typeof` is 'string').
window.isString = function(val) {
  var kind = typeof val;
  return kind === 'string';
};
// Reports whether the document object has an `on<event>` property,
// e.g. hasEvent('click') checks for `onclick` in window.document.
window.hasEvent = function(event) {
  var handlerName = 'on'.concat(event);
  return handlerName in window.document;
};
// Reports whether `node` is one of the three objects that stand for the
// whole page: the <html> element, the <body> element, or the window itself.
window.isOverallScroller = function(node) {
  if (node === document.documentElement) { return true; }
  if (node === document.body) { return true; }
  return node === window;
};
// Reports whether `node` is an <input>, <select> or <textarea>,
// based on an exact (upper-case) match of its tagName.
window.isFormElement = function(node) {
  switch (node.tagName) {
    case 'INPUT':
    case 'SELECT':
    case 'TEXTAREA':
      return true;
    default:
      return false;
  }
};
// Tiny "run after window load" helper.
// pageLoad.then(cb) calls `cb` right away when the window `load` event has
// already been observed by this helper; otherwise `cb` is queued and invoked
// (in registration order) when `load` fires. Falsy `cb` values are ignored.
window.pageLoad = (function () {
  var pending = [];
  var isLoaded = false;

  function onLoad() {
    var idx = 0;
    isLoaded = true;
    // Re-read the length every pass so the loop sees the live queue.
    while (idx < pending.length) {
      pending[idx]();
      idx += 1;
    }
  }

  function then(cb) {
    if (!cb) { return; }
    if (isLoaded) {
      cb();
    } else {
      pending.push(cb);
    }
  }

  window.addEventListener('load', onLoad);

  return { then: then };
})();
})();
(function() {
// Returns a throttled wrapper around `func` that runs it at most once per
// `wait` milliseconds.
//
// - The first call in a quiet period runs `func` immediately (leading edge).
// - Calls made inside the wait window schedule a single trailing run, which
//   uses the `this` and arguments of the most recent call.
// - The wrapper returns the result of the latest completed run of `func`
//   (so a call that only schedules a trailing run returns the previous result).
//
// @param {Function} func  function to throttle
// @param {number}   wait  minimum interval between runs, in milliseconds
// @returns {Function} the throttled wrapper
window.throttle = function(func, wait) {
  var args, result, thisArg, timeoutId, lastCalled = 0;

  function trailingCall() {
    lastCalled = Date.now();
    timeoutId = null;
    result = func.apply(thisArg, args);
  }

  return function() {
    var now = Date.now(),
        remaining = wait - (now - lastCalled);
    args = arguments;
    thisArg = this;
    // `remaining > wait` means `now` is earlier than `lastCalled`, i.e. the
    // system clock was moved backwards. Without this guard the wrapper would
    // wait for the clock to catch up, stalling for an arbitrarily long time.
    if (remaining <= 0 || remaining > wait) {
      clearTimeout(timeoutId);
      timeoutId = null;
      lastCalled = now;
      result = func.apply(thisArg, args);
    } else if (!timeoutId) {
      timeoutId = setTimeout(trailingCall, remaining);
    }
    return result;
  };
};
})();
(function() {
// Minimal array-backed set used by the loader below. Membership is decided
// with strict equality (===) and insertion order is preserved.
// Note: inside the enclosing closure this name shadows the built-in Set.
var Set = (function() {
  // Adds `item` unless an identical (===) item is already stored.
  // Returns the backing array when the item was added, undefined otherwise.
  var add = function(item) {
    var i, data = this._data;
    for (i = 0; i < data.length; i++) {
      if (data[i] === item) {
        return;
      }
    }
    this.size ++;
    data.push(item);
    return data;
  };

  // Builds a set from an array-like `data` (anything with `length` and
  // indexed items). A missing or null `data` yields an empty set instead of
  // throwing on `data.length`.
  var Set = function(data) {
    var i;
    this.size = 0;
    this._data = [];
    if (data === undefined || data === null) {
      return;
    }
    for (i = 0; i < data.length; i++) {
      add.call(this, data[i]);
    }
  };

  Set.prototype.add = add;

  // Returns the item stored at insertion position `index`.
  Set.prototype.get = function(index) { return this._data[index]; };

  // Reports whether `item` is stored in the set.
  Set.prototype.has = function(item) {
    var i, data = this._data;
    for (i = 0; i < data.length; i++) {
      if (data[i] === item) {
        return true;
      }
    }
    return false;
  };

  // Reports whether `map` (another instance of this Set) holds exactly the
  // same items, regardless of order.
  Set.prototype.is = function(map) {
    if (map._data.length !== this._data.length) { return false; }
    var i, j, flag, tData = this._data, mData = map._data;
    // Sizes match and items are unique, so checking that every item of this
    // set occurs in the other one is enough.
    for (i = 0; i < tData.length; i++) {
      for (flag = false, j = 0; j < mData.length; j++) {
        if (tData[i] === mData[j]) {
          flag = true;
          break;
        }
      }
      if (!flag) { return false; }
    }
    return true;
  };

  // Returns the backing array itself (not a copy); callers must not mutate it.
  Set.prototype.values = function() {
    return this._data;
  };

  return Set;
})();
// Lazy loader for scripts and stylesheets.
//
//   Lazyload.js(urlOrUrls, callback)   appends <script src=...> elements
//   Lazyload.css(urlOrUrls, callback)  appends <link rel="stylesheet" href=...>
//
// `urlOrUrls` is a single URL string or an array of URLs. Each URL is
// requested at most once per type; `callback` (optional) runs once every URL
// in the given combination has fired its `load` event. Load errors are not
// observed: a URL that fails to load never completes its entries.
//
// Internal state, per type ('js' / 'css'):
//   sources[type][url]  undefined = never requested, false = element appended
//                       and still loading, true = `load` event received
//   queue[type]         entries { urls: Set, callbacks: Function[], load: Boolean },
//                       one per distinct URL combination
window.Lazyload = (function(doc) {
  // `context` is the `this` of this factory call and is used as `this` for
  // every callback (presumably the global object in a non-strict script).
  var queue = {js: [], css: []}, sources = {js: {}, css: {}}, context = this;

  // Creates an element and copies the own properties of `attrs` onto it as
  // attributes.
  var createNode = function(name, attrs) {
    var node = doc.createElement(name), attr;
    for (attr in attrs) {
      if (attrs.hasOwnProperty(attr)) {
        node.setAttribute(attr, attrs[attr]);
      }
    }
    return node;
  };

  // Reports whether every URL in `urls` is marked as loaded in `state`.
  var allLoaded = function(state, urls) {
    var i;
    for (i = 0; i < urls.length; i++) {
      if (!state[urls[i]]) {
        return false;
      }
    }
    return true;
  };

  // Marks `entry` as complete and runs its callbacks. The flag is set even
  // when there are no callbacks yet, so that callbacks registered later for
  // the same URL combination run immediately instead of waiting forever.
  var fire = function(entry) {
    var i, cbs = entry.callbacks;
    entry.load = true;
    for (i = 0; i < cbs.length; i++) {
      cbs[i].call(context);
    }
  };

  // Called when the element for `url` has loaded: records that fact and
  // completes every pending entry whose URLs are now all loaded.
  var end = function(type, url) {
    var state, entries, entry, i;
    if (type !== 'js' && type !== 'css') {
      return;
    }
    state = sources[type];
    entries = queue[type];
    state[url] = true;
    for (i = 0; i < entries.length; i++) {
      entry = entries[i];
      if (!entry.load && entry.urls.has(url) && allLoaded(state, entry.urls.values())) {
        fire(entry);
      }
    }
  };

  // Builds the onload handler for one element.
  var onLoaded = function(type, url) {
    return function() {
      end(type, url);
    };
  };

  var load = function(type, urls, callback) {
    var state, entries, entry, node, i, url, list,
      wanted = typeof urls === 'string' ? new Set([urls]) : new Set(urls);
    if (type !== 'js' && type !== 'css') {
      return;
    }
    state = sources[type];
    entries = queue[type];

    // Reuse the entry for this exact URL combination if there is one.
    for (i = 0; i < entries.length; i++) {
      if (wanted.is(entries[i].urls)) {
        entry = entries[i];
        break;
      }
    }
    if (entry) {
      if (callback) {
        if (entry.load) {
          callback.call(context);
        } else {
          entry.callbacks.push(callback);
        }
      }
      return;
    }

    entry = {
      urls: wanted,
      callbacks: callback ? [callback] : [],
      load: false
    };
    entries.push(entry);

    list = wanted.values();
    for (i = 0; i < list.length; i++) {
      url = list[i];
      if (state[url] !== undefined) {
        continue; // already requested (loading or loaded)
      }
      node = type === 'js'
        ? createNode('script', { src: url })
        : createNode('link', { rel: 'stylesheet', href: url });
      node.onload = onLoaded(type, url);
      // Record the in-flight state before the element is attached.
      state[url] = false;
      (doc.head || doc.body).appendChild(node);
    }

    // Every URL of this new combination may already be loaded through other
    // combinations; no further `load` event will arrive for them, so the
    // entry has to be completed here or its callback would never run.
    if (allLoaded(state, list)) {
      fire(entry);
    }
  };

  return {
    js: function(url, callback) {
      load('js', url, callback);
    },
    css: function(url, callback) {
      load('css', url, callback);
    }
  };
})(this.document);
})();
</script><script>
(function() {
// Site-wide constants, published as window.TEXT_VARIABLES.

// URLs of third-party assets, keyed by library name.
var ASSET_SOURCES = {
  font_awesome: 'https://cdn.bootcdn.net/ajax/libs/font-awesome/5.15.1/css/all.css',
  jquery: 'https://cdn.bootcss.com/jquery/3.1.1/jquery.min.js',
  leancloud_js_sdk: '//cdn.jsdelivr.net/npm/leancloud-storage@3.13.2/dist/av-min.js',
  chart: 'https://cdn.bootcss.com/Chart.js/2.7.2/Chart.bundle.min.js',
  gitalk: {
    js: 'https://cdn.bootcss.com/gitalk/1.2.2/gitalk.min.js',
    css: 'https://cdn.bootcss.com/gitalk/1.2.2/gitalk.min.css'
  },
  valine: 'https://unpkg.com/valine/dist/Valine.min.js',
  mathjax: 'https://cdn.bootcss.com/mathjax/2.7.4/MathJax.js?config=TeX-MML-AM_CHTML',
  mermaid: 'https://cdn.bootcss.com/mermaid/8.0.0-rc.8/mermaid.min.js'
};

// Site options; `toc.selectors` is a comma-separated selector list.
var SITE_OPTIONS = {
  toc: {
    selectors: 'h1,h2,h3'
  }
};

// Site-local asset paths.
var LOCAL_PATHS = {
  search_js: '/assets/search.js'
};

var TEXT_VARIABLES = {
  version: '2.2.6',
  sources: ASSET_SOURCES,
  site: SITE_OPTIONS,
  paths: LOCAL_PATHS
};

window.TEXT_VARIABLES = TEXT_VARIABLES;
})();
</script>
</head>
<body>
<div class="root" data-is-touch="false">
<div class="layout--page js-page-root"><div class="page__main js-page-main page__viewport has-aside cell cell--auto">
<div class="page__main-inner"><div class="page__header d-print-none"><header class="header"><div class="main">
<div class="header__title">
<div class="header__brand"><?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
width="231.000000pt" height="218.000000pt" viewBox="0 0 231.000000 218.000000"
preserveAspectRatio="xMidYMid meet">
<metadata>
Created by potrace 1.16, written by Peter Selinger 2001-2019
</metadata>
<g transform="translate(0.000000,218.000000) scale(0.100000,-0.100000)"
fill="#000000" stroke="none">
<path d="M1754 2168 c-3 -4 -2 -8 3 -8 4 0 3 -4 -4 -8 -6 -4 -13 -23 -16 -42
-4 -19 -14 -41 -24 -49 -17 -12 -16 -13 6 -8 l25 6 -27 -25 c-31 -29 -33 -34
-11 -34 8 0 12 5 9 10 -3 6 -2 10 4 10 5 0 13 -5 16 -10 4 -6 12 -7 18 -4 8 5
8 3 0 -7 -7 -7 -13 -21 -13 -32 0 -10 -4 -16 -9 -13 -14 9 -7 -18 11 -45 l16
-24 16 39 c9 21 16 44 16 51 0 6 3 15 8 19 4 4 7 11 8 14 0 4 4 23 9 42 9 36
11 46 13 68 1 7 6 11 11 8 4 -3 6 5 4 18 -4 17 0 26 14 29 10 3 -8 5 -40 4
-32 0 -60 -4 -63 -9z"/>
<path d="M1936 2163 c-4 -9 -2 -20 5 -23 14 -9 14 -21 -1 -30 -8 -5 -2 -10 14
-14 15 -4 24 -11 21 -17 -4 -5 -15 -7 -26 -4 -13 5 -19 2 -19 -10 0 -9 7 -15
17 -13 14 3 17 -6 18 -52 1 -30 3 -60 7 -65 3 -6 4 -23 2 -40 -1 -16 -5 -62
-8 -102 -2 -39 -7 -80 -10 -90 -4 -12 3 -28 19 -45 14 -15 21 -30 15 -33 -6
-4 -4 -17 5 -36 9 -16 13 -34 10 -39 -3 -6 1 -10 9 -10 22 0 20 -30 -1 -31
-14 0 -15 -2 -3 -6 22 -9 42 -32 36 -42 -3 -5 -17 -11 -31 -13 -14 -3 -22 -1
-18 5 7 12 -16 27 -42 27 -19 -1 -19 -1 -2 -14 16 -12 16 -15 3 -29 -14 -13
-14 -16 0 -21 13 -5 14 -9 4 -21 -8 -10 -9 -15 -2 -15 6 0 9 -6 6 -13 -2 -7 2
-26 10 -42 8 -16 13 -32 10 -37 -3 -4 0 -8 5 -8 6 0 11 -7 11 -15 0 -8 5 -15
12 -15 6 0 3 -10 -8 -21 -19 -22 -19 -22 5 -14 16 5 21 4 17 -4 -5 -7 -1 -8
10 -4 12 5 15 3 9 -6 -6 -10 -2 -11 16 -7 19 6 21 5 8 -3 -20 -13 -66 -9 -98
8 -15 9 -21 9 -16 2 3 -6 -4 -20 -17 -31 -16 -14 -18 -18 -5 -14 12 4 17 1 17
-11 0 -14 -6 -16 -30 -11 -38 7 -39 -1 -2 -15 24 -9 25 -11 9 -20 -9 -6 -15
-14 -12 -20 4 -5 -2 -9 -12 -9 -10 0 -25 -5 -33 -11 -13 -10 -13 -10 1 -6 9 3
16 -1 16 -9 0 -12 -3 -12 -21 0 -17 10 -21 25 -22 65 -1 29 -7 58 -14 64 -10
9 -11 9 -5 -3 6 -10 1 -8 -13 4 -12 10 -19 22 -17 26 3 4 -2 11 -11 14 -8 3
-12 10 -9 16 3 6 -1 13 -10 16 -10 4 -15 14 -12 25 3 10 -2 19 -12 22 -26 6
-39 -3 -34 -24 6 -23 -13 -49 -37 -49 -10 0 -27 11 -38 25 -24 30 -32 31 -32
4 0 -16 -4 -19 -21 -14 -15 5 -19 4 -14 -5 5 -8 0 -7 -14 2 -14 9 -20 22 -19
40 1 16 6 28 11 28 5 0 3 7 -4 16 -8 9 -12 27 -11 40 7 52 -18 131 -54 168
-40 42 -76 50 -182 44 -86 -5 -133 -24 -174 -72 -23 -26 -27 -40 -28 -94 0
-66 17 -113 47 -132 17 -10 17 -10 -1 -8 -10 2 -21 -3 -24 -11 -3 -10 0 -12
13 -7 15 5 16 4 5 -9 -10 -12 -10 -15 3 -15 8 0 17 9 20 21 3 12 10 18 16 14
8 -4 6 -11 -4 -21 -8 -9 -13 -19 -10 -24 3 -4 -5 -6 -17 -4 -21 4 -21 4 -3 -5
15 -7 24 -4 37 13 21 25 22 20 7 -22 -6 -18 -14 -30 -19 -27 -4 2 -15 -4 -24
-14 -17 -18 -16 -19 18 -18 20 1 36 4 36 8 0 4 7 9 15 13 13 5 13 2 -1 -19
-13 -20 -20 -23 -34 -15 -15 8 -22 5 -33 -10 -12 -17 -16 -18 -20 -6 -4 9 -14
12 -26 9 -13 -3 -21 -1 -21 8 0 7 -3 10 -6 7 -3 -4 -1 -15 4 -25 6 -11 7 -25
3 -32 -6 -8 -10 -2 -14 18 -8 36 -22 40 -38 9 -10 -18 -9 -20 5 -15 9 4 16 1
16 -4 0 -7 -11 -9 -25 -7 -13 3 -26 0 -29 -6 -3 -7 -5 -6 -5 2 -1 7 -7 10 -13
7 -7 -4 -4 2 8 12 19 16 39 53 29 53 -2 0 -17 -14 -33 -31 l-30 -32 5 27 c4
20 1 26 -11 26 -11 0 -15 -5 -10 -12 4 -7 -2 -5 -13 4 -11 10 -27 18 -36 18
-9 0 -21 8 -27 18 -9 14 -10 11 -5 -15 5 -24 2 -34 -12 -43 -16 -10 -15 -10 5
-4 23 6 24 6 5 -14 -18 -19 -18 -19 -52 0 -33 19 -34 19 -28 -1 5 -19 3 -19
-18 -5 -26 17 -39 51 -21 57 6 2 4 3 -4 2 -8 0 -14 -6 -13 -12 1 -6 -5 -17
-14 -25 -10 -8 -14 -8 -10 -1 4 6 2 15 -4 19 -7 4 -9 3 -5 -4 3 -6 -2 -15 -11
-21 -14 -8 -14 -10 -3 -11 9 0 5 -6 -10 -17 -14 -10 -28 -17 -32 -15 -5 1 -8
-6 -8 -16 0 -10 -5 -23 -11 -29 -7 -7 -7 -13 0 -17 7 -4 6 -16 -1 -36 -7 -23
-7 -34 2 -45 11 -14 7 -19 -12 -16 -5 0 -8 -3 -8 -9 0 -13 6 -13 31 1 19 10
20 9 9 -12 -11 -19 -10 -21 4 -16 10 4 14 3 11 -2 -3 -5 -1 -12 5 -16 13 -8
13 -25 -1 -25 -5 0 -7 6 -3 13 4 7 3 9 -2 4 -5 -5 -9 -14 -9 -20 -1 -7 -7 -11
-14 -9 -7 1 -19 -5 -27 -15 -13 -17 -13 -17 -14 4 0 12 5 25 10 28 6 4 7 11 4
17 -4 7 -2 8 4 4 7 -4 12 -1 12 9 0 11 -5 15 -16 11 -8 -3 -12 -2 -9 4 3 6 1
10 -6 10 -7 0 -5 8 6 20 10 11 13 20 7 20 -6 0 -14 -5 -17 -10 -5 -8 -11 -7
-21 1 -7 6 -11 15 -8 20 3 5 -3 6 -12 2 -10 -4 -13 -8 -7 -10 6 -2 9 -16 7
-29 -3 -15 -1 -23 5 -20 5 4 12 3 14 -1 3 -5 -14 -9 -36 -10 -31 -2 -38 0 -27
7 13 9 12 13 -8 30 -13 10 -29 17 -37 14 -17 -7 -28 -34 -14 -34 6 0 9 -9 6
-20 -3 -11 -1 -20 5 -20 5 0 7 -4 3 -9 -3 -5 -1 -12 5 -16 6 -4 7 -11 4 -17
-4 -6 1 -17 11 -25 11 -7 13 -13 6 -13 -7 0 -9 -4 -6 -10 8 -13 -1 -13 -26 0
-13 7 -19 7 -19 0 0 -5 7 -10 16 -10 8 0 13 -4 9 -9 -3 -5 7 -11 22 -13 20 -2
27 -8 25 -20 -2 -10 3 -18 9 -18 7 0 3 -9 -9 -20 -22 -21 -67 -27 -77 -10 -6
10 13 14 45 11 13 -2 13 -1 0 8 -12 9 -13 11 -1 11 8 0 11 3 8 7 -4 3 -22 -2
-41 -12 l-34 -18 26 -24 c14 -12 19 -23 12 -23 -7 0 -10 -7 -6 -16 3 -9 2 -12
-4 -9 -6 4 -9 14 -8 23 2 9 -5 18 -15 20 -16 3 -17 1 -8 -16 8 -15 8 -22 -2
-25 -7 -3 -5 -6 6 -6 9 -1 16 -6 15 -13 -2 -7 8 -12 24 -12 23 1 25 -1 18 -21
-10 -26 -2 -104 13 -122 6 -9 6 -13 -2 -13 -7 0 -9 -4 -6 -10 3 -5 0 -10 -7
-10 -11 0 -11 -2 0 -9 10 -6 11 -13 1 -34 -9 -21 -9 -33 -1 -53 8 -16 9 -31 3
-43 -7 -13 -4 -24 15 -46 13 -15 24 -39 24 -53 0 -17 4 -22 14 -19 10 4 17 -4
21 -24 4 -17 13 -33 20 -35 7 -3 16 -16 19 -30 l6 -24 600 0 c368 0 600 4 600
9 0 6 -4 13 -10 16 -5 3 -10 12 -9 18 0 9 2 9 5 1 7 -16 44 -5 44 12 0 17 21
18 44 1 22 -16 12 -44 -17 -50 -12 -2 28 -5 90 -6 98 -1 112 1 117 17 3 9 20
23 37 31 20 9 28 18 24 29 -4 12 -3 13 9 3 17 -14 36 -7 36 14 0 8 -6 12 -14
9 -10 -4 -13 0 -9 14 5 20 -23 84 -59 134 -12 18 -29 29 -46 29 -24 2 -24 2
-4 6 12 2 22 7 22 10 0 2 -17 29 -37 58 -35 50 -114 115 -141 115 -6 0 -12 4
-12 8 0 5 -16 27 -36 50 -19 23 -31 42 -25 42 6 0 16 -9 23 -20 16 -24 30 -26
21 -3 -5 13 0 12 28 -7 43 -30 80 -70 115 -125 15 -24 31 -42 36 -39 5 3 7 2
6 -3 -5 -15 39 -63 57 -63 13 0 15 -3 7 -13 -9 -10 -1 -34 29 -95 23 -45 46
-82 51 -82 5 0 6 -5 3 -10 -3 -6 0 -13 8 -16 9 -3 13 -13 10 -25 -3 -10 -1
-19 5 -19 6 0 12 -10 14 -22 3 -21 8 -23 63 -23 l60 0 3 62 c3 59 2 63 -19 63
-22 0 -22 -1 -4 -13 11 -9 14 -16 7 -21 -6 -3 -18 2 -28 11 -12 13 -19 14 -22
5 -9 -28 -32 -12 -32 23 0 24 5 35 15 35 8 0 15 -5 15 -11 0 -5 5 -7 11 -3 8
5 8 9 -1 14 -8 5 -9 9 -1 11 12 5 3 49 -10 49 -5 0 -8 -3 -7 -7 2 -5 -1 -8 -7
-8 -5 0 -9 11 -7 25 l2 25 -17 -27 c-21 -33 -34 -35 -58 -8 -10 11 -26 22 -36
25 -23 8 -25 25 -4 25 8 0 15 -5 15 -11 0 -8 4 -8 13 -1 7 6 25 9 40 6 22 -3
27 -1 27 16 0 11 4 20 10 20 5 0 7 -6 4 -14 -7 -17 25 -36 52 -32 10 2 13 1 7
-1 -9 -3 -10 -10 -3 -23 6 -11 9 -23 8 -27 -4 -14 13 -53 23 -53 5 0 7 50 5
123 -6 156 -6 68 0 1050 l5 848 -28 -3 c-20 -2 -29 -9 -31 -24 -2 -16 -8 -20
-21 -17 -13 3 -17 11 -14 24 4 14 0 19 -16 19 -11 0 -21 -3 -20 -7 1 -5 -2
-18 -6 -31 -6 -19 -12 -22 -35 -17 -16 3 -31 2 -34 -3 -7 -11 -106 -3 -123 10
-8 6 -13 3 -16 -9 -3 -10 -5 -1 -6 20 -1 38 -14 49 -25 20z m-1276 -1513 c-12
-7 -12 -10 -1 -10 8 0 11 -6 7 -17 -3 -10 3 -6 15 10 23 30 41 34 59 12 11
-13 10 -14 -5 -9 -13 5 -16 4 -11 -4 4 -7 19 -12 34 -13 l27 -1 -29 -8 c-16
-5 -32 -7 -36 -5 -3 2 -6 -8 -6 -21 0 -21 4 -24 23 -19 21 5 22 4 11 -15 -7
-11 -19 -20 -26 -20 -8 0 -10 5 -6 12 5 8 2 9 -12 3 -39 -16 -67 -19 -59 -5 5
9 4 11 -4 6 -6 -4 -9 -12 -6 -17 3 -5 -1 -6 -10 -3 -9 4 -14 13 -11 20 4 10
-4 9 -29 -4 -18 -9 -32 -21 -32 -26 1 -5 -4 -11 -12 -14 -10 -3 -12 0 -8 11 3
9 1 17 -6 20 -7 2 -4 16 11 40 l22 38 -23 -13 c-12 -6 -32 -12 -45 -13 -20 -1
-21 1 -7 11 12 9 13 13 3 17 -10 3 -10 7 -1 17 10 11 15 11 28 0 11 -9 15 -9
15 -1 0 7 4 10 10 6 5 -3 18 0 29 7 15 9 25 9 46 0 14 -7 24 -8 21 -3 -6 9 8
19 29 20 6 0 3 -4 -5 -9z m914 -29 c34 -29 46 -76 46 -179 0 -87 -3 -105 -26
-148 -50 -95 -82 -115 -187 -115 -60 0 -82 4 -122 26 -44 23 -104 79 -105 97
0 10 -46 110 -82 181 -31 61 -31 78 3 121 27 34 75 34 164 2 61 -22 70 -23
101 -10 18 8 42 14 53 14 10 0 38 9 61 19 45 20 62 19 94 -8z"/>
<path d="M164 1993 c36 -55 77 -136 71 -141 -3 -3 -57 -6 -120 -6 l-115 -1 4
-755 c2 -415 4 -769 5 -786 2 -46 27 -52 35 -9 7 38 29 55 57 48 11 -3 19 0
19 6 0 6 -7 11 -15 11 -20 0 -19 27 2 38 10 6 14 16 10 26 -4 9 -3 16 2 16 7
0 17 38 25 92 1 3 4 10 8 14 4 4 3 13 -3 21 -9 10 -8 13 5 13 9 0 17 9 17 20
2 28 3 33 12 48 5 7 6 21 2 31 -4 13 2 26 15 39 12 11 24 38 27 61 3 23 10 41
16 41 7 0 7 4 -1 12 -14 14 -16 32 -3 23 6 -3 15 4 21 15 8 15 8 20 -2 20 -7
1 -4 6 7 13 11 7 21 23 22 37 1 14 4 37 7 52 6 25 3 28 -29 34 l-36 6 23 16
c13 9 26 20 30 26 7 11 -9 26 -27 26 -5 0 -3 -5 5 -10 9 -6 10 -10 3 -10 -24
0 -24 20 1 29 23 9 29 31 13 47 -4 4 1 12 10 19 11 8 13 14 5 19 -7 4 -12 2
-12 -3 0 -6 -10 -11 -22 -11 -21 0 -21 0 -2 16 13 11 14 14 3 11 -9 -3 -16 1
-16 9 0 9 6 13 14 10 8 -3 11 -1 8 4 -3 5 -2 26 4 45 6 21 6 37 0 41 -5 3 -9
14 -9 25 0 10 -6 19 -12 19 -9 0 -9 3 1 9 8 5 14 22 15 37 1 16 5 35 9 42 5 6
5 12 2 12 -4 0 1 7 10 16 9 10 15 24 12 33 -2 9 2 22 10 29 7 7 13 23 13 35 0
28 15 51 28 43 5 -3 13 2 16 11 6 15 -8 32 -26 33 -5 0 -8 7 -8 15 0 8 -6 15
-12 15 -8 0 -5 7 6 15 12 9 15 15 7 15 -9 0 -12 9 -9 24 3 17 1 23 -8 19 -25
-9 -36 8 -18 28 16 17 16 19 1 19 -10 0 -17 5 -17 10 0 6 5 10 11 10 6 0 9 6
6 13 -2 6 3 17 11 23 13 10 13 14 -1 35 -10 13 -17 18 -17 11 0 -18 -24 -15
-43 6 -59 64 -60 64 -46 74 11 8 10 9 -3 4 -11 -3 -18 0 -18 8 0 12 -20 26
-36 26 -2 0 7 -17 20 -37z"/>
<path d="M1443 1727 c-3 -6 -1 -7 5 -3 23 14 12 -12 -13 -30 -25 -19 -34 -58
-15 -69 6 -4 26 5 45 19 35 27 73 33 104 16 10 -5 42 -12 72 -15 29 -4 59 -11
66 -17 17 -14 17 3 0 27 -9 12 -16 14 -21 7 -7 -12 -64 15 -101 48 -22 20
-132 33 -142 17z"/>
<path d="M726 1647 c-31 -20 -50 -40 -76 -82 l-24 -40 27 26 c18 17 43 28 73
33 77 10 102 7 155 -19 27 -14 54 -23 59 -20 16 10 12 55 -5 55 -8 0 -15 5
-15 10 0 6 -11 10 -24 10 -16 0 -27 7 -31 20 -3 11 -11 20 -16 20 -6 0 -7 -5
-3 -12 5 -8 2 -9 -12 -3 -33 13 -88 15 -108 2z"/>
</g>
</svg>
<a title="My personal blog, with some boring research stuff and some tricks I fancy. I'll try my best to make this blog fun and useful, not just a place where I complain about everything that happens in my lab.
" href="/">Stop Talking, Start Doing</a></div><button class="button button--secondary button--circle search-button js-search-toggle"><i class="fas fa-search"></i></button></div><nav class="navigation">
<ul><li class="navigation__item"><a href="/archive.html">Archive</a></li><li class="navigation__item"><a href="/about.html">About</a></li><li><button class="button button--secondary button--circle search-button js-search-toggle"><i class="fas fa-search"></i></button></li></ul>
</nav></div>
</header>
</div><div class="page__content"><div class ="main"><div class="grid grid--reverse">
<div class="col-aside d-print-none js-col-aside"><aside class="page__aside js-page-aside"><div class="toc-aside js-toc-root"></div>
</aside></div>
<div class="col-main cell cell--auto"><!-- start custom main top snippet -->
<!-- end custom main top snippet -->
<article itemscope itemtype="http://schema.org/Article"><div class="article__header"><header><h1>Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries</h1></header></div><meta itemprop="headline" content="Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries"><div class="article__info clearfix"><ul class="left-col menu"><li>
<a class="button button--secondary button--pill button--sm"
href="/archive.html?tag=visualization">visualization</a>
</li></ul><ul class="right-col menu"><li><i class="fas fa-user"></i> <span>Pengzhan Hao</span></li><li><i class="far fa-calendar-alt"></i> <span>Sep 15, 2020</span>
</li></ul></div><meta itemprop="author" content="Pengzhan Hao"/><meta itemprop="datePublished" content="2020-09-15T22:00:14-04:00">
<meta itemprop="keywords" content="visualization"><div class="js-article-content"><div class="layout--article"><!-- start custom article top snippet -->
<!-- end custom article top snippet -->
<div class="article__content" itemprop="articleBody"><p>Let's generate a word cloud like this.
Not understanding the language is not a big deal.
If your written language is based on the Latin alphabet (or is any other language that puts spaces between words), you can skip the tokenization step.</p>
<p><img src="/static/2020-09/2020-06-28.png" height="250" />
<!--more--></p>
<h2 id="background">Background</h2>
<p>Recently, I set up a web-based RSS client for retrieving and organizing everyday news. I used <a href="https://tt-rss.org/">TinyTinyRSS</a> (ttrss for short), a popular RSS client that is Docker-friendly. Thanks to developer <a href="https://ttrss.henry.wang/#about">HenryQW</a>, a well-written Nginx-based Docker configuration is already available on Docker Hub. As more feeds were added, I found that some feeds do not need to be checked every day. So I decided to write a script that automatically lists all keywords that appeared in a recent period and generates a heat-map-like figure from them.</p>
<p>Before going further, I'll describe my setup to give readers a general overview.</p>
<p>My first step is to read all text-based information from TTRSS's PostgreSQL database. With that text, I used a Chinese NLP library, <a href="https://github.com/fxsjy/jieba">jieba</a>, to extract all keywords together with their occurrence frequencies. Then, using <a href="https://github.com/amueller/word_cloud">WordCloud</a>, a Python library, the word cloud figure is generated and displayed. More details are discussed in later sections.</p>
<h2 id="get-rss-feeds-text">Get RSS feeds text</h2>
<p>My first thought was to generate a keyword heat map for the past week's economy news. Since this blog post focuses on Chinese tokenization and drawing the word cloud figure, I'll just leave my database code here for reference. The SQL connector I used is <a href="https://pypi.org/project/psycopg2/">psycopg2</a>, an easy-to-use PostgreSQL library.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="p">.</span><span class="n">dbe</span> <span class="o">=</span> <span class="n">psycopg2</span><span class="p">.</span><span class="n">connect</span><span class="p">(</span>
<span class="n">host</span><span class="o">=</span><span class="n">DB_HOST</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="n">DB_PORT</span><span class="p">,</span> <span class="n">database</span><span class="o">=</span><span class="n">DB_NAME</span><span class="p">,</span> <span class="n">user</span><span class="o">=</span><span class="n">DB_USER</span><span class="p">,</span> <span class="n">password</span><span class="o">=</span><span class="n">DB_PASS</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">get_1w_of_feed_byid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">id</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">:</span>
<span class="n">cur</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">dbe</span><span class="p">.</span><span class="n">cursor</span><span class="p">()</span>
<span class="n">cur</span><span class="p">.</span><span class="n">execute</span><span class="p">(</span><span class="s">'SELECT content FROM public.ttrss_entries </span><span class="se">\
</span><span class="s"> where date_updated &gt; now() - interval </span><span class="se">\'</span><span class="s">1 week</span><span class="se">\'</span><span class="s"> AND id in ( </span><span class="se">\
</span><span class="s"> select int_id from DB_TABLE_NAME </span><span class="se">\
</span><span class="s"> where feed_id='</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="nb">id</span><span class="p">)</span> <span class="o">+</span> <span class="s">' </span><span class="se">\
</span><span class="s"> ) </span><span class="se">\
</span><span class="s"> ORDER BY id ASC '</span>
<span class="p">)</span>
<span class="n">rows</span> <span class="o">=</span> <span class="n">cur</span><span class="p">.</span><span class="n">fetchall</span><span class="p">()</span>
<span class="k">return</span> <span class="n">rows</span>
</code></pre></div></div>
<p>Most arguments are intuitive and easy to understand. The only exception is argument of function <em>get_1w_of_feed_byid</em>. This <strong>id</strong> is the feed index of my subscriptions.</p>
<h2 id="tokenize-with-frequency">Tokenize with frequency</h2>
<p>I tried two popular tokenization libraries and chose <a href="https://github.com/fxsjy/jieba">jieba</a> after a few comparisons. Before cutting the sentences, we first need to remove all punctuation marks.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">def</span> <span class="nf">remove_biaodian</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="n">punct</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="sa">u</span><span class="s">''':!),.:;?]}¢'"、。〉》」』】〕〗〞︰︱︳﹐、﹒
﹔﹕﹖﹗﹚﹜﹞!),.:;?|}︴︶︸︺︼︾﹀﹂﹄﹏、~¢
々‖•·ˇˉ―--′’”([{£¥'"‵〈《「『【〔〖([{£¥〝︵︷︹︻
︽︿﹁﹃﹙﹛﹝({“‘-—_…'''</span><span class="p">)</span>
<span class="n">ret</span> <span class="o">=</span> <span class="s">""</span>
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">text</span><span class="p">:</span>
<span class="k">if</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">punct</span><span class="p">:</span>
<span class="n">ret</span> <span class="o">+=</span> <span class="s">''</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">ret</span> <span class="o">+=</span> <span class="n">x</span>
<span class="k">return</span> <span class="n">ret</span>
</code></pre></div></div>
<p>Once we have a string that contains only characters (no punctuation), we can call jieba. By using the function <em>jieba.posseg.cut</em>, with or without paddle mode, we get a list of words along with their “part of speech”. As you can see in the following code, I also did two more things.</p>
<p>First, in the if statement, I kept only nouns from certain categories. Category abbreviations such as “nr” and “ns” represent different “parts of speech”; the categories I used are listed in the table below. You can find more details at this <a href="https://github.com/fxsjy/jieba">link</a>.</p>
<p>The second thing is keeping only words that are at least 2 characters long. In Chinese, there's no space between words as there is in Latin writing systems. Because of this, some single-character words, such as conjunctions, are easily misrecognized as proper nouns, and this misrecognition causes even more single characters to be regarded as proper nouns. I am not able to improve the NLP method itself, so I used an easy workaround: removing any word shorter than 2 characters.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="nn">jieba.posseg</span> <span class="k">as</span> <span class="n">pseg</span>
<span class="k">def</span> <span class="nf">get_noun_jieba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">:</span>
<span class="n">content</span> <span class="o">=</span> <span class="n">remove_biaodian</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
<span class="n">words</span> <span class="o">=</span> <span class="n">pseg</span><span class="p">.</span><span class="n">cut</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="c1"># Invoking jieba.posseg.cut function
</span>
<span class="n">ret</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">word</span><span class="p">,</span> <span class="n">flag</span> <span class="ow">in</span> <span class="n">words</span><span class="p">:</span>
<span class="c1"># print(word, flag)
</span> <span class="k">if</span> <span class="n">flag</span> <span class="ow">in</span> <span class="p">[</span><span class="s">'nr'</span><span class="p">,</span> <span class="s">'ns'</span><span class="p">,</span> <span class="s">'nt'</span><span class="p">,</span> <span class="s">'nw'</span><span class="p">,</span> <span class="s">'nz'</span><span class="p">,</span> <span class="s">'PER'</span><span class="p">,</span> <span class="s">'ORG'</span><span class="p">,</span> <span class="s">'x'</span><span class="p">]:</span> <span class="c1"># LOC
</span> <span class="n">ret</span><span class="p">.</span><span class="n">append</span><span class="p">(</span><span class="n">word</span><span class="p">)</span>
<span class="k">return</span> <span class="p">[</span><span class="n">remove_biaodian</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">ret</span> <span class="k">if</span> <span class="n">i</span><span class="p">.</span><span class="n">strip</span><span class="p">()</span> <span class="o">!=</span> <span class="s">""</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">remove_biaodian</span><span class="p">(</span><span class="n">i</span><span class="p">.</span><span class="n">strip</span><span class="p">()))</span> <span class="o">&gt;=</span> <span class="mi">2</span><span class="p">]</span>
</code></pre></div></div>
<ul>
<li>Word category names and abbreviations</li>
</ul>
<table>
<thead>
<tr>
<th>Abbreviation</th>
<th>Category name/ Part of speech</th>
</tr>
</thead>
<tbody>
<tr>
<td>nr</td>
<td>People name noun</td>
</tr>
<tr>
<td>ns</td>
<td>Location name noun</td>
</tr>
<tr>
<td>nt</td>
<td>Organization name noun</td>
</tr>
<tr>
<td>nw</td>
<td>Arts work noun</td>
</tr>
<tr>
<td>nz</td>
<td>Other noun</td>
</tr>
<tr>
<td>PER</td>
<td>People name noun</td>
</tr>
<tr>
<td>ORG</td>
<td>Organization name noun</td>
</tr>
<tr>
<td>x</td>
<td>Non-morpheme word</td>
</tr>
</tbody>
</table>
<p>With all words extracted, we can easily calculate their frequencies. After this, we can use the following line of code to print a sorted result and verify its correctness.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">noun</span> <span class="o">=</span> <span class="n">seg</span><span class="p">.</span><span class="n">get_noun_jieba</span><span class="p">(</span><span class="n">test_content</span><span class="p">)</span>
<span class="c1"># ... Calculate frequency of above word list ...
</span><span class="k">print</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">a_dict</span><span class="p">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
</code></pre></div></div>
<h2 id="draw-word-cloud">Draw word cloud</h2>
<p>With a keyword and frequency dictionary(data structure), we can just call built-in functions from wordcloud library to generate the figure.</p>
<p>First we need to initialize an instance of wordcloud class. As you can see in my code, I set it with 6 parameters. Width and Height of the canvas, maximum amount of words used to generate the figure, the font of words, background color and margin between any two words.</p>
<p>After having the instance, we call the function <em>generate_from_frequencies</em> and pass the keyword dictionary to it. The return value of this function is a bitmap image, which we can plot to the screen with <a href="https://matplotlib.org/">matplotlib</a>.</p>
<p>I tested my plot on the Ubuntu subsystem on Windows 10. Unfortunately, matplotlib under the subsystem depends on an X11 window manager, which is not available by default on Windows, so we need to install an X11 manager to support it. <a href="https://sourceforge.net/projects/xming/">Xming</a> is the one I used.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">from</span> <span class="nn">wordcloud</span> <span class="kn">import</span> <span class="n">WordCloud</span>
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="n">plt</span>
<span class="n">font_path</span> <span class="o">=</span> <span class="s">"./font/haipai.ttf"</span>
<span class="n">output_path</span> <span class="o">=</span> <span class="s">"./font/out.png"</span>
<span class="k">def</span> <span class="nf">show_figure_with_frequency</span><span class="p">(</span><span class="n">keywords</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">wc</span> <span class="o">=</span> <span class="n">WordCloud</span><span class="p">(</span><span class="n">width</span><span class="o">=</span><span class="mi">828</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span><span class="mi">1792</span><span class="p">,</span> <span class="n">max_words</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">font_path</span><span class="o">=</span><span class="n">font_path</span><span class="p">,</span>
<span class="n">background_color</span><span class="o">=</span><span class="s">"white"</span><span class="p">,</span> <span class="n">margin</span><span class="o">=</span><span class="mi">1</span><span class="p">).</span><span class="n">generate_from_frequencies</span><span class="p">(</span><span class="n">keywords</span><span class="p">)</span>
<span class="n">plt</span><span class="p">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">wc</span><span class="p">)</span>
<span class="n">plt</span><span class="p">.</span><span class="n">axis</span><span class="p">(</span><span class="s">'off'</span><span class="p">)</span>
<span class="n">plt</span><span class="p">.</span><span class="n">show</span><span class="p">()</span>
</code></pre></div></div>
<p>If everything works fine, a word cloud figure will show up in a new window. My version looks like this.</p>
<p><img src="/static/2020-09/2020-06-28.png" height="150" /></p>
<p>This generated word cloud figure reflects the most popular economy news keywords in the week starting 06-28-2020. The two largest words in the figure are “新冠” and “新冠病毒”, both of which mean “Covid-19” (this figure is from the week of the second COVID-19 wave in Beijing, China). The size of the image fits my phone screen, and I can use an app to automatically sync it to my phone's wallpaper. However, too many location nouns are present in this image. This is something I can improve in the future.</p>
</div><section class="article__sharing d-print-none"></section><div class="d-print-none"><footer class="article__footer"><meta itemprop="dateModified" content="2020-09-15T22:00:14-04:00"><!-- start custom article footer snippet -->
<!-- end custom article footer snippet -->
<div class="article__subscribe"><div class="subscribe"><i class="fas fa-rss"></i> <a type="application/rss+xml" href="/feed.xml">Subscribe</a></div>
</div><div class="article__license"></div></footer>
<div class="article__section-navigator clearfix"><div class="previous"><span>PREVIOUS</span><a href="/posts/intro-xv6">Xv6 introduction</a></div><div class="next"><span>NEXT</span><a href="/posts/eddl-how-do-we-train-on-limited-edge-devices">EDDL: How do we train neural networks on limited edge devices - PART 1</a></div></div></div>
</div>
<script>(function() {
// Article enhancements, run once jQuery is loaded and the DOM is ready:
// tag each .highlight wrapper with its code block's data-lang attribute and
// add a click-to-scroll anchor link to every heading that carries an id.
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
  $(function() {
    var HEADINGS_WITH_ID = 'h1[id], h2[id], h3[id], h4[id], h5[id], h6[id]';
    var $content = $('.js-article-content');
    // With the sidebar layout the main pane is scrolled; otherwise the document is.
    var scrollSelector = $('.js-page-root').hasClass('layout--page--sidebar') ?
      '.js-page-main' : 'html, body';
    var $scroller = $(scrollSelector);

    // Copy data-lang from the inner <code> element onto its .highlight wrapper.
    $content.find('.highlight').each(function() {
      var $block = $(this);
      $block.attr('data-lang', $block.find('code').attr('data-lang'));
    });

    // Append an anchor icon to each heading that has an id.
    $content.find(HEADINGS_WITH_ID).each(function() {
      var $icon = $('<a class="anchor d-print-none" aria-hidden="true"></a>');
      $icon.html('<i class="fas fa-anchor"></i>');
      $(this).append($icon);
    });

    // Clicking an anchor icon scrolls to the heading it was appended to.
    $content.on('click', '.anchor', function() {
      var headingId = $(this).parent().attr('id');
      $scroller.scrollToAnchor('#' + headingId, 400);
    });
  });
});
})();
</script>
</div><section class="page__comments d-print-none"><div id="disqus_thread"></div>
<script>
/**
* RECOMMENDED CONFIGURATION VARIABLES: EDIT AND UNCOMMENT THE SECTION BELOW TO INSERT DYNAMIC VALUES FROM YOUR PLATFORM OR CMS.
* LEARN WHY DEFINING THESE VARIABLES IS IMPORTANT: https://disqus.com/admin/universalcode/#configuration-variables*/
// Global configuration callback for the Disqus embed: sets the page URL and
// the page identifier for this article on `this.page`.
var disqus_config = function () {
this.page.url = 'https://codersherlock.github.com//posts/generate-word-cloud-with-chinese-fenci';
this.page.identifier = '/posts/generate-word-cloud-with-chinese-fenci';
};
// Loader: creates a <script> element pointing at the Disqus embed script and
// appends it to <head> (or <body> when there is no head).
(function() { // DON'T EDIT BELOW THIS LINE
var d = document, s = d.createElement('script');
s.src = 'https://codersherlockblog.disqus.com/embed.js';
// Stamp the element with the current time in milliseconds.
s.setAttribute('data-timestamp', +new Date());
(d.head || d.body).appendChild(s);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript></section></article><!-- start custom main bottom snippet -->
<!-- end custom main bottom snippet -->
</div>
</div></div></div><div class="page__footer d-print-none">
<footer class="footer py-4 js-page-footer">
<div class="main"><div itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Pengzhan Hao"><meta itemprop="url" content="/"><div class="footer__author-links"><div class="author-links">
<ul class="menu menu--nowrap menu--inline"><li title="Send me an Email.">
<a class="button button--circle mail-button" itemprop="email" href="mailto:haopengzhan@gmail.com" target="_blank">
<i class="fas fa-envelope"></i>
</a><li title="Follow me on Linkedin.">
<a class="button button--circle linkedin-button" itemprop="sameAs" href="https://www.linkedin.com/in/pengzhanhao" target="_blank">
<div class="icon"><svg fill="#000000" width="24px" height="24px" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg">
<path d="M260.096 155.648c0 27.307008-9.899008 50.516992-29.696 69.632-19.796992 19.115008-45.396992 28.672-76.8 28.672-30.036992 0-54.612992-9.556992-73.728-28.672-19.115008-19.115008-28.672-42.324992-28.672-69.632 0-28.672 9.556992-52.224 28.672-70.656 19.115008-18.432 44.372992-27.648 75.776-27.648 31.403008 0 56.32 9.216 74.752 27.648 18.432 18.432 28.331008 41.984 29.696 70.656 0 0 0 0 0 0m-202.752 808.96c0 0 0-632.832 0-632.832 0 0 196.608 0 196.608 0 0 0 0 632.832 0 632.832 0 0-196.608 0-196.608 0 0 0 0 0 0 0m313.344-430.08c0-58.708992-1.364992-126.292992-4.096-202.752 0 0 169.984 0 169.984 0 0 0 10.24 88.064 10.24 88.064 0 0 4.096 0 4.096 0 40.96-68.267008 105.812992-102.4 194.56-102.4 68.267008 0 123.220992 22.868992 164.864 68.608 41.643008 45.739008 62.464 113.664 62.464 203.776 0 0 0 374.784 0 374.784 0 0-196.608 0-196.608 0 0 0 0-350.208 0-350.208 0-91.476992-33.451008-137.216-100.352-137.216-47.787008 0-81.236992 24.576-100.352 73.728-4.096 8.192-6.144 24.576-6.144 49.152 0 0 0 364.544 0 364.544 0 0-198.656 0-198.656 0 0 0 0-430.08 0-430.08 0 0 0 0 0 0" />
</svg>
</div>
</a>
</li><li title="Follow me on Github.">
<a class="button button--circle github-button" itemprop="sameAs" href="https://github.com/codersherlock" target="_blank">
<div class="icon"><svg fill="#000000" width="24px" height="24px" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg">
<path class="svgpath" data-index="path_0" fill="#272636" d="M0 525.2c0 223.6 143.3 413.7 343 483.5 26.9 6.8 22.8-12.4 22.8-25.4l0-88.7c-155.3 18.2-161.5-84.6-172-101.7-21.1-36-70.8-45.2-56-62.3 35.4-18.2 71.4 4.6 113.1 66.3 30.2 44.7 89.1 37.2 119 29.7 6.5-26.9 20.5-50.9 39.7-69.6C248.8 728.2 181.7 630 181.7 513.2c0-56.6 18.7-108.7 55.3-150.7-23.3-69.3 2.2-128.5 5.6-137.3 66.5-6 135.5 47.6 140.9 51.8 37.8-10.2 80.9-15.6 129.1-15.6 48.5 0 91.8 5.6 129.8 15.9 12.9-9.8 77-55.8 138.8-50.2 3.3 8.8 28.2 66.7 6.3 135 37.1 42.1 56 94.6 56 151.4 0 117-67.5 215.3-228.8 243.7 26.9 26.6 43.6 63.4 43.6 104.2l0 128.8c0.9 10.3 0 20.5 17.2 20.5C878.1 942.4 1024 750.9 1024 525.3c0-282.9-229.3-512-512-512C229.1 13.2 0 242.3 0 525.2L0 525.2z" />
</svg>
</div>
</a>
</li></ul>
</div>
</div>
</div><div class="site-info mt-2">
<div>© Stop Talking, Start Doing 2021,
Powered by <a title="Jekyll is a simple, blog-aware, static site generator." href="http://jekyllrb.com/">Jekyll</a> & <a
title="TeXt is a super customizable Jekyll theme." href="https://github.com/kitian616/jekyll-TeXt-theme">TeXt Theme</a>.
</div>
</div>
</div>
</footer>
</div></div>
</div><script>(function() {
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
var $body = $('body'), $window = $(window);
var $pageRoot = $('.js-page-root'), $pageMain = $('.js-page-main');
var activeCount = 0;
/**
 * jQuery plugin body (installed as $.fn.modal): turns the matched element
 * into a modal that can be shown and hidden.
 *
 * @param {Object} [options]
 * @param {boolean} [options.initialVisible] show the modal immediately.
 * @param {Function} [options.onChange] called with the new visibility
 *   (true/false) after every state change.
 * @param {boolean} [options.hideWhenWindowScroll] on touch devices, hide the
 *   modal when the window scrolls.
 * @returns {{show: Function, hide: Function, $el: Object}} controller object.
 */
function modal(options) {
  var $root = this, visible = false, initialVisible = false, onChange, hideWhenWindowScroll = false;
  var scrollTop;
  function setOptions(options) {
    var _options = options || {};
    // Fix: the original assigned the `show` function to `visible` here, so
    // init() compared the value with itself and `initialVisible` never took
    // effect. The requested state is kept separately and applied in init().
    initialVisible = _options.initialVisible === undefined ? false : !!_options.initialVisible;
    onChange = _options.onChange;
    hideWhenWindowScroll = _options.hideWhenWindowScroll;
  }
  function init() {
    setState(initialVisible);
  }
  function setState(isShow) {
    if (isShow === visible) {
      return;
    }
    visible = isShow;
    if (visible) {
      activeCount++;
      // Remember the scroll offset so it can be restored when the modal closes.
      scrollTop = $(window).scrollTop() || $pageMain.scrollTop();
      $root.addClass('modal--show');
      $pageMain.scrollTop(scrollTop);
      // Only the first open modal switches the page into modal mode.
      activeCount === 1 && ($pageRoot.addClass('show-modal'), $body.addClass('of-hidden'));
      hideWhenWindowScroll && window.hasEvent('touchstart') && $window.on('scroll', hide);
      $window.on('keyup', handleKeyup);
    } else {
      activeCount > 0 && activeCount--;
      $root.removeClass('modal--show');
      $window.scrollTop(scrollTop);
      // Only the last closing modal switches the page out of modal mode.
      activeCount === 0 && ($pageRoot.removeClass('show-modal'), $body.removeClass('of-hidden'));
      hideWhenWindowScroll && window.hasEvent('touchstart') && $window.off('scroll', hide);
      $window.off('keyup', handleKeyup);
    }
    onChange && onChange(visible);
  }
  function show() {
    setState(true);
  }
  function hide() {
    setState(false);
  }
  function handleKeyup(e) {
    // Char Code: 27 ESC
    if (e.which === 27) {
      hide();
    }
  }
  setOptions(options);
  init();
  return {
    show: show,
    hide: hide,
    $el: $root
  };
}
$.fn.modal = modal;
});
})();
</script><div class="modal modal--overflow page__search-modal d-print-none js-page-search-modal"><script>
(function () {
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
// search panel
// Shared namespace object on window; created here if it does not exist yet.
var search = (window.search || (window.search = {}));
// The built-in search box is used unless window.useDefaultSearchBox is set.
var useDefaultSearchBox = window.useDefaultSearchBox === undefined ?
true : window.useDefaultSearchBox ;
var $searchModal = $('.js-page-search-modal');
var $searchToggle = $('.js-search-toggle');
// Modal controller for the search panel; handleModalChange receives visibility changes.
var searchModal = $searchModal.modal({ onChange: handleModalChange, hideWhenWindowScroll: true });
// Local mirror of the modal's visibility, updated in handleModalChange.
var modalVisible = false;
search.searchModal = searchModal;
// These stay null unless useDefaultSearchBox is truthy (assigned further below).
var $searchBox = null;
var $searchInput = null;
var $searchClear = null;
// Returns whether the search modal is currently shown.
function getModalVisible() {
return modalVisible;
}
search.getModalVisible = getModalVisible;
/**
 * onChange callback of the search modal: records the new visibility, fires
 * the optional search.onShow / search.onHide hooks, moves focus in or out of
 * the default search input, and resets the search state after hiding.
 *
 * @param {boolean} visible new visibility of the search modal.
 */
function handleModalChange(visible) {
  modalVisible = visible;
  if (visible) {
    search.onShow && search.onShow();
    useDefaultSearchBox && $searchInput[0] && $searchInput[0].focus();
  } else {
    // Fix: the original guarded this call with `search.onShow`, so onHide was
    // skipped when only onHide was registered and threw when only onShow was.
    search.onHide && search.onHide();
    useDefaultSearchBox && $searchInput[0] && $searchInput[0].blur();
    // Reset 400 ms after hiding.
    // NOTE(review): the delay presumably matches the hide animation; confirm against the CSS.
    setTimeout(function() {
      useDefaultSearchBox && ($searchInput.val(''), $searchBox.removeClass('not-empty'));
      search.clear && search.clear();
      window.pageAsideAffix && window.pageAsideAffix.refresh();
    }, 400);
  }
}
// Toggle button: flips the search modal between shown and hidden.
$searchToggle.on('click', function() {
  if (modalVisible) {
    searchModal.hide();
  } else {
    searchModal.show();
  }
});
// Keyboard shortcut: releasing "S" (83) or "/" (191) opens the search modal,
// unless it is already open or the event target is a form element.
$(window).on('keyup', function(e) {
  if (modalVisible) {
    return;
  }
  if (window.isFormElement(e.target || e.srcElement)) {
    return;
  }
  var isShortcutKey = e.which === 83 || e.which === 191;
  if (isShortcutKey && !modalVisible) {
    searchModal.show();
  }
});
// Wires up the built-in search box: exposes accessors on `search`, tracks
// focus styling, forwards (throttled) input to the search hooks and handles
// the clear icon.
if (useDefaultSearchBox) {
  $searchBox = $('.js-search-box');
  $searchInput = $searchBox.children('input');
  $searchClear = $searchBox.children('.js-icon-clear');
  search.getSearchInput = function() {
    return $searchInput.get(0);
  };
  search.getVal = function() {
    return $searchInput.val();
  };
  search.setVal = function(val) {
    $searchInput.val(val);
  };
  $searchInput.on('focus', function() {
    $(this).addClass('focus');
  });
  $searchInput.on('blur', function() {
    $(this).removeClass('focus');
  });
  $searchInput.on('input', window.throttle(function() {
    var val = $(this).val();
    if (val === '' || typeof val !== 'string') {
      // Fix: drop the 'not-empty' marker when the user empties the field by
      // typing, as the clear-icon handler below already does; previously the
      // class stayed on the box until the modal was closed.
      $searchBox.removeClass('not-empty');
      search.clear && search.clear();
    } else {
      $searchBox.addClass('not-empty');
      search.onInputNotEmpty && search.onInputNotEmpty(val);
    }
  }, 400));
  $searchClear.on('click', function() {
    $searchInput.val(''); $searchBox.removeClass('not-empty');
    search.clear && search.clear();
  });
}
});
})();
</script><div class="search search--dark">
<div class="main">
<div class="search__header">Search</div>
<div class="search-bar">
<div class="search-box js-search-box">
<div class="search-box__icon-search"><i class="fas fa-search"></i></div>
<input type="text" />
<div class="search-box__icon-clear js-icon-clear">
<a><i class="fas fa-times"></i></a>
</div>
</div>
<button class="button button--theme-dark button--pill search__cancel js-search-toggle">
Cancel</button>
</div>
<div class="search-result js-search-result"></div>
</div>
</div>
<script>var SOURCES = window.TEXT_VARIABLES.sources;
var PAHTS = window.TEXT_VARIABLES.paths;
window.Lazyload.js([SOURCES.jquery, PAHTS.search_js], function() {
var search = (window.search || (window.search = {}));
var searchData = window.TEXT_SEARCH_DATA || {};
/**
 * Wraps `f` so that repeated calls with the same arguments return the cached
 * result of the first call. The cache key is the arguments joined with ','.
 *
 * @param {Function} f function to cache.
 * @returns {Function} caching wrapper around `f`.
 */
function memorize(f) {
  // Fix: a prototype-less object, so keys such as 'toString', 'constructor'
  // or '__proto__' are not mistaken for cached entries inherited from
  // Object.prototype (the original used `{}` with the `in` operator).
  var cache = Object.create(null);
  return function () {
    var key = Array.prototype.join.call(arguments, ',');
    if (!(key in cache)) {
      cache[key] = f.apply(this, arguments);
    }
    return cache[key];
  };
}
/// search
/**
 * Case-insensitive substring search over the titles in `searchData`.
 *
 * @param {string} query text to look for.
 * @returns {Object} map from each group key of `searchData` that has at least
 *   one match to an array of its first (at most 4) matching records, in
 *   their original order.
 */
function searchByQuery(query) {
  var MAX_PER_GROUP = 4;
  // Lower-case the query once instead of once per record.
  var needle = String(query).toLowerCase();
  var result = {};
  var keys = Object.keys(searchData);
  var i, j, key, items, cur, matches;
  for (i = 0; i < keys.length; i++) {
    key = keys[i];
    items = searchData[key] || [];
    matches = [];
    // Stop scanning a group as soon as it has reached the cap.
    for (j = 0; j < items.length && matches.length < MAX_PER_GROUP; j++) {
      cur = items[j];
      // Fix: records without a string title are skipped instead of throwing
      // on `.toLowerCase()` and aborting the whole search.
      if (cur && typeof cur.title === 'string' && cur.title.toLowerCase().indexOf(needle) >= 0) {
        matches.push(cur);
      }
    }
    if (matches.length > 0) {
      result[key] = matches;
    }
  }
  return result;
}
// Builds the header element for a result group; the result is cached per
// header string via memorize().
// Fix: the header is inserted with .text() rather than concatenated into the
// markup, so characters such as '<' or '&' in it are shown literally and
// cannot break or inject HTML.
var renderHeader = memorize(function(header) {
  var $header = $('<p class="search-result__header"></p>');
  $header.text(header);
  return $header;
});
/**
 * Builds one search result list item.
 *
 * Fix: title, url and index are applied through .text()/.attr() instead of
 * being concatenated into an HTML string, so quotes, '<' or '&' in a title or
 * URL can no longer break the markup or inject elements.
 *
 * @param {number} index running index, stored in the data-index attribute.
 * @param {string} title link text.
 * @param {string} url link target.
 * @returns {Object} jQuery object wrapping the <li> element.
 */
var renderItem = function(index, title, url) {
  var $link = $('<a class="button"></a>');
  var $item = $('<li class="search-result__item"></li>');
  $link.attr('href', url);
  $link.text(title);
  $item.attr('data-index', index);
  $item.append($link);
  return $item;
};
/**
 * Renders grouped search results into a <ul>: one header per group followed
 * by that group's items, with item indices running across all groups.
 *
 * @param {Object} data map from group key to an array of {title, url} records.
 * @returns {Object|null} the list as a jQuery object, or null when `data` is falsy.
 */
function render(data) {
  if (!data) {
    return null;
  }
  var $list = $('<ul></ul>');
  var nextIndex = 0;
  Object.keys(data).forEach(function(group) {
    var n, entry;
    $list.append(renderHeader(group));
    for (n = 0; n < data[group].length; n += 1) {
      entry = data[group][n];
      $list.append(renderItem(nextIndex, entry.title, entry.url));
      nextIndex += 1;
    }
  });
  return $list;
}
// search box
var $result = $('.js-search-result'), $resultItems;
var lastActiveIndex, activeIndex;
// Re-queries the rendered result items and moves the selection back to index 0.
function refreshResultItems() {
  $resultItems = $('.search-result__item');
  activeIndex = 0;
}
// Empties the result panel and resets the selection state.
function clear() {
  $result.html(null);
  refreshResultItems();
}
// Runs the search for `val`, renders the results and marks the first item active.
function onInputNotEmpty(val) {
  var matches = searchByQuery(val);
  $result.html(render(matches));
  refreshResultItems();
  $resultItems.eq(0).addClass('active');
}
search.clear = clear;
search.onInputNotEmpty = onInputNotEmpty;
// Moves the 'active' class from the previously selected result item to the
// currently selected one; an index that is not >= 0 is ignored.
function updateResultItems() {
  if (lastActiveIndex >= 0) {
    $resultItems.eq(lastActiveIndex).removeClass('active');
  }
  if (activeIndex >= 0) {
    $resultItems.eq(activeIndex).addClass('active');
  }
}
/**
 * Moves the keyboard selection one result up or down, wrapping around at
 * either end. Does nothing unless there is more than one result item.
 *
 * @param {string} direction 'up' or 'down'; any other value leaves the
 *   selection where it is but still refreshes the highlighting.
 */
function moveActiveIndex(direction) {
  var total = $resultItems ? $resultItems.length : 0;
  var step = 0;
  if (!(total > 1)) {
    return;
  }
  if (direction === 'up') {
    step = -1;
  } else if (direction === 'down') {
    step = 1;
  }
  lastActiveIndex = activeIndex;
  if (step !== 0) {
    // Adding `total` keeps the left operand non-negative when stepping up from 0.
    activeIndex = (activeIndex + step + total) % total;
  }
  updateResultItems();
}
// Char Code: 13 Enter, 37 ⬅, 38 ⬆, 39 ➡, 40 ⬇
// Keyboard navigation while the search modal is open: the up/down arrows
// (38/40) move the selection and Enter (13) opens the selected result.
$(window).on('keyup', function(e) {
  var modalVisible = search.getModalVisible && search.getModalVisible();
  var link;
  if (!modalVisible) {
    return;
  }
  if (e.which === 38) {
    moveActiveIndex('up');
  } else if (e.which === 40) {
    moveActiveIndex('down');
  } else if (e.which === 13) {
    if ($resultItems && activeIndex >= 0) {
      // Fix: with an empty result list there is no link element; the original
      // called .click() on undefined and threw a TypeError.
      link = $resultItems.eq(activeIndex).children('a')[0];
      link && link.click();
    }
  }
});
// Hovering a result link makes its list item the active selection.
$result.on('mouseover', '.search-result__item > a', function() {
  var hoveredIndex = $(this).parent().data('index');
  if (hoveredIndex >= 0) {
    lastActiveIndex = activeIndex;
    activeIndex = hoveredIndex;
    updateResultItems();
  }
});
});
</script>
</div></div>
<script>(function() {
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
/**
 * jQuery plugin body (installed as $.fn.scrollToAnchor): animates the matched
 * scroll container to the element selected by `anchor`, then puts the anchor
 * into the address bar via history.replaceState.
 *
 * @param {string} anchor selector of the target, e.g. '#section-id'.
 * @param {number} duration animation duration passed to .animate().
 * @param {Function} [callback] invoked when the animation completes, or
 *   immediately when the target does not exist.
 */
function scrollToAnchor(anchor, duration, callback) {
  var $root = this;
  var targetPosition = $(anchor).position();
  // Fix: .position() yields nothing for an empty selection; the original then
  // threw on `.top` and never ran the callback, leaving callers that wait for
  // it (e.g. a "scrolling" flag) stuck.
  if (!targetPosition) {
    callback && callback();
    return;
  }
  $root.animate({ scrollTop: targetPosition.top }, duration, function() {
    window.history.replaceState(null, '', window.location.href.split('#')[0] + anchor);
    callback && callback();
  });
}
$.fn.scrollToAnchor = scrollToAnchor;
});
})();
(function() {
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
// jQuery plugin body (installed as $.fn.affix): switches the matched element
// between three positioning states - 'top', 'fixed' and 'bottom' - depending
// on the scroll offset of a scroll target.
// Options read by setOptions: offsetBottom, scrollTarget, scroll, disabled.
// Returns an object exposing setOptions() and refresh().
function affix(options) {
var $root = this, $window = $(window), $scrollTarget, $scroll,
offsetBottom = 0, scrollTarget = window, scroll = window.document, disabled = false, isOverallScroller = true,
rootTop, rootLeft, rootHeight, scrollBottom, rootBottomTop,
hasInit = false, curState;
// Merges the given options over the current values and re-resolves the
// jQuery wrappers for the scroll target and the scrolled content.
function setOptions(options) {
var _options = options || {};
_options.offsetBottom && (offsetBottom = _options.offsetBottom);
_options.scrollTarget && (scrollTarget = _options.scrollTarget);
_options.scroll && (scroll = _options.scroll);
_options.disabled !== undefined && (disabled = _options.disabled);
$scrollTarget = $(scrollTarget);
isOverallScroller = window.isOverallScroller($scrollTarget[0]);
$scroll = $(scroll);
}
// Resets the element to the 'top' state, then measures its height and its
// top/left offsets. When the scroll target is not the overall scroller, the
// target's current scrollTop is added to the top offset.
function preCalc() {
top();
rootHeight = $root.outerHeight();
rootTop = $root.offset().top + (isOverallScroller ? 0 : $scrollTarget.scrollTop());
rootLeft = $root.offset().left;
}
// Recomputes the scroll offset at which the element leaves the 'fixed' state
// (scrollBottom) and the CSS top used in the 'bottom' state (rootBottomTop).
// Pass a truthy needPreCalc to re-measure the element first.
function calc(needPreCalc) {
needPreCalc && preCalc();
scrollBottom = $scroll.outerHeight() - offsetBottom - rootHeight;
rootBottomTop = scrollBottom - rootTop;
}
// State 'top': removes the 'fixed' class and zeroes left/top.
function top() {
if (curState !== 'top') {
$root.removeClass('fixed').css({
left: 0,
top: 0
});
curState = 'top';
}
}
// State 'fixed': adds the 'fixed' class and sets left to the measured offset.
function fixed() {
if (curState !== 'fixed') {
$root.addClass('fixed').css({
left: rootLeft + 'px',
top: 0
});
curState = 'fixed';
}
}
// State 'bottom': removes the 'fixed' class and sets top to rootBottomTop.
function bottom() {
if (curState !== 'bottom') {
$root.removeClass('fixed').css({
left: 0,
top: rootBottomTop + 'px'
});
curState = 'bottom';
}
}
// Picks the state for the current scroll offset: 'fixed' between rootTop and
// scrollBottom (inclusive), 'top' above that range, 'bottom' below it.
function setState() {
var scrollTop = $scrollTarget.scrollTop();
if (scrollTop >= rootTop && scrollTop <= scrollBottom) {
fixed();
} else if (scrollTop < rootTop) {
top();
} else {
bottom();
}
}
// One-time setup (guarded by hasInit): measures, applies the initial state,
// starts the periodic recalculation and registers scroll/resize handlers.
function init() {
if(!hasInit) {
var interval, timeout;
calc(true); setState();
// run calc every 100 millisecond
interval = setInterval(function() {
calc();
}, 100);
// Stop the periodic recalculation after 45 seconds at the latest...
timeout = setTimeout(function() {
clearInterval(interval);
}, 45000);
// ...or 3 seconds after window.pageLoad resolves, whichever comes first.
window.pageLoad.then(function() {
setTimeout(function() {
clearInterval(interval);
clearTimeout(timeout);
}, 3000);
});
$scrollTarget.on('scroll', function() {
disabled || setState();
});
$window.on('resize', function() {
disabled || (calc(true), setState());
});
hasInit = true;
}
}
setOptions(options);
if (!disabled) {
init();
}
// NOTE(review): this resize handler calls init() without checking `disabled`,
// so an instance created disabled is initialized on the first resize - confirm
// that this is intended.
$window.on('resize', window.throttle(function() {
init();
}, 200));
return {
setOptions: setOptions,
refresh: function() {
// calc() declares a single parameter; the second argument here is unused.
calc(true, { animation: false }); setState();
}
};
}
$.fn.affix = affix;
});
})();
(function() {
var SOURCES = window.TEXT_VARIABLES.sources;
window.Lazyload.js(SOURCES.jquery, function() {
function toc(options) {
var $root = this, $window = $(window), $scrollTarget, $scroller, $tocUl = $('<ul class="toc toc--ellipsis"></ul>'), $tocLi, $headings, $activeLast, $activeCur,
selectors = 'h1,h2,h3', container = 'body', scrollTarget = window, scroller = 'html, body', disabled = false,
headingsPos, scrolling = false, hasRendered = false, hasInit = false;
// Merges the given options over the current TOC settings (a falsy value keeps
// the current setting; `disabled` is taken whenever it is not undefined) and
// re-resolves the headings, the scroll target and the scroller.
function setOptions(options) {
  var given = options || {};
  if (given.selectors) {
    selectors = given.selectors;
  }
  if (given.container) {
    container = given.container;
  }
  if (given.scrollTarget) {
    scrollTarget = given.scrollTarget;
  }
  if (given.scroller) {
    scroller = given.scroller;
  }
  if (given.disabled !== undefined) {
    disabled = given.disabled;
  }
  // Only headings that carry an id can be linked from the TOC.
  $headings = $(container).find(selectors).filter('[id]');
  $scrollTarget = $(scrollTarget);
  $scroller = $(scroller);
}
// Recomputes headingsPos: the floored .position().top of every tracked
// heading, in document order.
function calc() {
  var positions = [];
  $headings.each(function() {
    var top = $(this).position().top;
    positions.push(Math.floor(top));
  });
  headingsPos = positions;
}
/**
 * Updates which TOC entry carries the 'active' class.
 *
 * @param {Object} [element] TOC <li> (jQuery object) to activate directly;
 *   when omitted the entry is derived from the current scroll offset.
 * @param {boolean} [disabled] when truthy, nothing is changed.
 */
function setState(element, disabled) {
  var scrollTop = $scrollTarget.scrollTop(), i, $candidate;
  if (disabled || !headingsPos || headingsPos.length < 1) { return; }
  if (element) {
    $activeCur = element;
  } else {
    // Fix: start from the first entry on every call. The original only fell
    // back to the first entry when nothing had been active yet, so jumping
    // from far down the page to above the first heading left the old entry
    // highlighted.
    $candidate = $tocLi.eq(0);
    // Advance to the last heading whose position has been scrolled past.
    for (i = 0; i < headingsPos.length && scrollTop >= headingsPos[i]; i++) {
      $candidate = $tocLi.eq(i);
    }
    $activeCur = $candidate;
  }
  $activeLast && $activeLast.removeClass('active');
  ($activeLast = $activeCur).addClass('active');
}
// Builds the TOC once: appends the list to the root element, adds one
// <li class="toc-<tag>"> with a link per tracked heading, and makes a click
// on a link activate that entry and scroll to the heading.
function render() {
  if (hasRendered) {
    return;
  }
  $root.append($tocUl);
  $headings.each(function() {
    var $heading = $(this);
    var levelClass = 'toc-' + $heading.prop('tagName').toLowerCase();
    var $link = $('<a></a>').text($heading.text()).attr('href', '#' + $heading.prop('id'));
    $tocUl.append($('<li></li>').addClass(levelClass).append($link));
  });
  $tocLi = $tocUl.children('li');
  $tocUl.on('click', 'a', function(e) {
    var $clicked = $(this);
    e.preventDefault();
    // `scrolling` stays true until the scroll animation reports completion.
    scrolling = true;
    setState($clicked.parent());
    $scroller.scrollToAnchor($clicked.attr('href'), 400, function() {
      scrolling = false;
    });
  });
  hasRendered = true;
}
// One-time setup (guarded by hasInit): renders the TOC, measures the heading
// positions, applies the initial active entry, starts the periodic
// re-measurement and registers the scroll/resize handlers.
function init() {
var interval, timeout;
if(!hasInit) {
render(); calc(); setState(null, scrolling);
// run calc every 100 millisecond
interval = setInterval(function() {
calc();
}, 100);
// Stop the periodic re-measurement after 45 seconds at the latest...
timeout = setTimeout(function() {
clearInterval(interval);
}, 45000);
// ...or 3 seconds after window.pageLoad resolves, whichever comes first.
window.pageLoad.then(function() {
setTimeout(function() {
clearInterval(interval);
clearTimeout(timeout);
}, 3000);
});
// `scrolling` is passed as setState's `disabled` argument, so the active
// entry is not recomputed while it is true.
$scrollTarget.on('scroll', function() {
disabled || setState(null, scrolling);
});
$window.on('resize', window.throttle(function() {
if (!disabled) {
render(); calc(); setState(null, scrolling);
}
}, 100));
}
hasInit = true;
}
setOptions(options);
if (!disabled) {
init();
}
$window.on('resize', window.throttle(function() {
init();
}, 200));
return {
setOptions: setOptions
};
}
$.fn.toc = toc;
});
})();
/*(function () {
})();*/
</script><script>
/* The TOC must be set up before the affix, since the affix needs to know the TOC's height. */
(function() {
  var SOURCES = window.TEXT_VARIABLES.sources;
  var TOC_SELECTOR = window.TEXT_VARIABLES.site.toc.selectors;
  window.Lazyload.js(SOURCES.jquery, function() {
    var $articleContent = $('.js-article-content');
    var $asideColumn = $('.js-col-aside');
    // With the sidebar layout the main pane is the scroll container.
    var mainPane = $('.js-page-root').hasClass('layout--page--sidebar') ? '.js-page-main' : null;
    var hasToc = $articleContent.find(TOC_SELECTOR).length > 0;
    var tocDisabled;
    var toc;

    // The TOC is disabled while the aside column is hidden or when the
    // article has no matching headings.
    function disabled() {
      var columnHidden = $asideColumn.css('display') === 'none';
      return columnHidden || !hasToc;
    }

    tocDisabled = disabled();
    toc = $('.js-toc-root').toc({
      selectors: TOC_SELECTOR,
      container: $articleContent,
      scrollTarget: mainPane,
      scroller: mainPane,
      disabled: tocDisabled
    });

    // Re-evaluate the disabled state whenever the window is resized.
    $(window).on('resize', window.throttle(function() {
      tocDisabled = disabled();
      if (toc) {
        toc.setOptions({ disabled: tocDisabled });
      }
    }, 100));
  });
})();
// Attaches the affix behaviour to the page aside once jQuery is available and
// publishes the controller as window.pageAsideAffix.
(function() {
  var SOURCES = window.TEXT_VARIABLES.sources;
  window.Lazyload.js(SOURCES.jquery, function() {
    var $aside = $('.js-page-aside');
    var footerHeight = $('.js-page-footer').outerHeight();
    var usesSidebar = $('.js-page-root').hasClass('layout--page--sidebar');
    // Never reassigned in this script, so the affix is always created enabled.
    var tocDisabled = false;
    var settings = {
      offsetBottom: footerHeight,
      scrollTarget: usesSidebar ? '.js-page-main' : null,
      scroller: usesSidebar ? '.js-page-main' : null,
      scroll: usesSidebar ? $('.js-page-main').children() : null,
      disabled: tocDisabled
    };
    var affix = $aside.affix(settings);

    $(window).on('resize', window.throttle(function() {
      if (affix) {
        affix.setOptions({ disabled: tocDisabled });
      }
    }, 100));
    window.pageAsideAffix = affix;
  });
})();
</script>
</div>
<script>(function () {
// Marks the first element with class "root" as touch-capable when the
// browser reports support for the 'touchstart' event.
var rootElement = document.getElementsByClassName('root')[0];
var supportsTouch = window.hasEvent('touchstart');
if (supportsTouch) {
  rootElement.dataset.isTouch = true;
  // Registers an empty touchstart listener.
  // NOTE(review): presumably so touch browsers apply :active styles - confirm.
  document.addEventListener('touchstart', function(){}, false);
}
})();
</script>
</body>
</html>