from os import path
import jieba
from wordcloud import WordCloud
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
def handle_data(src="data.txt", dst="data_handled.txt", encoding="utf-8"):
    """Segment the raw corpus with jieba and save the tokens to a file.

    The text in ``src`` is read, a fixed set of separator characters is
    replaced by plain spaces, the result is segmented with ``jieba.lcut``
    and every token is written to ``dst`` followed by a single space.

    Both files are opened with an explicit encoding rather than the
    platform's locale default, so the Chinese text is read and written the
    same way on every operating system.

    :param src: path of the raw text file to read.
    :param dst: path of the segmented text file to create or overwrite.
    :param encoding: text encoding used for both ``src`` and ``dst``.
    :return: None
    """
    # Read the raw data.
    with open(src, "r", encoding=encoding) as f:
        txt = f.read()

    # Neutralise unwanted characters: each one becomes a plain space, done in
    # a single pass over the text instead of one ``replace`` call per
    # character.
    separators = str.maketrans(
        {ch: " " for ch in (",", "。", " ", "\n", "\xa0")}
    )
    txt = txt.translate(separators)

    # Segment with jieba's accurate mode.
    words = jieba.lcut(txt)

    # Save the tokens, each followed by one space, in a single write.
    with open(dst, "w", encoding=encoding) as file:
        file.write("".join(f"{token} " for token in words))


def generate_image(src="data_handled.txt", encoding="utf-8"):
    """Render the segmented text as a word-cloud image, save it and show it.

    The text in ``src`` is read with the same explicit encoding that
    ``handle_data`` writes with. The cloud is shaped by ``mask.jpg``, which
    is looked up in the directory of this script, written to
    ``world_cloud.png`` and finally displayed in a matplotlib window.

    :param src: path of the segmented text file produced by ``handle_data``.
    :param encoding: text encoding used to read ``src``.
    :return: None
    """
    # Read the segmented data.
    with open(src, "r", encoding=encoding) as file:
        txt = file.read()

    # The mask image lives next to this script, not in the working directory.
    d = path.dirname(__file__)
    # Build the mask array from the image.
    mask = np.array(Image.open(path.join(d, "mask.jpg")))

    # Build the word cloud.
    # NOTE: per the WordCloud documentation, ``width`` and ``height`` are
    # ignored when a mask is supplied; the canvas takes the mask's shape.
    word = WordCloud(
        background_color="white",
        width=800,
        height=800,
        mask=mask,
        # Font path: WordCloud's default font does not support Chinese, so
        # SimHei.ttf has to be downloaded and placed in the system font
        # directory.
        font_path='SimHei.ttf',
    ).generate(txt)

    # Save the image.
    word.to_file('world_cloud.png')

    # Display the image with matplotlib, hiding the axes.
    plt.imshow(word)
    plt.axis("off")
    plt.show()
if __name__ == "__main__":
    # Script entry point: segment the corpus first, then render the word
    # cloud from the segmented text file.
    handle_data()
    generate_image()
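# Author: 〃唯美划指边
# Link: https://juejin.cn/post/7008833196750012452
# Source: Juejin (掘金)
# Copyright belongs to the author. For commercial reproduction, contact the
# author for authorization; for non-commercial reproduction, credit the source.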