elasticsearch系统学习笔记5-中文分词器
https://github.com/medcl/elasticsearch-analysis-ik
下载地址 https://github.com/medcl/elasticsearch-analysis-ik/releases
本机下载 elasticsearch-analysis-ik-6.3.2.zip(注意:插件版本需与所安装的 ES 版本一致)
解压下载的 zip 包,并将解压得到的目录名改为 analysis-ik
(此步骤已跳过,仅作记录)将文件夹 analysis-ik/config 文件夹下的内容移动到 {ES_HOME}/config 目录下 —— 该版本的 IK 应可直接读取插件目录内的 config,一般无需移动(待确认)
将解压后的目录移动到 {ES_HOME}/plugins 目录下
重启 ES 服务
启动窗口加载日志会多出一些加载插件的信息:[YABPFPe] loaded plugin [analysis-ik]
测试:
1)ik_smart(最粗粒度切分)
请求:
GET /_analyze
{ "text": "中华人民共和国国歌", "analyzer": "ik_smart" }
返回:
{
  "tokens": [
    { "token": "中华人民共和国", "start_offset": 0, "end_offset": 7, "type": "CN_WORD", "position": 0 },
    { "token": "国歌", "start_offset": 7, "end_offset": 9, "type": "CN_WORD", "position": 1 }
  ]
}
2)ik_max_word(最细粒度切分,会穷尽各种可能的词语组合)
请求:
GET /_analyze
{ "text": "中华人民共和国国歌", "analyzer": "ik_max_word" }
返回:
{
  "tokens": [
    { "token": "中华人民共和国", "start_offset": 0, "end_offset": 7, "type": "CN_WORD", "position": 0 },
    { "token": "中华人民", "start_offset": 0, "end_offset": 4, "type": "CN_WORD", "position": 1 },
    { "token": "中华", "start_offset": 0, "end_offset": 2, "type": "CN_WORD", "position": 2 },
    { "token": "华人", "start_offset": 1, "end_offset": 3, "type": "CN_WORD", "position": 3 },
    { "token": "人民共和国", "start_offset": 2, "end_offset": 7, "type": "CN_WORD", "position": 4 },
    { "token": "人民", "start_offset": 2, "end_offset": 4, "type": "CN_WORD", "position": 5 },
    { "token": "共和国", "start_offset": 4, "end_offset": 7, "type": "CN_WORD", "position": 6 },
    { "token": "共和", "start_offset": 4, "end_offset": 6, "type": "CN_WORD", "position": 7 },
    { "token": "国", "start_offset": 6, "end_offset": 7, "type": "CN_CHAR", "position": 8 },
    { "token": "国歌", "start_offset": 7, "end_offset": 9, "type": "CN_WORD", "position": 9 }
  ]
}