直接上代码
def n_grams(s, n):
    """Return every contiguous n-gram of a token list, joined into strings.

    Args:
        s: Sequence of string tokens (e.g. the output of a word segmenter).
        n: Number of consecutive tokens per gram.

    Returns:
        A list with one string per window of ``n`` adjacent tokens, in
        left-to-right order; each string is the window's tokens concatenated
        with no separator. The list is empty when ``n`` is not positive or
        is larger than ``len(s)``.
    """
    if n <= 0:
        # A non-positive size would otherwise yield empty-string "grams".
        return []
    return ["".join(s[i:i + n]) for i in range(len(s) - n + 1)]


# Preview the n-gram expansion for the first 10 rows of `data`.
for _, item in data[:10].iterrows():
    # Columns are read by position; `.iloc` makes that explicit instead of
    # relying on integer keys falling back to positional lookup on a Series.
    title = item.iloc[2]  # not used further in this snippet
    # The fourth column is treated as a comma-separated list of queries.
    for each_query in str(item.iloc[3]).split(','):
        # NOTE(review): assumes lac.run returns a flat list of string tokens
        # (segmentation-only mode) — confirm against how `lac` is constructed.
        term = lac.run(each_query)
        print(term)
        tokens = []
        # Collect all 1-grams up to the full-length gram for this query.
        for i in range(1, len(term) + 1):
            tokens.extend(n_grams(term, i))
        print(tokens)
        print()
结果展示: