"""Demo: compare the simhash fingerprints of two nearly identical sentences.

Each sentence is tokenized with jieba in full mode (``cut_all=True``), the
token list is passed to ``Simhash`` as its feature list, and the resulting
fingerprint value of each sentence is printed.
"""
import jieba
from simhash import Simhash

# The two sentences differ only in one word ("会" vs. "非常").
words1 = jieba.lcut('我很想要打游戏,但是女朋友会生气!', cut_all=True)
words2 = jieba.lcut('我很想要打游戏,但是女朋友非常生气!', cut_all=True)

# Build each fingerprint once and reuse it below.
simhash1 = Simhash(words1)
simhash2 = Simhash(words2)

# Optional: print the distance between the two fingerprints instead.
# print(simhash1.distance(simhash2))
print(simhash1.value)
print(simhash2.value)

# Original author's note (translated): "Output: 6. With short texts, even a
# small change in wording shows up quite clearly in simhash; try it with
# longer texts."
# NOTE(review): the "6" presumably refers to the distance print above, which
# is currently disabled - confirm by re-running.
#
# Values recorded in the original file (presumably fingerprints printed by
# earlier runs; not re-verified here):
#   1495213811346268772
#   351422926174413540
#   1495213811346268772
#   1504362022967304932
#
# Base64 strings recorded in the original file. They decode (UTF-8) to the
# two sentences above, but with full-width punctuation:
#   5oiR5b6I5oOz6KaB5omT5ri45oiP77yM5L2G5piv5aWz5pyL5Y+L5Lya55Sf5rCU77yB
#   5oiR5b6I5oOz6KaB5omT5ri45oiP77yM5L2G5piv5aWz5pyL5Y+L6Z2e5bi455Sf5rCU77yB