123456789101112131415161718192021222324 |
- import jieba  # tokenizer; this script segments two near-identical sentences and prints a Simhash value for each
- from simhash import Simhash  # fingerprint class applied to each token list below
- words1 = jieba.lcut('我很想要打游戏,但是女朋友会生气!', cut_all=True)  # token list for sentence 1; cut_all=True is presumably jieba's "full mode" -- confirm in jieba docs
- words2 = jieba.lcut('我很想要打游戏,但是女朋友非常生气!', cut_all=True)  # sentence 2 differs from sentence 1 only in "会" -> "非常"
- # Disabled alternative: print(Simhash(words1).distance(Simhash(words2)))  -- prints the distance between the two Simhash objects instead of their raw values
- print(Simhash(words1).value)  # fingerprint value built from the tokens of sentence 1
- print(Simhash(words2).value)  # fingerprint value built from the tokens of sentence 2
- # Output: 6 -- with short texts, even a small wording change shows up clearly in the simhash; try longer texts to compare. NOTE(review): "6" presumably refers to the disabled distance print above, not to the two value prints -- confirm.
- 1495213811346268772
- 351422926174413540
- 1495213811346268772
- 1504362022967304932
- 5oiR5b6I5oOz6KaB5omT5ri45oiP77yM5L2G5piv5aWz5pyL5Y+L5Lya55Sf5rCU77yB
- 5oiR5b6I5oOz6KaB5omT5ri45oiP77yM5L2G5piv5aWz5pyL5Y+L6Z2e5bi455Sf5rCU77yB
|