"""Print the most frequent multi-character words found in a Chinese text file."""

from collections import Counter
from typing import Iterable, List, Sequence

import jieba

# Input file read by main() when no path is supplied.
DEFAULT_TEXT_PATH = "E:\\示例文本文件\\西游记.txt"

# Each group lists interchangeable words; the first entry of a group is the
# form every other member of that group is rewritten to.
SYNONYMS = [
    ['孙悟空', '孙猴子'],
]

# Words dropped from the token stream before counting.
STOPWORDS = [
    '和', '在', '是', '的', '他', '她', '它', '了', '这', '那', '你', '我',
]


def merge_synonyms(
    words: Iterable[str], synonyms: Iterable[Sequence[str]]
) -> List[str]:
    """Rewrite each word to the first entry of the synonym group containing it.

    Args:
        words: Tokens to normalise; any iterable of strings.
        synonyms: Groups of equivalent words. ``group[0]`` is the replacement
            used for every member of ``group``. Empty groups are ignored.

    Returns:
        A new list with the same length and order as ``words``. Words that
        appear in no group are passed through unchanged.
    """
    canonical = {}
    for group in synonyms:
        for member in group:
            # setdefault keeps the mapping from the earliest group that
            # mentions a word, matching a first-match scan over the groups.
            canonical.setdefault(member, group[0])
    # One dict lookup per token instead of rescanning every group per token.
    return [canonical.get(word, word) for word in words]


def remove_stopwords(words: Iterable[str], stopwords: Iterable[str]) -> List[str]:
    """Return the words that are not stop words, preserving their order.

    Args:
        words: Tokens to filter; any iterable of strings.
        stopwords: Collection of words to drop.

    Returns:
        A new list containing every element of ``words`` that is not present
        in ``stopwords``.
    """
    # Build the lookup once so each membership test is O(1) rather than a
    # linear scan of the stop-word list.
    blocked = frozenset(stopwords)
    return [word for word in words if word not in blocked]


def main(path: str = DEFAULT_TEXT_PATH, top_n: int = 20) -> None:
    """Segment a text file with jieba and print its most frequent words.

    The text is tokenised, synonyms are merged, stop words are removed, and
    single-character tokens are discarded before counting. Each output line
    has the form ``"<word> <count>"``, highest count first; words with equal
    counts appear in the order they were first encountered.

    Args:
        path: Text file to analyse. Defaults to ``DEFAULT_TEXT_PATH``.
        top_n: Maximum number of words to print.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # GB18030 is a superset of GB2312/GBK, so characters outside the small
    # GB2312 repertoire are decoded instead of being silently dropped.
    # errors="ignore" is kept so stray invalid bytes still do not abort the run.
    with open(path, "r", encoding="gb18030", errors="ignore") as file:
        text = file.read()

    # jieba.cut yields tokens lazily; they are consumed once by merge_synonyms.
    tokens = jieba.cut(text, cut_all=False)
    kept = remove_stopwords(merge_synonyms(tokens, SYNONYMS), STOPWORDS)

    # Only multi-character tokens are counted.
    counts = Counter(word for word in kept if len(word) > 1)
    for word, count in counts.most_common(top_n):
        print(f"{word} {count}")


if __name__ == "__main__":
    main()