Advertisement

京东评论词云图解决方案

阅读量:

本方案整合了网上几个成熟的词云图方案,得到一个比较靠谱的实现。

结构:

在任意一个空文件夹中放入 main.py 和 main2.py,

以及 simhei.ttf(字体文件,可自选并改名,但需同步修改代码中的 font_path)、stop.txt(停用词表)、001.png(词云形状用的背景图)。

main.py内容:

复制代码
 import requests

    
  
    
  
    
 def get_comments(product_id, sort_type, page, page_size):
     """Fetch one page of comments for a JD product.

     Args:
         product_id: Product id, sent as the ``productId`` query parameter.
         sort_type: ``'好评'`` selects score ``'3'``; any other value selects
             score ``'1'``.
         page: Page index, sent as the ``page`` query parameter.
         page_size: Number of comments per page, sent as ``pageSize``.

     Returns:
         The list stored under the ``comments`` key of the JSON response, or
         an empty list when the request fails, the status code is not 200,
         the body is not valid JSON, or no comments are present.
     """
     # Map the user's good/bad choice onto the score filter the API expects.
     score = '3' if sort_type == '好评' else '1'

     # Pass the query as a dict so that requests URL-encodes every value;
     # product_id comes straight from user input.
     params = {
         'appid': 'item-v3',
         'functionId': 'pc_club_productPageComments',
         'client': 'pc',
         'clientVersion': '1.0.0',
         't': '1710622550689',
         'loginType': '3',
         'uuid': '181111935.1997641277.1707936015.1710416671.1710620660.8',
         'productId': product_id,
         'score': score,
         'sortType': '5',
         'page': page,
         'pageSize': page_size,
         'isShadowSku': '0',
         'fold': '1',
         'bbtf': '',
         'shield': '',
     }

     try:
         # A timeout keeps a stalled connection from hanging the script.
         response = requests.get('https://api.m.jd.com/', params=params, timeout=10)
     except requests.exceptions.RequestException as e:
         print(f"请求异常:{e}")
         return []

     if response.status_code != 200:
         print(f"请求失败,状态码:{response.status_code}")
         return []

     try:
         json_data = response.json()
     except ValueError as e:
         # JSON decoding errors are ValueError subclasses.
         print(f"请求异常:{e}")
         return []

     if not isinstance(json_data, dict):
         return []
     # Treat a missing or null "comments" entry the same as "no comments".
     return json_data.get('comments') or []
    
  
    
  
    
 def filter_comments(comments):
     """Return the comments whose text does not contain the placeholder.

     Args:
         comments: Iterable of dicts, each with a ``'content'`` string.

     Returns:
         A new list, in the original order, without the entries whose
         ``'content'`` contains ``'此用户未填写评价内容'``.
     """
     placeholder = '此用户未填写评价内容'
     kept = []
     for entry in comments:
         if placeholder in entry['content']:
             continue
         kept.append(entry)
     return kept
    
  
    
 def save_comments_to_file(comments, filename):
     """Write the text of every comment to a file, one comment per line.

     Args:
         comments: Iterable of dicts, each with a ``'content'`` string.
         filename: Path of the output file. It is opened in ``'w'`` mode
             with UTF-8 encoding, so existing content is replaced.
     """
     with open(filename, 'w', encoding='utf-8') as file:
         file.writelines(comment['content'] + '\n' for comment in comments)
    
 def main():
     """Interactively download JD comments, save them and print them.

     Prompts for a product id and for '好评' or '差评', requests comment pages
     through ``get_comments`` until a page comes back empty or at least
     ``max_comments`` comments were collected, drops placeholder comments
     through ``filter_comments``, writes the rest to ``text.txt`` through
     ``save_comments_to_file`` and prints each comment.
     """
     product_id = input("请输入商品ID: ").strip()

     # Surrounding whitespace would otherwise make a valid choice look invalid.
     sort_type = input("请输入好评或差评的选择('好评' 或 '差评'): ").strip()
     if sort_type not in ('好评', '差评'):
         print("输入无效,请输入'好评'或'差评'")
         return

     page_size = 10      # comments requested per page
     max_comments = 200  # stop paging once this many were collected
     comments = []
     page = 0

     # The loop condition already enforces the cap, so an empty page is the
     # only other reason to stop.
     while len(comments) < max_comments:
         comments_page = get_comments(product_id, sort_type, page, page_size)
         if not comments_page:
             break
         comments.extend(comments_page)
         page += 1

     filtered_comments = filter_comments(comments)
     if not filtered_comments:
         print("没有评论可显示")
         return

     filename = 'text.txt'
     save_comments_to_file(filtered_comments, filename)
     print(f"评论已保存到文件:{filename}")
     for comment in filtered_comments:
         print(comment['content'])
    
 # Run the interactive scraper only when this file is executed as a script.
 if __name__ == "__main__":
    
     main()
    
    
    
    
    python
    
    
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-08-18/2gtPCr7uxEADQ8fiLVMdBbZYjN4I.png)

main2.py内容:

复制代码
 import jieba

    
 from wordcloud import WordCloud
    
 import numpy as np
    
 from PIL import Image, ImageDraw
    
 from matplotlib import colors
    
 import re
    
  
    
 # Ask which kind of reviews is being visualised; the answer only selects
 # the colour palette of the word cloud.
 user_input = input("请输入您的评价(好评/差评):").strip()
 if user_input.startswith("好评"):
     good_review_colors = ['#90EE90', '#ADFF2F', '#FFD700', '#FFA07A', '#32CD32']
     colormaps = colors.ListedColormap(good_review_colors)
 elif user_input.startswith("差评"):
     bad_review_colors = ['#FF0000', '#FF6347', '#FFA07A', '#FF7F50', '#CD5C5C']
     colormaps = colors.ListedColormap(bad_review_colors)
 else:
     # Any other answer falls back to a blue palette.
     colormaps = colors.ListedColormap([
         '#ADD8E6', '#87CEEB', '#6495ED', '#4682B4', '#1874CD',
         '#104E8B', '#0000FF', '#00008B', '#000080', '#191970',
     ])

 # Read the comment text.
 with open(r"text.txt", "r", encoding="utf-8") as f:
     text = f.read()

 # Replace punctuation, emoji and whitespace with a single space instead of
 # deleting them, so the end of one comment is not fused with the start of
 # the next before segmentation.
 clean_text = re.sub(r'[^\w]+', ' ', text)

 # Segment the text into words with jieba.
 words_list_jieba = jieba.lcut(clean_text)

 # Load the stop-word list, one word per line.
 stop_words = set(['\n'])
 with open("stop.txt", 'r', encoding='utf-8') as f1:
     for line in f1:
         stop_words.add(line.strip())

 # Count how often each remaining word occurs; whitespace-only tokens are
 # the separators inserted above and are skipped.
 word_freq = {}
 for word in words_list_jieba:
     if word.strip() and word not in stop_words:
         word_freq[word] = word_freq.get(word, 0) + 1

 # fit_words cannot lay out an empty frequency table, so stop with a clear
 # message instead of a traceback.
 if not word_freq:
     raise SystemExit("text.txt 中没有可用于生成词云的词语")

 # Image used as the mask that gives the cloud its shape.
 with Image.open('001.png') as mask_image:
     background_image = np.array(mask_image)

 # Build the word cloud from the frequency table.
 wordcloud = WordCloud(
     font_path='simhei.ttf',
     prefer_horizontal=0.99,
     background_color='white',
     max_words=10000,
     max_font_size=200,
     min_font_size=5,
     # Stop words were already removed while counting; per the wordcloud
     # docs this argument is ignored when generating from frequencies.
     stopwords=stop_words,
     mask=background_image,
     # Apply the palette chosen from the user's answer.
     colormap=colormaps,
     repeat=True
 ).fit_words(word_freq)

 # Render the cloud, draw a border around it, then show and save it.
 image = wordcloud.to_image()
 draw = ImageDraw.Draw(image)
 border_color = 'black'
 border_width = 5
 width, height = image.size
 # The far corner is the last pixel inside the image, so all four sides of
 # the border are drawn at full width.
 draw.rectangle([(0, 0), (width - 1, height - 1)], outline=border_color, width=border_width)
 image.show()
 image.save('词云图_带边框.png')
    
    
    
    
    python
    
    
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-08-18/GZ9MJTLo4qHj2zFupW36PAKn8Dct.png)

实例:

①停用词格式:
一下
一个
一些
一何
一切
一则
一则通过
一天
一定
一方面
一旦
...

②先运行 main.py(抓取评论并生成 text.txt),再运行 main2.py(读取 text.txt 生成词云图)

③001.png内容:

④词云图效果:

全部评论 (0)

还没有任何评论哟~