爬取网易热评做成评论截图形式（给自己看的很乱有待更改）-白红宇

强烈建议你试试无所不能的chatGPT，快点击我

爬取网易热评做成评论截图形式（给自己看的很乱有待更改）

阅读量：5367 次

发布时间：2019-06-15

本文共 6480 字，大约阅读时间需要 21 分钟。

# coding: utf-8
# Fetch the hot comments of a NetEase Cloud Music song and render each one as
# a "comment screenshot" style image (avatar, comment text, like count, time
# and, when present, the comment being replied to).
import datetime
import json
import math
import os
import re

import requests
from PIL import Image, ImageDraw, ImageFont

# Fonts
nor_font = 'C:/windows/fonts/simsun.ttc'
micro_font = 'C:/windows/fonts/msyh.ttc'  # Microsoft YaHei
times_font = 'C:/windows/fonts/times.ttf'  # Times New Roman
huawen_font = 'C:/windows/fonts/STXINGKA.TTF'  # STXingkai
english_font = 'C:/windows/fonts/STXINGKA.TTF'  # used for Latin text
fangsong_font = 'C:/windows/fonts/simfang.ttf'  # FangSong

# Directory for downloaded avatars, directory for the text-only canvas and
# directory for the finished pictures.
path_req_img = r'C:\Users\HDWEN\Desktop\test\test1'
path = r'C:\Users\HDWEN\Desktop\test'
path_out_img = r'C:\Users\HDWEN\Desktop\pic'


def _text_width(font, text):
    """Return the rendered width of ``text`` in pixels for ``font``.

    ``FreeTypeFont.getsize`` was removed in Pillow 10, so ``getlength`` is
    preferred and ``getsize`` is only used on older Pillow versions that lack
    it.  ``getlength`` may return a float.
    """
    if hasattr(font, 'getlength'):
        return font.getlength(text)
    return font.getsize(text)[0]


def _wrap_text(text, font, max_width):
    """Split ``text`` into rows that are each narrower than ``max_width``.

    A row is cut at the first prefix whose width lands in the open interval
    ``(max_width - 250, max_width)``.  Exactly one cut is made per row.  If no
    prefix lands in that interval the remainder is kept as a single row
    instead of being dropped.
    """
    rows = []
    remaining = text
    while remaining and _text_width(font, remaining) >= max_width:
        cut = None
        for i in range(1, len(remaining) + 1):
            if max_width - 250 < _text_width(font, remaining[:i]) < max_width:
                cut = i
                break
        if cut is None:
            break
        rows.append(remaining[:cut])
        remaining = remaining[cut:]
    rows.append(remaining)
    return rows


def _strip_blanks(text):
    """Return ``text`` without newlines and spaces (``None`` becomes '')."""
    return ''.join(ch for ch in (text or '') if ch not in {'\n', ' '})


def get_pic(user, url, comment, count, time, reply_user=None, reply_content=None):
    """Render one comment as an image, show it and save it to disk.

    Args:
        user: Nickname of the comment author.
        url: URL of the author's avatar; its basename is used as the file
            name of every file written.
        comment: Comment text.
        count: Like count shown next to the comment.
        time: POSIX timestamp in seconds; it is displayed as UTC.
        reply_user: Nickname of the author of the replied-to comment, or
            ``None`` when the comment is not a reply.
        reply_content: Text of the replied-to comment.

    Side effects:
        Downloads the avatar into ``path_req_img``, saves the text-only
        canvas as JPEG into ``path``, opens the finished picture in the
        default viewer and saves it into ``path_out_img``.

    Raises:
        requests.RequestException: if the avatar cannot be downloaded.
    """
    file_name = os.path.basename(url)
    for directory in (path_req_img, path, path_out_img):
        os.makedirs(directory, exist_ok=True)

    # Download the avatar and scale it to a fixed 1024x1024 size.
    avatar_path = os.path.join(path_req_img, file_name)
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    with open(avatar_path, 'wb') as f:
        f.write(response.content)
    with Image.open(avatar_path) as avatar_file:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        avatar = avatar_file.resize((1024, 1024), Image.LANCZOS)

    # No reply: an empty reply_user disables the reply area further down.
    if reply_user is None:
        reply_user = ''
        reply_content = ' '
    elif reply_content is None:
        reply_content = ''

    # Estimate how many text rows are needed to size the canvas.
    ft = ImageFont.truetype(fangsong_font, 200)
    width = 1500 * 8
    w1 = _text_width(ft, user + comment)
    w2 = _text_width(ft, reply_user + reply_content)
    line_w1 = math.ceil(w1 / (width - 1100)) + 1
    line_w2 = math.ceil(w2 / (width - 1400)) + 1
    line = line_w1 + line_w2
    height = (line + 5) * 250

    # Blank canvas.
    image = Image.new('RGB', (width, height), (247, 238, 214))
    draw = ImageDraw.Draw(image)

    # Comment area: one row every 250 px, to the right of the avatar.
    comment_rows = _wrap_text(user + '：' + comment, ft, width - 1100)
    for row, text in enumerate(comment_rows):
        draw.text((1100, row * 250 + 50), text, fill='black', font=ft)
    last_comment_row = len(comment_rows) - 1

    # Watermark.
    draw.text((100, 1200), 'Hdwen', font=ImageFont.truetype(english_font, 250), fill='black')
    # Like area.
    draw.text((width - 2200, height - 400), '点赞:(%s)|回复' % (count), font=ft, fill='blue')
    # Time area (UTC).
    date_array = datetime.datetime.fromtimestamp(time, datetime.timezone.utc)
    shift_time = date_array.strftime("%Y-%m-%d %H:%M:%S")
    draw.text((1100, height - 400), '%s' % shift_time, font=ft, fill='gray')

    # Keep the text-only canvas on disk, but continue on the in-memory image
    # so the final picture does not go through an extra lossy JPEG round trip.
    image.save(os.path.join(path, file_name), 'jpeg')
    image.paste(avatar, (50, 50))

    # Reply area: starts two rows below the last comment row.
    if reply_user != '':
        reply_rows = _wrap_text(reply_user + '：' + reply_content, ft, width - 1400)
        first_reply_row = last_comment_row + 2
        for offset, text in enumerate(reply_rows):
            top = (first_reply_row + offset) * 250
            is_last = offset == len(reply_rows) - 1
            # Wrapped rows paint the background one row further down than the
            # final row does.
            bottom = (line if is_last else line + 1) * 250
            # Recent Pillow versions reject rectangles with y1 < y0.
            bottom = max(bottom, top + 20)
            draw.rectangle((1300, top + 20, width, bottom), fill=(250, 240, 230))
            draw.text((1400, top + 50), text, fill='black', font=ft)

    image.show()
    image.save(os.path.join(path_out_img, file_name))


params = 'N/k4O/N6NXalQC6Rv9BW8PTzWfT5CNZadhKUGlbtvyv4Txkq6VA4hy9CsYGAukRmtMJ2fhEF0IuVTxYdrhmpkb6WkYO25h/RV0uDd6dC9W7wU8y1Jt3+HlIPnnqvZeEgUOM8DcUZKx6Br+YcWI6G3v7ZPSecMA90sWdNXb9DEZhtcZD+V2GCRQxp/vxFgepdy/KaT1P8mMZ4wNdW99PYWMVLNprGNuyc8/GgMnIYHWQ='
encSecKey = 'a1264cb1d89ebc410d9a6d7ebae75fd78798c66e17b05299fe7564b33edda38653454ee8ed240c1eb77b8d159478ff6d9cd3521943371a1dd682474a1218c30800090541d87f6cbde133c69158a4bf72141c48872ecd3248578079cc1be6e2fb6b0dc80749857d893eae0ab4f516794a11cc8e39210659e421a4a6f7a8f5cf4d'
headers = {
    'Cookie': 'appver=1.5.0.75771;',
    'Referer': 'http://music.163.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
}
data = {
    "params": params,
    "encSecKey": encSecKey
}
song_id = '422428548'
# The song id is substituted into the resource path.
url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token=63c8e79f67b0ee78ce7f3d38a5081b2e'.format(
    song_id)
res = requests.post(url, headers=headers, data=data, timeout=10)
response_data = json.loads(res.content)
hot_comments = response_data['hotComments']
for i in hot_comments:
    user = i['user']['nickname']
    comment = _strip_blanks(i['content'])
    count = i['likedCount']
    avatar_url = i['user']['avatarUrl']
    # The API reports milliseconds; get_pic expects seconds.
    created = int(i['time']) // 1000
    reply = i['beReplied']
    if not reply:
        get_pic(user, avatar_url, comment, count, created)
        print(user, ':', comment, '*****Count:', count, avatar_url, created)
    else:
        reply_user = reply[0]['user']['nickname']
        reply_content = _strip_blanks(reply[0]['content'])
        get_pic(user, avatar_url, comment, count, created, reply_user, reply_content)

转载于:https://www.cnblogs.com/hdwen/p/7554122.html

你可能感兴趣的文章

angular、jquery、vue 的区别与联系

Intellij idea创建javaWeb以及Servlet简单实现

Open multiple excel files in WebBrowser, only the last one gets activated

FFmpeg进行视频帧提取&音频重采样-Process.waitFor()引发的阻塞超时

最近邻与K近邻算法思想

【VS开发】ATL辅助COM组件开发

FlatBuffers In Android

《演说之禅》I & II 读书笔记

thinkphp3.2接入支付宝支付接口（PC端）

【转】在Eclipse中安装和使用TFS插件

C#中Monitor和Lock以及区别

【NOIP2017】奶酪

5.6.3.7 localeCompare() 方法

Linux下好用的简单实用命令

描绘应用程序级的信息

php环境搭建脚本

php 编译常见错误

hdu 2767(tarjan)

喝酒易醉，品茶养心，人生如梦，品茶悟道，何以解忧？唯有杜康！-- 愿君每日到此一游！

当前时间: 2024-11-30 11:32:26 当前IP: 3.137.198.143 联系邮箱:javaeecc@qq.com Copyright © 2020 - 2022 baihongyu.com 京ICP备2021015314号-2

强烈建议你试试无所不能的CHAT-GPT，快点击我

强烈建议你试试无所不能的CHAT-GPT，快点击我