博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
爬取网易热评做成评论截图形式(给自己看的很乱有待更改)
阅读量:5367 次
发布时间:2019-06-15

本文共 6480 字,大约阅读时间需要 21 分钟。

# coding: utf-8
"""Render NetEase Cloud Music hot comments as screenshot-style images.

Running the script fetches the hot comments of one song, then draws every
comment (avatar, text, like count, timestamp and, when present, the quoted
reply) onto its own image, shows it and saves it to disk.
"""
import datetime
import json
import math
import os
import re

import requests
from PIL import Image, ImageDraw, ImageFont

# Font files (Windows locations).
nor_font = 'C:/windows/fonts/simsun.ttc'
micro_font = 'C:/windows/fonts/msyh.ttc'  # Microsoft YaHei
times_font = 'C:/windows/fonts/times.ttf'  # Times New Roman
huawen_font = 'C:/windows/fonts/STXINGKA.TTF'  # STXingkai
english_font = 'C:/windows/fonts/STXINGKA.TTF'  # used for the Latin watermark
fangsong_font = 'C:/windows/fonts/simfang.ttf'  # FangSong

# Working directories: downloaded avatars / intermediate canvases.
path_req_img = r'C:\Users\HDWEN\Desktop\test\test1'
path = r'C:\Users\HDWEN\Desktop\test'

# Seconds to wait for any HTTP response before giving up.
_REQUEST_TIMEOUT = 10
# Vertical distance between two text rows, in pixels.
_ROW_HEIGHT = 250


def _text_width(font, text):
    """Return the rendered pixel width of ``text`` in ``font``.

    ``ImageFont.getsize`` no longer exists in current Pillow releases, so
    ``getbbox`` is preferred and ``getsize`` is only used as a fallback for
    very old installations.
    """
    if not text:
        return 0
    if hasattr(font, 'getbbox'):
        return font.getbbox(text)[2]
    return font.getsize(text)[0]


def _wrap_text(text, font, limit, slack=_ROW_HEIGHT):
    """Split ``text`` into rows that are each narrower than ``limit`` pixels.

    A row is cut at the first prefix whose width falls inside the window
    ``(limit - slack, limit)``. Exactly one cut is made per row; the last
    element of the returned list is the remainder that fits on one row (it
    may be an empty string).
    """
    rows = []
    rest = text
    while rest and _text_width(font, rest) >= limit:
        cut = len(rest)
        for end in range(1, len(rest) + 1):
            prefix_width = _text_width(font, rest[:end])
            if prefix_width > limit - slack:
                # Normally the prefix ends inside the slack window. If one
                # very wide glyph jumped straight past the limit, step back
                # one character instead of dropping the rest of the text.
                cut = end if prefix_width < limit or end == 1 else end - 1
                break
        rows.append(rest[:cut])
        rest = rest[cut:]
    rows.append(rest)
    return rows


def _strip_breaks(text):
    """Remove newlines and plain spaces so the text flows as one paragraph."""
    return text.replace('\n', '').replace(' ', '')


def get_pic(user, url, comment, count, time, reply_user=None, reply_content=None):
    """Draw one comment as an image, display it and save it.

    Args:
        user: Nickname of the comment author.
        url: URL of the author's avatar; its basename is reused as the file
            name of every file written by this function.
        comment: Comment text.
        count: Number of likes, rendered next to the comment.
        time: Unix timestamp in seconds; rendered as a UTC date/time.
        reply_user: Nickname of the author of the quoted comment, or
            ``None`` when the comment quotes nothing.
        reply_content: Text of the quoted comment (required whenever
            ``reply_user`` is given).

    Side effects: downloads the avatar into ``path_req_img``, writes an
    intermediate JPEG into ``path``, opens the result in the default image
    viewer and saves the final picture to ``C:\\Users\\HDWEN\\Desktop\\pic``.

    Raises:
        requests.HTTPError: If the avatar download returns an error status.
    """
    file_name = os.path.basename(url)

    # Download the avatar and normalise it to 1024x1024.
    avatar_path = os.path.join(path_req_img, file_name)
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(avatar_path, 'wb') as f:
        f.write(response.content)
    # LANCZOS is the same filter as the removed ``Image.ANTIALIAS`` alias.
    avatar = Image.open(avatar_path).resize((1024, 1024), Image.LANCZOS)

    if reply_user is None:
        reply_user = ''
        reply_content = ' '

    ft = ImageFont.truetype(fangsong_font, 200)
    width = 1500 * 8
    comment_limit = width - 1100  # comment text starts at x = 1100
    reply_limit = width - 1400  # reply text starts at x = 1400

    # Estimate the number of rows to size the canvas (one spare row each).
    line_w1 = math.ceil(_text_width(ft, user + comment) / comment_limit) + 1
    line_w2 = math.ceil(_text_width(ft, reply_user + reply_content) / reply_limit) + 1
    line = line_w1 + line_w2
    height = (line + 5) * _ROW_HEIGHT

    # Blank canvas.
    image = Image.new('RGB', (width, height), (247, 238, 214))
    draw = ImageDraw.Draw(image)

    # Comment area.
    comment_rows = _wrap_text(user + ':' + comment, ft, comment_limit)
    for row, text in enumerate(comment_rows):
        draw.text((1100, row * _ROW_HEIGHT + 50), '%s' % text, fill='black', font=ft)

    # Watermark.
    draw.text((100, 1200), 'Hdwen', font=ImageFont.truetype(english_font, 250), fill='black')
    # Like counter.
    draw.text((width - 2200, height - 400), '点赞:(%s)|回复' % (count), font=ft, fill='blue')
    # Timestamp (UTC). ``utcfromtimestamp`` is deprecated, so build an aware
    # datetime instead; the formatted text is the same.
    posted_at = datetime.datetime.fromtimestamp(time, tz=datetime.timezone.utc)
    shift_time = posted_at.strftime("%Y-%m-%d %H:%M:%S")
    draw.text((1100, height - 400), '%s' % shift_time, font=ft, fill='gray')

    # Persist the canvas, reopen it and add the avatar.
    canvas_path = os.path.join(path, file_name)
    image.save(canvas_path, 'jpeg')
    img3 = Image.open(canvas_path)
    img3.paste(avatar, (50, 50))

    # Reply area.
    if reply_user != '':
        reply_draw = ImageDraw.Draw(img3)
        reply_rows = _wrap_text(reply_user + ':' + reply_content, ft, reply_limit)
        # Leave one empty row between the last comment row and the reply.
        first_row = len(comment_rows) + 1
        last_index = len(reply_rows) - 1
        for index, text in enumerate(reply_rows):
            top = (first_row + index) * _ROW_HEIGHT + 20
            # Wrapped rows paint the background one row further down than
            # the final row does.
            bottom = (line if index == last_index else line + 1) * _ROW_HEIGHT
            # Never hand Pillow an inverted box (newer releases reject it).
            bottom = max(bottom, top)
            reply_draw.rectangle((1300, top, width, bottom), fill=(250, 240, 230))
            reply_draw.text((1400, top + 30), '%s' % text, fill='black', font=ft)

    img3.show()
    img3.save(os.path.join(r'C:\Users\HDWEN\Desktop\pic', file_name))


# Pre-computed encrypted request body for the comments endpoint.
params = 'N/k4O/N6NXalQC6Rv9BW8PTzWfT5CNZadhKUGlbtvyv4Txkq6VA4hy9CsYGAukRmtMJ2fhEF0IuVTxYdrhmpkb6WkYO25h/RV0uDd6dC9W7wU8y1Jt3+HlIPnnqvZeEgUOM8DcUZKx6Br+YcWI6G3v7ZPSecMA90sWdNXb9DEZhtcZD+V2GCRQxp/vxFgepdy/KaT1P8mMZ4wNdW99PYWMVLNprGNuyc8/GgMnIYHWQ='
encSecKey = 'a1264cb1d89ebc410d9a6d7ebae75fd78798c66e17b05299fe7564b33edda38653454ee8ed240c1eb77b8d159478ff6d9cd3521943371a1dd682474a1218c30800090541d87f6cbde133c69158a4bf72141c48872ecd3248578079cc1be6e2fb6b0dc80749857d893eae0ab4f516794a11cc8e39210659e421a4a6f7a8f5cf4d'
headers = {
    'Cookie': 'appver=1.5.0.75771;',
    'Referer': 'http://music.163.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
}
data = {
    "params": params,
    "encSecKey": encSecKey,
}
song_id = '422428548'
# The placeholder in the URL is filled with the song id.
url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token=63c8e79f67b0ee78ce7f3d38a5081b2e'.format(
    song_id)

res = requests.post(url, headers=headers, data=data, timeout=_REQUEST_TIMEOUT)
res.raise_for_status()
payload = json.loads(res.content)
hot_comments = payload['hotComments']

for item in hot_comments:
    user = item['user']['nickname']
    comment = _strip_breaks(item['content'])
    count = item['likedCount']
    avatar_url = item['user']['avatarUrl']
    # The API reports milliseconds; get_pic expects whole seconds.
    timestamp = item['time'] // 1000
    replied = item['beReplied']
    if not replied:
        get_pic(user, avatar_url, comment, count, timestamp)
        print(user, ':', comment, '*****Count:', count, avatar_url, timestamp)
    else:
        reply_user = replied[0]['user']['nickname']
        reply_content = _strip_breaks(replied[0]['content'])
        get_pic(user, avatar_url, comment, count, timestamp, reply_user, reply_content)

转载于:https://www.cnblogs.com/hdwen/p/7554122.html

你可能感兴趣的文章
angular、jquery、vue 的区别与联系
查看>>
Intellij idea创建javaWeb以及Servlet简单实现
查看>>
代理网站
查看>>
Open multiple excel files in WebBrowser, only the last one gets activated
查看>>
FFmpeg进行视频帧提取&音频重采样-Process.waitFor()引发的阻塞超时
查看>>
最近邻与K近邻算法思想
查看>>
【VS开发】ATL辅助COM组件开发
查看>>
FlatBuffers In Android
查看>>
《演说之禅》I & II 读书笔记
查看>>
thinkphp3.2接入支付宝支付接口(PC端)
查看>>
【转】在Eclipse中安装和使用TFS插件
查看>>
C#中Monitor和Lock以及区别
查看>>
【NOIP2017】奶酪
查看>>
5.6.3.7 localeCompare() 方法
查看>>
Linux下好用的简单实用命令
查看>>
描绘应用程序级的信息
查看>>
php环境搭建脚本
查看>>
php 编译常见错误
查看>>
MES架构
查看>>
hdu 2767(tarjan)
查看>>