﻿{"id":549,"date":"2020-09-17T21:04:59","date_gmt":"2020-09-17T13:04:59","guid":{"rendered":"https:\/\/byy3.com\/?p=549"},"modified":"2021-01-09T10:10:08","modified_gmt":"2021-01-09T02:10:08","slug":"python%e7%88%ac%e8%99%ab%e7%88%ac%e5%8f%96%e8%b1%86%e7%93%a3%e7%94%b5%e5%bd%b1%e6%8e%92%e8%a1%8c%e6%a6%9ctop250","status":"publish","type":"post","link":"https:\/\/byy3.com\/?p=549","title":{"rendered":"python\u722c\u866b\u722c\u53d6\u8c46\u74e3\u7535\u5f71\u6392\u884c\u699ctop250"},"content":{"rendered":"<p><code># -*- coding:utf-8 -*-python\u722c\u866b\u8c46\u74e3\u7535\u5f71top250\u722c\u53d6<\/code><br \/>\n<code>import requests<\/code><br \/>\n<code>from bs4 import BeautifulSoup<\/code><br \/>\n<code>import re<\/code><br \/>\n<code>import time<\/code><br \/>\n<code>import sys<\/code><\/p>\n<p><code>def getHTMLText(url,k):<\/code><br \/>\n<code>try:<\/code><br \/>\n<code>if(k==0):kw={}<\/code><br \/>\n<code>else: kw={'start':k,'filter':''}<\/code><br \/>\n<code>r = requests.get(url,params=kw,headers={'User-Agent': 'Mozilla\/4.0'})<\/code><br \/>\n<code>r.raise_for_status()<\/code><br \/>\n<code>r.encoding = r.apparent_encoding<\/code><br \/>\n<code>return r.text<\/code><br \/>\n<code>except:<\/code><br \/>\n<code>print(\"Failed!\")<\/code><\/p>\n<p><code>def getData(html):<\/code><br \/>\n<code>soup = BeautifulSoup(html, \"html.parser\")<\/code><br \/>\n<code>movieList=soup.find('ol',attrs={'class':'grid_view'})#\u627e\u5230\u7b2c\u4e00\u4e2aclass\u5c5e\u6027\u503c\u4e3agrid_view\u7684ol\u6807\u7b7e<\/code><br \/>\n<code>moveInfo=[]<\/code><br \/>\n<code>for movieLi in movieList.find_all('li'):#\u627e\u5230\u6240\u6709li\u6807\u7b7e<\/code><br \/>\n<code>data = []<\/code><br \/>\n<code>#\u5f97\u5230\u7535\u5f71\u540d\u5b57<\/code><br \/>\n<code>movieHd=movieLi.find('div',attrs={'class':'hd'})#\u627e\u5230\u7b2c\u4e00\u4e2aclass\u5c5e\u6027\u503c\u4e3ahd\u7684div\u6807\u7b7e<\/code><br \/>\n<code>movieName=movieHd.find('span',attrs={'class':'title'}).getText()#\u627e\u5230\u7b2c\u4e00\u4e2aclass\u5c5e\u6027\u503c\u4e3atitle\u7684span\u6807\u7b7e<\/code><br \/>\n<code>#\u4e5f\u53ef\u4f7f\u7528.string\u65b9\u6cd5<\/code><br \/>\n<code>data.append(movieName)<\/code><\/p>\n<p><code>#\u5f97\u5230\u7535\u5f71\u7684\u8bc4\u5206<\/code><br \/>\n<code>movieScore=movieLi.find('span',attrs={'class':'rating_num'}).getText()<\/code><br \/>\n<code>data.append(movieScore)<\/code><\/p>\n<p><code>#\u5f97\u5230\u7535\u5f71\u7684\u8bc4\u4ef7\u4eba\u6570<\/code><br \/>\n<code>movieEval=movieLi.find('div',attrs={'class':'star'})<\/code><br \/>\n<code>movieEvalNum=re.findall(r'\\d+',str(movieEval))[-1]<\/code><br \/>\n<code>data.append(movieEvalNum)<\/code><\/p>\n<p><code># \u5f97\u5230\u7535\u5f71\u7684\u77ed\u8bc4<\/code><br \/>\n<code>movieQuote = movieLi.find('span', attrs={'class': 'inq'})<\/code><br \/>\n<code>if(movieQuote):<\/code><br \/>\n<code>data.append(movieQuote.getText())<\/code><br \/>\n<code>else:<\/code><br \/>\n<code>data.append(\"\u65e0\")<\/code><\/p>\n<p><code>print(outputMode.format(data[0], data[1], data[2],data[3],chr(12288)))<\/code><\/p>\n<p><code>#\u5c06\u8f93\u51fa\u91cd\u5b9a\u5411\u5230txt\u6587\u4ef6<\/code><br \/>\n<code>output=sys.stdout<\/code><br \/>\n<code>outputfile=open(\"moviedata.txt\",'w',encoding='utf-8')<\/code><br \/>\n<code>sys.stdout=outputfile<\/code><\/p>\n<p><code>outputMode= \"{0:{4}^20}\\t{1:^10}\\t{2:^10}\\t{3:{4}&lt;10}\"<\/code><br \/>\n<code>print(outputMode.format('\u7535\u5f71\u540d\u79f0', '\u8bc4\u5206', '\u8bc4\u8bba\u4eba\u6570', '\u77ed\u8bc4', chr(12288)))<\/code><br \/>\n<code>basicUrl='https:\/\/movie.douban.com\/top250'<\/code><br \/>\n<code>k=0<\/code><br \/>\n<code>while k&lt;=225:<\/code><br \/>\n<code>html=getHTMLText(basicUrl,k)<\/code><br \/>\n<code>time.sleep(2)<\/code><br \/>\n<code>k+=25<\/code><br \/>\n<code>getData(html)<\/code><\/p>\n<p><code>outputfile.close()<\/code><br \/>\n<code>sys.stdout=output<\/code><\/p>\n","protected":false},"excerpt":{"rendered":"<p># -*- coding:utf-8 -*-python\u722c\u866b\u8c46\u74e3\u7535\u5f71top250\u722c\u53d6 import reque [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[20],"tags":[33,352,49,51],"class_list":["post-549","post","type-post","status-publish","format-standard","hentry","category-python","tag-python"],"_links":{"self":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts\/549","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=549"}],"version-history":[{"count":0,"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts\/549\/revisions"}],"wp:attachment":[{"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=549"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=549"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=549"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}