{"id":798,"date":"2021-01-05T20:42:03","date_gmt":"2021-01-05T12:42:03","guid":{"rendered":"https:\/\/byy3.com\/?p=798"},"modified":"2021-01-09T10:08:02","modified_gmt":"2021-01-09T02:08:02","slug":"%e4%bd%bf%e7%94%a8python%e6%89%b9%e9%87%8f%e7%88%ac%e5%8f%96%e4%b8%bb%e6%b5%81%e6%90%9c%e7%b4%a2%e5%bc%95%e6%93%8e%e5%9b%be%e7%89%87","status":"publish","type":"post","link":"https:\/\/byy3.com\/?p=798","title":{"rendered":"\u4f7f\u7528python\u6279\u91cf\u722c\u53d6\u4e3b\u6d41\u641c\u7d22\u5f15\u64ce\u56fe\u7247"},"content":{"rendered":"<p>\u6700\u8fd1\u5728\u505a\u4e00\u4e2a\u53e3\u7f69\u8bc6\u522b\u7684\u5e94\u7528\uff0c\u9700\u8981\u5f88\u591a\u6234\u53e3\u7f69\u7684\u4eba\u7684\u56fe\u7247\u4f5c\u4e3a\u6570\u636e\u8bad\u7ec3\u6a21\u578b\uff0c\u56e0\u516c\u53f8\u6ca1\u6709\u63d0\u4f9b\u6570\u636e\uff0c\u53ea\u80fd\u6211\u4eec\u81ea\u5df1\u7528python\u722c\u866b\u722c\u53d6\u5404\u4e3b\u6d41\u7f51\u7ad9\u7684\u6234\u53e3\u7f69\u7684\u56fe\u7247\uff0c\u6211\u4eec\u4e3b\u8981\u722c\u53d6\u4e86\u5fc5\u5e94\u3001360\u3001\u641c\u72d7\u7684\u56fe\u7247(\u767e\u5ea6\u7684\u6709\u70b9\u6742\uff0c\u4e0d\u5982\u8fd9\u4e09\u5bb6)\uff0c\u4ee3\u7801\u5982\u4e0b\uff08\u4ec5\u4f9b\u5b66\u4e60\u53c2\u8003\uff09\uff1a<\/p>\n<p><strong>\u5fc5\u5e94\u641c\u7d22<\/strong><\/p>\n<pre class=\"prism-token token language-javascript\"><span class=\"token keyword\">from<\/span> bs4 <span class=\"token keyword\">import<\/span> BeautifulSoup\r\n<span class=\"token keyword\">import<\/span> urllib<span class=\"token punctuation\">.<\/span>request\r\n<span class=\"token keyword\">import<\/span> requests\r\n<span class=\"token keyword\">import<\/span> time\r\n<span class=\"token keyword\">import<\/span> json\r\n<span class=\"token keyword\">import<\/span> sys\r\n<span class=\"token keyword\">import<\/span> re\r\n<span class=\"token keyword\">import<\/span> os\r\n\r\n#\u722c\u53d6\u76ee\u6807\u7f51\u7ad9url\r\nCRAWL_TARGET_URL <span class=\"token operator\">=<\/span> <span class=\"token 
string\">'https:\/\/cn.bing.com\/images\/async?q=%s&amp;first=%d&amp;count=%d&amp;relp=%d&amp;lostate=r&amp;mmasync=1'<\/span>\r\n#<span class=\"token function\">\u6bcf\u6b21\u6293\u53d6\u56fe\u7247\u6570\u91cf<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">35<\/span>\u662f\u6b64\u7f51\u9875\u6bcf\u6b21\u7ffb\u9875\u8bf7\u6c42\u6570\u91cf<span class=\"token punctuation\">)<\/span>\r\nNUMS_PER_CRAWL <span class=\"token operator\">=<\/span> <span class=\"token number\">35<\/span>\r\n#<span class=\"token function\">\u6293\u53d6\u56fe\u7247\u6700\u5c0f\u5927\u5c0f<\/span><span class=\"token punctuation\">(<\/span>\u5355\u4f4d\u5b57\u8282<span class=\"token punctuation\">)<\/span>\uff0c\u5c0f\u4e8e\u6b64\u503c\u629b\u5f03\r\nMIN_IMAGE_SIZE <span class=\"token operator\">=<\/span> <span class=\"token number\">102400<\/span>\r\n\r\n\r\ndef <span class=\"token function\">get_image<\/span><span class=\"token punctuation\">(<\/span>url<span class=\"token punctuation\">,<\/span> path<span class=\"token punctuation\">,<\/span> count<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">try<\/span><span class=\"token punctuation\">:<\/span>\r\n u <span class=\"token operator\">=<\/span> urllib<span class=\"token punctuation\">.<\/span>request<span class=\"token punctuation\">.<\/span><span class=\"token function\">urlopen<\/span><span class=\"token punctuation\">(<\/span>url<span class=\"token punctuation\">,<\/span> timeout<span class=\"token operator\">=9<\/span><span class=\"token punctuation\">)<\/span>\r\n t <span class=\"token operator\">=<\/span> u<span class=\"token punctuation\">.<\/span><span class=\"token function\">read<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">if<\/span> sys<span class=\"token punctuation\">.<\/span><span class=\"token function\">getsizeof<\/span><span class=\"token 
punctuation\">(<\/span>t<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&lt;<\/span> MIN_IMAGE_SIZE<span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">return<\/span> <span class=\"token operator\">-<\/span><span class=\"token number\">1<\/span>\r\n except Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span>url<span class=\"token punctuation\">,<\/span> e<span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">return<\/span> <span class=\"token operator\">-<\/span><span class=\"token number\">2<\/span>\r\n #\u63d0\u53d6\u56fe\u7247\u683c\u5f0f\r\n frmt <span class=\"token operator\">=<\/span> url<span class=\"token punctuation\">[<\/span>url<span class=\"token punctuation\">.<\/span><span class=\"token function\">rfind<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">'.'<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><span class=\"token punctuation\">]<\/span>\r\n p <span class=\"token operator\">=<\/span> re<span class=\"token punctuation\">.<\/span><span class=\"token function\">compile<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">\"^\\\\.[a-zA-Z]+\"<\/span><span class=\"token punctuation\">)<\/span>\r\n m <span class=\"token operator\">=<\/span> p<span class=\"token punctuation\">.<\/span><span class=\"token function\">match<\/span><span class=\"token punctuation\">(<\/span>frmt<span class=\"token punctuation\">)<\/span>\r\n frmt <span class=\"token operator\">=<\/span> m<span class=\"token punctuation\">.<\/span><span class=\"token function\">group<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">try<\/span><span class=\"token 
punctuation\">:<\/span>\r\n <span class=\"token keyword\">if<\/span> not os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">exists<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n os<span class=\"token punctuation\">.<\/span><span class=\"token function\">mkdir<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">)<\/span>\r\n f <span class=\"token operator\">=<\/span> <span class=\"token function\">open<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">join<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">,<\/span> <span class=\"token function\">str<\/span><span class=\"token punctuation\">(<\/span>count<span class=\"token punctuation\">)<\/span><span class=\"token operator\">+<\/span>frmt<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">'wb'<\/span><span class=\"token punctuation\">)<\/span>\r\n f<span class=\"token punctuation\">.<\/span><span class=\"token function\">write<\/span><span class=\"token punctuation\">(<\/span>t<span class=\"token punctuation\">)<\/span>\r\n f<span class=\"token punctuation\">.<\/span><span class=\"token function\">close<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n except Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">join<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token 
punctuation\">,<\/span> <span class=\"token function\">str<\/span><span class=\"token punctuation\">(<\/span>count<span class=\"token punctuation\">)<\/span><span class=\"token operator\">+<\/span>frmt<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> e<span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">return<\/span> <span class=\"token operator\">-<\/span><span class=\"token number\">3<\/span>\r\n <span class=\"token keyword\">return<\/span> <span class=\"token number\">0<\/span>\r\n\r\n\r\ndef <span class=\"token function\">crawl_data<\/span><span class=\"token punctuation\">(<\/span>info<span class=\"token punctuation\">,<\/span> num<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n first <span class=\"token operator\">=<\/span> <span class=\"token number\">0<\/span>\r\n count <span class=\"token operator\">=<\/span> <span class=\"token number\">0<\/span>\r\n #\u521b\u5efa\u4e00\u4e2a\u4f1a\u8bdd\r\n s <span class=\"token operator\">=<\/span> requests<span class=\"token punctuation\">.<\/span><span class=\"token function\">Session<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n #\u521b\u5efa\u6587\u4ef6\u8def\u5f84\r\n path<span class=\"token operator\">=<\/span><span class=\"token string\">\".\/\"<\/span><span class=\"token operator\">+<\/span>info\r\n <span class=\"token keyword\">if<\/span> not os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">exists<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n os<span class=\"token punctuation\">.<\/span><span class=\"token function\">mkdir<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">)<\/span>\r\n index<span class=\"token operator\">=<\/span><span class=\"token 
function\">len<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span><span class=\"token function\">listdir<\/span><span class=\"token punctuation\">(<\/span>path<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>#\u6587\u4ef6\u4e2d\u539f\u6709\u56fe\u7247\u6570\r\n <span class=\"token keyword\">while<\/span><span class=\"token punctuation\">(<\/span>count <span class=\"token operator\">&lt;<\/span> num<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n u <span class=\"token operator\">=<\/span> CRAWL_TARGET_URL<span class=\"token operator\">%<\/span><span class=\"token punctuation\">(<\/span>info<span class=\"token punctuation\">,<\/span> first<span class=\"token punctuation\">,<\/span> NUMS_PER_CRAWL<span class=\"token punctuation\">,<\/span> NUMS_PER_CRAWL<span class=\"token punctuation\">)<\/span>\r\n #5<span class=\"token punctuation\">.<\/span>05s\u4e3a\u53d1\u9001\u8d85\u65f6\u65f6\u95f4\uff0c10s\u4e3a\u63a5\u6536\u5230\u6570\u636e\u8d85\u65f6\u65f6\u95f4\r\n req <span class=\"token operator\">=<\/span> s<span class=\"token punctuation\">.<\/span><span class=\"token keyword\">get<\/span><span class=\"token punctuation\">(<\/span>url <span class=\"token operator\">=<\/span>u<span class=\"token punctuation\">,<\/span> timeout<span class=\"token operator\">=<\/span><span class=\"token punctuation\">(5<\/span><span class=\"token number\">.05<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">10<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\r\n bf <span class=\"token operator\">=<\/span> <span class=\"token function\">BeautifulSoup<\/span><span class=\"token punctuation\">(<\/span>req<span class=\"token punctuation\">.<\/span>text<span class=\"token punctuation\">,<\/span> <span class=\"token string\">\"html.parser\"<\/span><span class=\"token punctuation\">)<\/span>\r\n 
imgtags <span class=\"token operator\">=<\/span> bf<span class=\"token punctuation\">.<\/span><span class=\"token function\">find_all<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">\"a\"<\/span><span class=\"token punctuation\">,<\/span> class_ <span class=\"token operator\">=<\/span> <span class=\"token string\">\"iusc\"<\/span><span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">for<\/span> e <span class=\"token keyword\">in<\/span> imgtags<span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">if<\/span> count <span class=\"token operator\">==<\/span> num<span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">return<\/span> False\r\n urldict <span class=\"token operator\">=<\/span> json<span class=\"token punctuation\">.<\/span><span class=\"token function\">loads<\/span><span class=\"token punctuation\">(<\/span>e<span class=\"token punctuation\">.<\/span><span class=\"token keyword\">get<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">'m'<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">if<\/span> <span class=\"token function\">get_image<\/span><span class=\"token punctuation\">(<\/span>urldict<span class=\"token punctuation\">[<\/span><span class=\"token string\">\"murl\"<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span> path<span class=\"token punctuation\">,<\/span> index<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&lt;<\/span> <span class=\"token number\">0<\/span><span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">continue<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">\"Downloaded %d picture\"<\/span><span class=\"token operator\">%<\/span><span class=\"token 
punctuation\">(<\/span>count<span class=\"token operator\">+<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\r\n sys<span class=\"token punctuation\">.<\/span>stdout<span class=\"token punctuation\">.<\/span><span class=\"token function\">flush<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n count <span class=\"token operator\">=<\/span>count<span class=\"token operator\">+<\/span><span class=\"token number\">1<\/span>\r\n index<span class=\"token operator\">=<\/span>index<span class=\"token operator\">+<\/span><span class=\"token number\">1<\/span>\r\n time<span class=\"token punctuation\">.<\/span><span class=\"token function\">sleep<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0.09<\/span><span class=\"token punctuation\">)<\/span>\r\n first <span class=\"token operator\">=<\/span> first <span class=\"token operator\">+<\/span> NUMS_PER_CRAWL\r\n time<span class=\"token punctuation\">.<\/span><span class=\"token function\">sleep<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">)<\/span>\r\n\r\n <span class=\"token keyword\">return<\/span> True\r\n\r\n<span class=\"token keyword\">if<\/span> __name__ <span class=\"token operator\">==<\/span> <span class=\"token string\">'__main__'<\/span><span class=\"token punctuation\">:<\/span>\r\n \r\n # \u5173\u952e\u8bcd\uff0c\u53ef\u8bbe\u7f6e\u4e3a\u591a\u4e2a\r\n key_words<span class=\"token operator\">=<\/span><span class=\"token punctuation\">[<\/span><span class=\"token string\">'\u6234\u53e3\u7f69'<\/span><span class=\"token punctuation\">,<\/span><span class=\"token punctuation\">]<\/span>\r\n # \u4e0b\u8f7d\u7684\u56fe\u7247\u6570\u91cf\r\n picture_num <span class=\"token operator\">=<\/span> 5<span class=\"token number\">00<\/span>\r\n\r\n <span class=\"token 
keyword\">for<\/span> i <span class=\"token keyword\">in<\/span> <span class=\"token function\">range<\/span><span class=\"token punctuation\">(<\/span><span class=\"token function\">len<\/span><span class=\"token punctuation\">(<\/span>key_words<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n word<span class=\"token operator\">=<\/span>key_words<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span>word<span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">if<\/span> <span class=\"token function\">crawl_data<\/span><span class=\"token punctuation\">(<\/span>word<span class=\"token punctuation\">,<\/span> picture_num<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n i<span class=\"token operator\">=<\/span>i<span class=\"token operator\">+<\/span><span class=\"token number\">1<\/span>\r\n<\/pre>\n<p><strong>360\u641c\u7d22<\/strong><\/p>\n<pre class=\"prism-token token language-javascript\"><span class=\"token keyword\">import<\/span> json\r\n<span class=\"token keyword\">import<\/span> os\r\n<span class=\"token keyword\">import<\/span> requests\r\n\r\n# \u8def\u5f84\r\nBASE_URL <span class=\"token operator\">=<\/span> <span class=\"token string\">'.\/\u6234\u53e3\u7f69'<\/span>\r\n# \u5173\u952e\u8bcd\r\nNAME <span class=\"token operator\">=<\/span> <span class=\"token string\">'\u6234\u53e3\u7f69'<\/span>\r\n\r\n<span class=\"token keyword\">class<\/span> <span class=\"token class-name\">PictureDownload<\/span><span class=\"token punctuation\">(<\/span>object<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n def <span class=\"token function\">__init__<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> q<span 
class=\"token operator\">=<\/span>None<span class=\"token punctuation\">,<\/span> sn<span class=\"token operator\">=<\/span><span class=\"token number\">100<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n self<span class=\"token punctuation\">.<\/span>url <span class=\"token operator\">=<\/span> <span class=\"token string\">'https:\/\/m.image.so.com\/j?q={}&amp;src=srp&amp;pn=100&amp;sn={}&amp;kn=0&amp;gn=0&amp;cn=0'<\/span>\r\n self<span class=\"token punctuation\">.<\/span>headers <span class=\"token operator\">=<\/span> <span class=\"token punctuation\">{<\/span>\r\n <span class=\"token string\">'User-Agent'<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">'Mozilla\/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit\/604.1.38 (KHTML, like Gecko) Version\/11.0 Mobile\/15A372 Safari\/604.1'<\/span>\r\n <span class=\"token punctuation\">}<\/span>\r\n self<span class=\"token punctuation\">.<\/span>q <span class=\"token operator\">=<\/span> q\r\n self<span class=\"token punctuation\">.<\/span>sn <span class=\"token operator\">=<\/span> sn\r\n self<span class=\"token punctuation\">.<\/span>num <span class=\"token operator\">=<\/span> <span class=\"token number\">0<\/span>\r\n self<span class=\"token punctuation\">.<\/span>total <span class=\"token operator\">=<\/span> <span class=\"token number\">2<\/span>\r\n\r\n def <span class=\"token function\">makedir<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n <span class=\"token keyword\">if<\/span> not os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">exists<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">join<\/span><span class=\"token 
punctuation\">(<\/span>BASE_URL<span class=\"token punctuation\">,<\/span> self<span class=\"token punctuation\">.<\/span>q<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n os<span class=\"token punctuation\">.<\/span><span class=\"token function\">makedirs<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">join<\/span><span class=\"token punctuation\">(<\/span>BASE_URL<span class=\"token punctuation\">,<\/span> self<span class=\"token punctuation\">.<\/span>q<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\r\n\r\n def <span class=\"token function\">parse_url<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n response <span class=\"token operator\">=<\/span> requests<span class=\"token punctuation\">.<\/span><span class=\"token keyword\">get<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">.<\/span>url<span class=\"token punctuation\">.<\/span><span class=\"token function\">format<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">.<\/span>q<span class=\"token punctuation\">,<\/span> self<span class=\"token punctuation\">.<\/span>num<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> headers<span class=\"token operator\">=<\/span>self<span class=\"token punctuation\">.<\/span>headers<span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">return<\/span> response<span class=\"token punctuation\">.<\/span>content<span class=\"token punctuation\">.<\/span><span class=\"token function\">decode<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n\r\n def <span 
class=\"token function\">parse_image_list<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> html_json_str<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n image_list <span class=\"token operator\">=<\/span> json<span class=\"token punctuation\">.<\/span><span class=\"token function\">loads<\/span><span class=\"token punctuation\">(<\/span>html_json_str<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">[<\/span><span class=\"token string\">'list'<\/span><span class=\"token punctuation\">]<\/span>\r\n total <span class=\"token operator\">=<\/span> json<span class=\"token punctuation\">.<\/span><span class=\"token function\">loads<\/span><span class=\"token punctuation\">(<\/span>html_json_str<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">[<\/span><span class=\"token string\">'total'<\/span><span class=\"token punctuation\">]<\/span>\r\n <span class=\"token keyword\">return<\/span> image_list<span class=\"token punctuation\">,<\/span> total\r\n\r\n def <span class=\"token function\">save_image<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> image_list<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n\r\n <span class=\"token keyword\">for<\/span> item <span class=\"token keyword\">in<\/span> image_list<span class=\"token punctuation\">:<\/span>\r\n response <span class=\"token operator\">=<\/span> requests<span class=\"token punctuation\">.<\/span><span class=\"token keyword\">get<\/span><span class=\"token punctuation\">(<\/span>item<span class=\"token punctuation\">[<\/span><span class=\"token string\">'thumb'<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span> headers<span class=\"token operator\">=<\/span>self<span class=\"token punctuation\">.<\/span>headers<span class=\"token 
punctuation\">)<\/span>\r\n <span class=\"token keyword\">with<\/span> <span class=\"token function\">open<\/span><span class=\"token punctuation\">(<\/span>os<span class=\"token punctuation\">.<\/span>path<span class=\"token punctuation\">.<\/span><span class=\"token function\">join<\/span><span class=\"token punctuation\">(<\/span>BASE_URL<span class=\"token punctuation\">,<\/span> <span class=\"token string\">'%s\\%s.jpg'<\/span> <span class=\"token operator\">%<\/span> <span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">.<\/span>q<span class=\"token punctuation\">,<\/span> item<span class=\"token punctuation\">[<\/span><span class=\"token string\">'index'<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">'wb'<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">as<\/span> f<span class=\"token punctuation\">:<\/span>\r\n f<span class=\"token punctuation\">.<\/span><span class=\"token function\">write<\/span><span class=\"token punctuation\">(<\/span>response<span class=\"token punctuation\">.<\/span>content<span class=\"token punctuation\">)<\/span>\r\n\r\n def <span class=\"token function\">run<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n self<span class=\"token punctuation\">.<\/span><span class=\"token function\">makedir<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n <span class=\"token keyword\">while<\/span> self<span class=\"token punctuation\">.<\/span>num <span class=\"token operator\">&lt;<\/span> self<span class=\"token punctuation\">.<\/span>total<span class=\"token punctuation\">:<\/span>\r\n html_json_str <span class=\"token operator\">=<\/span> self<span class=\"token 
punctuation\">.<\/span><span class=\"token function\">parse_url<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n image_list<span class=\"token punctuation\">,<\/span> self<span class=\"token punctuation\">.<\/span>total <span class=\"token operator\">=<\/span> self<span class=\"token punctuation\">.<\/span><span class=\"token function\">parse_image_list<\/span><span class=\"token punctuation\">(<\/span>html_json_str<span class=\"token punctuation\">)<\/span>\r\n self<span class=\"token punctuation\">.<\/span><span class=\"token function\">save_image<\/span><span class=\"token punctuation\">(<\/span>image_list<span class=\"token punctuation\">)<\/span>\r\n self<span class=\"token punctuation\">.<\/span>num <span class=\"token operator\">+=<\/span> <span class=\"token number\">100<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">.<\/span>num<span class=\"token punctuation\">)<\/span>\r\n\r\n\r\n<span class=\"token keyword\">if<\/span> __name__ <span class=\"token operator\">==<\/span> <span class=\"token string\">'__main__'<\/span><span class=\"token punctuation\">:<\/span>\r\n xxx <span class=\"token operator\">=<\/span> <span class=\"token function\">PictureDownload<\/span><span class=\"token punctuation\">(<\/span>NAME<span class=\"token punctuation\">)<\/span>\r\n xxx<span class=\"token punctuation\">.<\/span><span class=\"token function\">run<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\r\n\r\n<\/pre>\n<p><strong>\u641c\u72d7\u641c\u7d22<\/strong><\/p>\n<pre class=\"prism-token token language-javascript\"><span class=\"token keyword\">import<\/span> requests\r\n<span class=\"token keyword\">import<\/span> json\r\n<span class=\"token keyword\">import<\/span> urllib\r\n\r\n# 
\u4e09\u4e2a\u53c2\u6570\uff0c\u4f60\u8981\u83b7\u53d6\u6574\u4e2a\u56fe\u7247\u96c6\u7684\u540d\u5b57\uff0c\u4f60\u8981\u83b7\u53d6\u591a\u5c11\u5f20\uff0c\u83b7\u53d6\u8fc7\u6765\u7684\u653e\u5728\u54ea\u91cc\r\ndef <span class=\"token function\">getSogoulmag<\/span><span class=\"token punctuation\">(<\/span>category<span class=\"token punctuation\">,<\/span>length<span class=\"token punctuation\">,<\/span>path<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\r\n n<span class=\"token operator\">=<\/span>length\r\n cate<span class=\"token operator\">=<\/span>category\r\n # \u83b7\u53d6\u7684\u662f\u56fe\u7247\u6240\u6709\u4fe1\u606f\r\n imgs<span class=\"token operator\">=<\/span>requests<span class=\"token punctuation\">.<\/span><span class=\"token keyword\">get<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">'http:\/\/pic.sogou.com\/pics\/channel\/getAllRecomPicByTag.jsp?category='<\/span><span class=\"token operator\">+<\/span>cate<span class=\"token operator\">+<\/span>\r\n <span class=\"token string\">'&amp;tag=%E5%85%A8%E9%83%A8&amp;start=0&amp;len='<\/span><span class=\"token operator\">+<\/span><span class=\"token function\">str<\/span><span class=\"token punctuation\">(<\/span>n<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\r\n\r\n # \u8f6c\u6362\u6210\u4e3ajson\u683c\u5f0f\r\n jd<span class=\"token operator\">=<\/span>json<span class=\"token punctuation\">.<\/span><span class=\"token function\">loads<\/span><span class=\"token punctuation\">(<\/span>imgs<span class=\"token punctuation\">.<\/span>text<span class=\"token punctuation\">)<\/span>\r\n # all_items\u6240\u6709\u7684\u56fe\u7247\r\n jd<span class=\"token operator\">=<\/span>jd<span class=\"token punctuation\">[<\/span><span class=\"token string\">'all_items'<\/span><span class=\"token punctuation\">]<\/span>\r\n\r\n imgs_t<span class=\"token operator\">=<\/span><span class=\"token 
punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>\r\n <span class=\"token keyword\">for<\/span> j <span class=\"token keyword\">in<\/span> jd<span class=\"token punctuation\">:<\/span>\r\n # \u901a\u8fc7\u5b9a\u4f4dbthumbUrl\u83b7\u53d6\u56fe\u7247\r\n imgs_t<span class=\"token punctuation\">.<\/span><span class=\"token function\">append<\/span><span class=\"token punctuation\">(<\/span>j<span class=\"token punctuation\">[<\/span><span class=\"token string\">'bthumbUrl'<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span>\r\n m<span class=\"token operator\">=<\/span><span class=\"token number\">0<\/span>\r\n <span class=\"token keyword\">for<\/span> img <span class=\"token keyword\">in<\/span> imgs_t<span class=\"token punctuation\">:<\/span>\r\n # \u6253\u5370\u67d0\u4e00\u5f20\u56fe\u7247\u6b63\u5728\u4e0b\u8f7d\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token function\">str<\/span><span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">)<\/span><span class=\"token operator\">+<\/span><span class=\"token string\">'.jpg'<\/span><span class=\"token operator\">+<\/span><span class=\"token string\">'Downlod......'<\/span><span class=\"token punctuation\">)<\/span>\r\n # \u7528\u6765\u628a\u8fdc\u7a0b\u6570\u636e\u4e0b\u8f7d\u5230\u672c\u5730\r\n urllib<span class=\"token punctuation\">.<\/span>request<span class=\"token punctuation\">.<\/span><span class=\"token function\">urlretrieve<\/span><span class=\"token punctuation\">(<\/span>img<span class=\"token punctuation\">,<\/span>path<span class=\"token operator\">+<\/span><span class=\"token function\">str<\/span><span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">)<\/span><span class=\"token operator\">+<\/span><span class=\"token string\">'.jpg'<\/span><span class=\"token punctuation\">)<\/span>\r\n m<span class=\"token operator\">=<\/span>m<span 
class=\"token operator\">+<\/span><span class=\"token number\">1<\/span>\r\n <span class=\"token function\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">'Complete!'<\/span><span class=\"token punctuation\">)<\/span>\r\n \r\n# \u8c03\u7528\u6574\u4e2a\u5904\u7406\u903b\u8f91\r\n# \u4e09\u4e2a\u53c2\u6570\uff0c\u4f60\u8981\u83b7\u53d6\u6574\u4e2a\u56fe\u7247\u96c6\u7684\u540d\u5b57\uff0c\u4f60\u8981\u83b7\u53d6\u591a\u5c11\u5f20\uff0c\u83b7\u53d6\u8fc7\u6765\u7684\u653e\u5728\u54ea\u91cc\r\n<span class=\"token function\">getSogoulmag<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">'\u6234\u53e3\u7f69'<\/span><span class=\"token punctuation\">,<\/span><span class=\"token number\">10<\/span><span class=\"token punctuation\">,<\/span><span class=\"token string\">'.\/sougoutupian\/'<\/span><span class=\"token punctuation\">)<\/span><\/pre>\n<p><strong>\u722c\u53d6360\u56fe\u7247\u7684\u8fc7\u7a0b\u5982\u56fe1\u6240\u793a\uff1a<\/strong><\/p>\n<figure>\n<div class=\"image-block\"><img decoding=\"async\" class=\"\" data-original=\"https:\/\/ask.qcloudimg.com\/http-save\/yehe-1249275\/0sg5jzfn1c.gif\" src=\"https:\/\/byy3.com\/wp-content\/themes\/MNews%20V2.4\/images\/post-loading.gif\" title=\"\u4f7f\u7528python\u6279\u91cf\u722c\u53d6\u4e3b\u6d41\u641c\u7d22\u5f15\u64ce\u56fe\u7247\u63d2\u56fe\" alt=\"\u4f7f\u7528python\u6279\u91cf\u722c\u53d6\u4e3b\u6d41\u641c\u7d22\u5f15\u64ce\u56fe\u7247\u63d2\u56fe\" \/><\/div>\n<\/figure>\n<p>\u56fe1 \u722c\u53d6360\u56fe\u7247\u5168\u8fc7\u7a0b<\/p>\n<p>\u6211\u4eec\u53ef\u4ee5\u770b\u5230\uff0c\u4f7f\u7528 
pycharm\u8fd0\u884c\u7a0b\u5e8f\u540e\uff0c\u56fe\u7247\u9646\u7eed\u5f00\u59cb\u4e0b\u8f7d\uff0c\u5f53\u7136\uff0c\u6709\u4e9b\u56fe\u7247\u662f\u5e72\u6270\u6570\u636e\uff0c\u9700\u8981\u624b\u52a8\u6e05\u7406\u6389\uff0c\u76f8\u6bd4\u4e8e\u4e00\u5f20\u5f20\u4e0b\u8f7d\uff0c\u8fd8\u662f\u65b9\u4fbf\u5f88\u591a\u7684\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6700\u8fd1\u5728\u505a\u4e00\u4e2a\u53e3\u7f69\u8bc6\u522b\u7684\u5e94\u7528\uff0c\u9700\u8981\u5f88\u591a\u6234\u53e3\u7f69\u7684\u4eba\u7684\u56fe\u7247\u4f5c\u4e3a\u6570\u636e\u8bad\u7ec3\u6a21\u578b\uff0c\u56e0\u516c\u53f8\u6ca1\u6709\u63d0\u4f9b\u6570\u636e\uff0c\u53ea\u80fd\u6211\u4eec\u81ea\u5df1\u7528pyt [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[20],"tags":[33,352,61,317,51],"class_list":["post-798","post","type-post","status-publish","format-standard","hentry","category-python","tag-python"],"_links":{"self":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts\/798","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=798"}],"version-history":[{"count":0,"href":"https:\/\/byy3.com\/index.php?rest_route=\/wp\/v2\/posts\/798\/revisions"}],"wp:attachment":[{"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=798"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=798"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/byy3.com\/index.php?rest_route=%2Fwp%2Fv2%
2Ftags&post=798"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}