最近在看小甲鱼的爬虫视频，试着自己写了一下：

import os
import re
import shutil
from urllib import request
from urllib.request import urlretrieve
#import Requests


def get_picaddress(html, fold, i):
    """Download every protocol-relative .jpg/.png image referenced in *html*.

    Each ``src="//...jpg"`` or ``src="//...png"`` attribute found in the page
    text is prefixed with ``http:`` and saved with ``urlretrieve`` as
    ``<fold>/<i>_<n>.<ext>``, where ``n`` is the 0-based position of the
    match on this page and ``<ext>`` is the text after the last dot.

    Args:
        html: Page source to scan for image links.
        fold: Existing directory the images are written into.
        i: Page index, used as the file-name prefix.
    """
    matches = re.findall(r'src="(//.*?\.(?:jpg|png))"', html)
    for index, relative in enumerate(matches):
        full_url = "http:" + relative
        extension = full_url.rsplit(".", 1)[-1]
        filename = "{}_{}.{}".format(i, index, extension)
        urlretrieve(full_url, fold + "/" + filename)
        
def get_html(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request carries a desktop Chrome ``User-Agent`` header instead of
    urllib's default one.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    req = request.Request(url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as response:
        return response.read().decode("utf-8")
    
def downloadpic(fold="picfold", page=10):
    """Download the images of up to *page* consecutive listing pages.

    Starts at the jandan.net ``ooxx`` page, saves its images through
    ``get_picaddress``, then follows the ``previous-comment-page`` link to
    the next page, and so on. Stops early when a page has no such link.

    Args:
        fold: Output directory. An existing one is deleted with all of its
            contents and recreated empty.
        page: Maximum number of pages to process.
    """
    # rmtree also removes a non-empty folder left by an earlier run;
    # os.removedirs raises OSError in that case.
    if os.path.exists(fold):
        shutil.rmtree(fold)
    os.makedirs(fold, exist_ok=True)

    # A bare "jandan" host name is not resolvable; the site is jandan.net.
    url = "http://jandan.net/ooxx"
    regex = re.compile(r'href="(.*?)" class="previous-comment-page"')
    for i in range(page):
        # Each page is fetched once and reused for both the images and the
        # link to the next page.
        html = get_html(url)
        get_picaddress(html, fold, i)
        links = regex.findall(html)
        if not links:
            break  # no previous page to follow
        # The href is expected to be protocol-relative ("//host/path").
        url = "http:" + links[0]
              
# Run the scraper with its default arguments when executed as a script.
if __name__ == "__main__":
    downloadpic()

最后得到了很多漂亮小姐姐的图片。

由此记录。

更多推荐

煎蛋网图片爬虫