最近在看小甲鱼的爬虫视频，试着自己写了一下：
from urllib import request
import os
import re
from urllib.request import urlretrieve
#import Requests
def get_picaddress(html, fold, i):
    """Download every protocol-relative .jpg/.png image referenced in ``html``.

    Image addresses are taken from ``src="//..."`` attributes, prefixed with
    ``http:`` and saved into ``fold`` as ``<i>_<n>.<ext>``, where ``n`` is the
    zero-based position of the image among the matches on this page.

    Args:
        html: Page source to scan for image addresses.
        fold: Existing directory that receives the downloaded files.
        i: Page index, used as the file-name prefix.

    Returns:
        None. Nothing is downloaded when no address matches.
    """
    # ``[^"]*?`` keeps a match inside one attribute value. With ``.*?`` a
    # non-matching src (e.g. a .gif) could swallow the following markup up
    # to the next ``.jpg"`` and produce a garbage URL.
    img_list = re.findall(r'src="(//[^"]*?\.(?:jpg|png))"', html)
    for count, ad in enumerate(img_list):
        address = "http:" + ad
        extension = address.split(".")[-1]
        picname = "{}_{}.{}".format(i, count, extension)
        urlretrieve(address, os.path.join(fold, picname))
def get_html(url):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    The request is sent with a desktop-browser ``User-Agent`` header instead
    of urllib's default one.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Errors raised by ``urlopen`` (e.g. ``urllib.error.URLError``) are
        propagated unchanged.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    req = request.Request(url, headers=headers)
    # The context manager closes the response even if read/decode fails.
    with request.urlopen(req) as response:
        return response.read().decode("utf-8")
def downloadpic(fold="picfold", page=10):
    """Download the images of the newest ``page`` pages of jandan's ooxx board.

    The target directory is emptied and recreated first. The crawl starts at
    the board's front page and follows each page's "previous-comment-page"
    link; images of the n-th visited page are saved with the prefix ``n_``.

    Args:
        fold: Directory that receives the pictures. Any existing content is
            deleted.
        page: Maximum number of pages to visit.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    if os.path.exists(fold):
        # os.removedirs only removes empty directories, so a second run over
        # a folder that already holds pictures would fail; rmtree does not.
        shutil.rmtree(fold)
    os.makedirs(fold, exist_ok=True)

    url = "http://jandan.net/ooxx"
    regex = re.compile(r'href="(.*?)" class="previous-comment-page"')
    for i in range(page):
        # Fetch each page once and reuse its source both for the images and
        # for locating the link to the next (older) page.
        html = get_html(url)
        get_picaddress(html, fold, i)
        previous_links = regex.findall(html)
        if not previous_links:
            # No older page is linked: stop instead of raising IndexError.
            break
        url = "http:" + previous_links[0]
# Run the crawler with its default arguments when executed as a script.
if __name__ == "__main__":
    downloadpic()
最后得到了很多漂亮小姐姐的图片。
特此记录。
更多推荐
煎蛋网图片爬虫
发布评论