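# Note: Jandan's image-page URLs used to be numbered page-1, page-2, ...;
# they have since changed to a random string plus a sequence number.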
from bs4 import BeautifulSoup
import requests
import re
def download(img_url, headers, n):
    """Fetch one image and save it under the local download directory.

    Args:
        img_url: Absolute URL of the image to fetch.
        headers: HTTP headers (e.g. a User-Agent) sent with the request.
        n: Counter used as the file-name prefix.

    The file is named ``<n>=<last 15 characters of img_url>`` and written
    into ``E:\\jandan``; that directory must already exist.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    req = requests.get(img_url, headers=headers, timeout=30)
    name = '%s' % n + '=' + img_url[-15:]
    path = r'E:\jandan'
    file_name = path + '\\' + name
    # ``with`` guarantees the handle is closed; the original wrote ``f.close``
    # without parentheses, which never actually closed the file.
    with open(file_name, 'wb') as f:
        f.write(req.content)
def get(url):
    """Download every linked image from a listing page and its older pages.

    Starting at ``url``, each page is fetched and parsed; every
    ``<a class="view_img_link">`` target is passed to ``download``, then the
    first ``<a class="previous-comment-page">`` link is followed. The crawl
    stops when a page has no such link or when a page URL repeats.

    Args:
        url: URL of the first listing page to crawl.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import urljoin

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
    target = url
    visited = set()  # guards against pagination links that loop back
    while target and target not in visited:
        visited.add(target)
        # Send the same headers as the image requests; the original built
        # them but fetched the page without them.
        req = requests.get(url=target, headers=headers, timeout=30)
        # Name the parser explicitly so parsing does not depend on which
        # optional parser libraries happen to be installed.
        bf = BeautifulSoup(req.text, 'html.parser')
        links = bf.find_all('a', class_='view_img_link', href=True)
        # The counter restarts at 1 on every page, as in the original.
        for count, link in enumerate(links, start=1):
            # urljoin resolves protocol-relative ("//host/x.jpg") hrefs to
            # "http://host/x.jpg" and leaves absolute URLs intact.
            img_url = urljoin(target, link['href'])
            print(img_url)
            download(img_url, headers, count)
        print(target)
        # Follow only the first older-page link; iterating instead of
        # recursing keeps the stack flat and lets the crawl terminate.
        older = bf.find('a', class_='previous-comment-page', href=True)
        target = urljoin(target, older['href']) if older is not None else None
# Kick off the crawl. The host must include its top-level domain: the bare
# name "jandan" is not a resolvable public hostname.
get('http://jandan.net/ooxx/')
更多推荐
煎蛋图片爬虫
发布评论