煎蛋图片爬虫

煎蛋图片页面的 URL 以前用 page-1、page-2 这样的递增页码表示页数，现在变成了“随机串 + 序列”的形式，因此无法再通过页码递增来翻页，只能从当前页面中解析“上一页”链接逐页抓取。

from bs4 import BeautifulSoup
import requests
import re

def download(img_url, headers, n):
    r"""Fetch one image and write it to the ``E:\jandan`` directory.

    The file name is ``<n>=<last 15 characters of img_url>``.

    Args:
        img_url: Absolute URL of the image to download.
        headers: HTTP headers sent with the request.
        n: Sequence number prefixed to the saved file name.

    Raises:
        OSError: If the target file cannot be opened for writing (for
            example, when the output directory does not exist).
    """
    req = requests.get(img_url, headers=headers)
    name = '%s' % n + '=' + img_url[-15:]
    path = r'E:\jandan'
    file_name = path + '\\' + name
    # Use a context manager so the file is always flushed and closed, even
    # if the write raises.
    with open(file_name, 'wb') as f:
        f.write(req.content)




def get(url):
    """Crawl picture pages starting at *url* and download every linked image.

    For each page, every ``<a class="view_img_link">`` is treated as an image
    link and passed to ``download``. The crawl then follows the first
    ``<a class="previous-comment-page">`` link to the next page and stops
    when a page has no such link.

    Args:
        url: Absolute URL of the first page to crawl.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
    target = url
    # Iterate page by page instead of recursing inside an endless loop, so
    # the crawl terminates once there is no older page to follow.
    while target:
        # Send the same browser User-Agent for the listing page as for the
        # image requests.
        req = requests.get(url=target, headers=headers)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        bf = BeautifulSoup(req.text, 'html.parser')
        # The counter restarts on every page; it only prefixes file names.
        for count, link in enumerate(
                bf.find_all('a', class_='view_img_link'), start=1):
            # hrefs are protocol-relative ("//host/path"), so add the scheme.
            img_url = 'http:' + link['href']
            print(img_url)
            download(img_url, headers, count)
        print(target)
        older = bf.find('a', class_='previous-comment-page')
        target = 'http:' + older['href'] if older is not None else None
# Start crawling from the newest picture page. The host needs its ".net"
# TLD; a bare "jandan" host name does not resolve.
get('http://jandan.net/ooxx/')

更多推荐

煎蛋图片爬虫

煎蛋图片爬虫

发布评论取消回复

最近发表

热门文章

标签列表

煎蛋图片爬虫

相关文章

发布评论取消回复

最近发表

热门文章

标签列表