import requests
import bs4
import base64
import urllib.request
num_photo = 1  # running count of photos downloaded so far (shared across calls)


def download_photo(url, num, save_dir='I:/a/'):
    """Download the image at *url* and save it as <save_dir><num>.jpg.

    Parameters
    ----------
    url : str
        Direct URL of the image to fetch.
    num : str
        Base file name (without extension) for the saved file.
    save_dir : str
        Directory prefix the file is written under. Defaults to the
        original hard-coded 'I:/a/' for backward compatibility.
    """
    global num_photo
    # Context manager guarantees the HTTP response is closed even on
    # error (the original never closed it and leaked the connection).
    with urllib.request.urlopen(url) as response:
        data = response.read()
    with open(save_dir + num + '.jpg', 'wb') as f:
        f.write(data)
    # Progress message ("downloaded photo #N"); string kept byte-identical.
    print("当前已下载第%d张" % num_photo)
    num_photo = num_photo + 1
def get_url(url):
    """Fetch *url* with a desktop-browser User-Agent and return the response."""
    # Some sites reject requests without a browser-looking UA string.
    ua = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
          "(KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 "
          "Core/1.63.6735.400 QQBrowser/10.2.2614.400")
    return requests.get(url, headers={"User-Agent": ua})
def get_soup(res):
    """Parse an HTTP response body into a BeautifulSoup document tree."""
    return bs4.BeautifulSoup(res.text, "html.parser")
def get_page(num):
    """Scrape page *num* of jandan.net's ooxx board and download every image.

    Parameters
    ----------
    num : int
        Page number to fetch.
    """
    # NOTE(review): the original URLs were missing their TLDs
    # ("http://jandan/...", "http://wx2.sinaimg/...") — almost certainly
    # mangled when the code was copied; restored to the real hosts.
    url = "http://jandan.net/ooxx/page-" + str(num) + "#comments"
    large_url = "http://wx2.sinaimg.cn/large/"
    res = get_url(url)
    soup = get_soup(res)
    for node in soup.select('.img-hash'):
        # Each .img-hash node holds the image path base64-encoded.
        link = base64.b64decode(node.text.encode('utf-8'))
        file_name = str(link, 'utf-8').split('/')[-1]  # e.g. "abc123.jpg"
        new_url = large_url + file_name
        # Pass the name WITHOUT its extension — download_photo appends
        # '.jpg' itself (the original produced files named 'xxx.jpg.jpg').
        download_photo(new_url, file_name.rsplit('.', 1)[0])
def main():
    """Interactively ask for a page range and download every page in it."""
    print("you should input two number to request download some picture what you like:")
    # Prompts ask for the first and last page number (kept byte-identical).
    start = input("请输入要下载煎蛋网妹子图的页数:(当前输入第一个数字)")
    end = input("请输入要下载煎蛋网妹子图的页数:(当前输入第二个数字)")
    # +1 so the range includes the second page the user typed —
    # range()'s exclusive end silently skipped the last requested page.
    for page in range(int(start), int(end) + 1):
        get_page(page)
    print("下载完成!")
# Run the interactive downloader only when executed as a script.
if __name__ == "__main__":
    main()
# Stray blog-page text (not code) — preserved as comments so the file parses:
# 更多推荐
# 爬取煎蛋网妹子图片
# 发布评论