import requests
from fake_useragent import UserAgent
from lxml import etree
class Spider(object):
    """Scrape account information from the 51zxw (我要自学网) member center.

    Workflow:
    1. Extract the anti-forgery token from the login page source for the
       subsequent POST request.
    2. POST the account/password form so the session object stores the
       login cookie.
    3. GET the member-center page with that session and parse the data.
    """

    def __init__(self):
        # NOTE(review): the original URLs read "https://www.51zxw/..." with no
        # TLD and cannot resolve; the site's domain is www.51zxw.net — confirm.
        self.login_url = 'https://www.51zxw.net/login'  # login page URL
        self.post_url = 'https://www.51zxw.net/login/NewLogin/AjaxForlogin'  # login POST endpoint
        self.url = 'https://www.51zxw.net/Login/UserCenter'  # member-center URL
        self.headers = {'User-Agent': UserAgent().random}  # randomized UA per run
        # One Session so the login cookie persists across requests.
        self.s = requests.session()

    def get_token(self):
        """Return the __RequestVerificationToken value from the login page.

        Raises:
            ValueError: if the token input is not present in the page source.
        """
        content = self.s.get(self.login_url, headers=self.headers).content
        html = etree.HTML(content)
        tokens = html.xpath(r'//input[@name="__RequestVerificationToken"]/@value')
        if not tokens:
            # Page layout changed or the request was blocked — fail loudly
            # instead of raising an opaque IndexError.
            raise ValueError('__RequestVerificationToken not found on login page')
        return tokens[0]

    def get_cookie(self):
        """Prompt for credentials and POST them so the session stores the login cookie."""
        user = input('输入账号:')
        password = input('输入密码:')
        data = {
            'loginStr': user,
            'pwd': password,
            '__RequestVerificationToken': self.get_token(),
            'isRememberlogin': 'false',
        }
        self.s.post(self.post_url, data=data, headers=self.headers)

    def get_data(self):
        """Fetch the member-center page with the logged-in session and parse it.

        Returns:
            dict: member name, member type, registration date, and the
            consumed/remaining V-coin counts. Also printed for CLI use.
        """
        content = self.s.get(self.url, headers=self.headers).content
        html = etree.HTML(content)
        item = {
            'member_name': html.xpath(r'//div[@class="Others"]/div[1]/b/text()')[0],
            # Slices strip the fixed Chinese label prefixes from the raw text.
            'member_type': html.xpath(r'//div[@class="Others"]/div[2]/text()')[0][5:],
            'register_date': html.xpath(r'//div[@class="Others"]/div[4]/text()')[0][4:],
            'consume_V': html.xpath(r'//div[@class="webCount"]//li[3]//div[2]/text()')[0],
            'remain_V': html.xpath(r'//div[@class="webCount"]//li[3]//div[3]/text()')[0],
        }
        print(item)
        return item
def main():
    """Log in to 51zxw and print the member-center account data."""
    spider = Spider()
    # NOTE: no separate spider.get_token() call here — get_cookie() fetches
    # its own token; calling it twice wastes an HTTP round-trip and discards
    # the first token.
    spider.get_cookie()
    spider.get_data()


if __name__ == '__main__':
    main()
# More recommendations:
# Scraping 51zxw member-center account info via a requests session
# (blog-post footer: "post a comment")