java 爬虫 下一页,Python + selenium 爬虫，点击下一页后，页面依然显示本页的数据？...

代码中的换页部分会先清空页面上的页码输入框，然后输入目标页码，再点击跳转按钮。整个过程没有任何报错，但页面上显示的始终是第一页的数据，请问应该如何处理？

代码：

import time

from datetime import datetime

from selenium import webdriver

# from selenium.webdriver import ChromeOptions

from selenium.webdriver.chrome.options import Options

# Path to the ChromeDriver executable handed to webdriver.Chrome().
CHROME_DRIVER: str = 'chromedriver.exe'
# Search-results page that the scraper opens.
URL_: str = 'https://red.library.sh/searchInstance'
# Output file; one scraped record is appended per line.
TXT_FILE: str = '文献.txt'  # TODO: adjust per run
# Number of the last result page; also the denominator of the progress
# percentage printed while scraping.
MAX_PAGE: int = 851  # TODO: adjust per run
# Not referenced by the code visible here; presumably the total record count
# (850 full pages of 10 plus a final page of 5) -- TODO confirm.
MAX_ITEM: int = 8505  # TODO: adjust per run

class RedLib:
    """Selenium-driven scraper for the paginated result list at ``URL_``.

    Creating an instance launches a Chrome session. ``handle_info`` then
    jumps from page to page through the pagination input box and appends the
    text of every listed record to ``TXT_FILE``.
    """

    # XPath locators for the page elements the scraper interacts with.
    PAGE_INPUT_XPATH = '//input[@class="searchInput"]'
    JUMP_BUTTON_XPATH = '//div[@class="pagination"]/button'
    ITEM_XPATH = '//div[@class="s_right"]/div[{index}]/div[@class="book"]'

    def __init__(self):
        """Configure Chrome options and start the browser session."""
        self.chrome_options = Options()
        # Add the '--headless' argument here to run without a visible window.
        # The two experimental options below switch off Chrome's automation
        # switch and automation extension.
        self.chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.chrome_options.add_experimental_option('useAutomationExtension', False)
        self.chrome_options.add_argument("--disable-web-security")
        # Content-setting value 2 for images; presumably this blocks image
        # loading to speed up page loads -- TODO confirm.
        prefs = {"profile.managed_default_content_settings.images": 2}
        self.chrome_options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(executable_path=CHROME_DRIVER, options=self.chrome_options)
        # Injected into every new document before its own scripts run, so
        # that ``navigator.webdriver`` reads as ``undefined``.
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })

    def _find(self, xpath):
        """Return the first element matching *xpath* in the current page."""
        # Local import keeps this class self-contained; the locator-based
        # ``find_element`` is used because the ``find_element_by_*`` helpers
        # are no longer available in recent Selenium releases.
        from selenium.webdriver.common.by import By

        return self.driver.find_element(By.XPATH, xpath)

    def handle_info(self, start_page, end_page=5):
        """Scrape result pages ``start_page``..``end_page`` into ``TXT_FILE``.

        Args:
            start_page: Number of the first page to scrape.
            end_page: Number of the last page to scrape (inclusive). The
                default of 5 keeps the original trial range; pass
                ``MAX_PAGE`` to walk the whole result set.

        The browser is always closed when this method returns or raises, so
        the instance cannot be reused afterwards.
        """
        try:
            self.driver.get(URL_)
            time.sleep(5)
            for now_page in range(start_page, end_page + 1):
                clock = datetime.now().strftime('%H:%M:%S')
                print(f'\n当前正处理第{now_page}页，{now_page / MAX_PAGE * 100:.2f}%，{clock}')
                time.sleep(2)

                # Page change: clear the page-number box, type the target
                # page and press the jump button. The box is looked up again
                # after clear() in case the page re-rendered it.
                self._find(self.PAGE_INPUT_XPATH).clear()
                self._find(self.PAGE_INPUT_XPATH).send_keys(f'{now_page}')
                self._find(self.JUMP_BUTTON_XPATH).click()
                # NOTE(review): a fixed delay does not prove the new page has
                # rendered. If the same rows keep coming back, wait for the
                # list content to change instead (or try the "btn-next"
                # button the original author experimented with).
                time.sleep(2)

                # The last page is expected to hold 5 records, all others 10.
                items = 5 if now_page == MAX_PAGE else 10
                page_lines = []
                for item in range(1, items + 1):
                    info = self._find(self.ITEM_XPATH.format(index=item)).text
                    # Flatten the record to one line and drop the trailing
                    # "more editions" section.
                    info_text = info.replace('\n', '&&').split('&&更多版本信息')[0]
                    print(item, info_text)
                    page_lines.append(info_text + '\n')

                # Write once per page: a page that fails half-way leaves no
                # partial rows, so a rerun from that page adds no duplicates.
                with open(TXT_FILE, 'a', encoding='utf-8') as tf:
                    tf.writelines(page_lines)
        finally:
            # Close Chrome even when a lookup fails, so no browser or driver
            # process is left running.
            self.driver.quit()

# Script entry point: scrape starting from the first result page.
if __name__ == '__main__':
    start_page = 1
    redlib = RedLib()
    redlib.handle_info(start_page)

更多推荐

java 爬虫下一页,Python + selenium 爬虫,点击下一页后,页面依然显示本页的数据？...

java 爬虫下一页,Python + selenium 爬虫，点击下一页后，页面依然显示本页的数据？...

发布评论取消回复

最近发表

热门文章

标签列表