Advertisement

使用selenium获取列表数据并展示;获取标题-图片-超链接数据

阅读量:

代码:

复制代码
 import time

 from selenium import webdriver
 from selenium.common.exceptions import TimeoutException, WebDriverException
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
    
  
    
 # 设置 Chrome 浏览器选项
    
 chrome_options = Options()
    
 chrome_options.add_argument("--start-maximized")  # 启动时最大化窗口
    
  
    
 # 指定 ChromeDriver 路径
    
 driver_path = r'D:\software\chromedriver\chromedriver127\chromedriver-win64\chromedriver.exe'
    
  
    
  
    
 # 定义公共常量
    
 BASE_URL = "http://www.***.com"
    
 TARGET_XPATH = "/html/body/div[6]/ul[1]/li/font[1]/b"
    
 SUCCESS_TEXT = "5"
    
 NEW_PAGE_URL = "http://www.***.com/Home/Category/index/fClass/295.html"
    
 LI_TARGET_XPATH = "//ul[@class='class_gdlist_frm']//li"
    
  
    
  
    
 # 创建 Service 对象并传入 ChromeDriver 路径
    
 service = Service(executable_path=driver_path)
    
  
    
 # 启动浏览器并打开百度首页
    
 driver = webdriver.Chrome(service=service, options=chrome_options)
    
 driver.get(BASE_URL)
    
  
    
  
    
  
    
 # 等待页面加载完成,假设等待某个元素加载完成
    
 # 企查查已陪伴您1428天
    
 WebDriverWait(driver, 600).until(
    
     EC.presence_of_element_located((By.XPATH, TARGET_XPATH))
    
 )
    
  
    
 # 获取目标元素
    
 element = driver.find_element(By.XPATH, TARGET_XPATH)
    
  
    
 # 获取并打印该元素的文本内容
    
 element_text = element.text
    
 if element_text == SUCCESS_TEXT:
    
  
    
     driver.get(NEW_PAGE_URL)  # 跳转到新页面
    
  
    
  
    
 # 等待页面加载完成
    
 time.sleep(2)  # 等待 2 秒以确保页面加载
    
  
    
 # 查找所有商品的 li 项目
    
 items = driver.find_elements(By.XPATH, LI_TARGET_XPATH)
    
  
    
 # 遍历每个 li 项目,提取数据
    
 for item in items:
    
     try:
    
     # 获取标题
    
     title_element = item.find_element(By.XPATH, ".//a[@title]")
    
     title = title_element.get_attribute("title")
    
  
    
     # 获取价格
    
     price_element = item.find_element(By.XPATH, ".//span//big")
    
     price = price_element.text
    
  
    
     # 获取图片链接
    
     image_element = item.find_element(By.XPATH, ".//img")
    
     image_url = image_element.get_attribute("src")
    
  
    
     # 获取商品详情链接
    
     detail_link_element = item.find_element(By.XPATH, ".//a[@title]")
    
     detail_link = detail_link_element.get_attribute("href")
    
  
    
     # 打印提取的数据
    
     print(f"标题: {title}")
    
     print(f"价格: {price}")
    
     print(f"图片: {image_url}")
    
     print(f"超链接: {detail_link}")
    
     print("-" * 50)  # 分隔符
    
  
    
     except Exception as e:
    
     print(f"Error occurred: {e}")
    
  
    
  
    
 print("Element Text:", element_text)
    
  
    
  
    
  
    
 # 等待一段时间,保持浏览器开启
    
 input("Press Enter to close the browser...")
    
  
    
 # 关闭浏览器
    
 # driver.quit()
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-08-16/zqW4OrjES3mNFhY97kuIpiZCctV1.png)

解释:

为什么有时能获取到数据,有时却获取不到并报错?这是因为遍历到的部分 li 元素结构不规整,缺少对应的子标签(例如标题链接、价格或图片);此时 find_element 找不到元素,就会抛出 NoSuchElementException。

全部评论 (0)

还没有任何评论哟~