Advertisement

Python 学习之使用 Selenium 爬取豆瓣图书信息

阅读量:

使用 Selenium 爬取豆瓣图书信息

复制代码
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.edge.options import Options
    
     
    
    
    # 设定函数,获取所有图书信息
    def all_bookonfo(
        driver,
        fp,
        url="https://book.douban.com/tag/%E4%B8%AD%E5%9B%BD%E6%96%87%E5%AD%A6",
    ):
        """Scrape one Douban tag page and write one comma-separated line per book.

        For every entry under ``ul.subject-list`` the title, the four fields
        parsed from the "/"-separated info line, and the rating text are
        printed and written to *fp* as ``name,writer,publisher,year,price,rating``.

        Args:
            driver: Selenium WebDriver used to load and query the page.
            fp: Writable text file object that receives one line per book.
            url: Page to scrape; defaults to the tag page used originally.
        """
        driver.get(url)
        entries = driver.find_elements(
            By.XPATH, "//ul[@class='subject-list']/li/div[2]"
        )
        for entry in entries:
            bookname = entry.find_element(By.XPATH, "./h2/a").text
            info = entry.find_element(By.XPATH, "./div[1]").text
            writer, publication, year, price = _split_book_info(info)
            # find_elements returns an empty list instead of raising when the
            # rating node is absent, so one unrated book no longer aborts the run.
            rating_nodes = entry.find_elements(By.XPATH, "./div[2]/span[2]")
            value = rating_nodes[0].text if rating_nodes else ""
            row = (bookname, writer, publication, year, price, value)
            print(*row)
            fp.write(",".join(row) + "\n")


    def _split_book_info(info):
        """Split a "/"-separated info line into (writer, publisher, year, price).

        The writer is the first segment. With four or more segments the last
        three are taken as publisher, year and price, so extra middle segments
        (presumably translators and the like -- TODO confirm against real
        pages) do not shift those columns. With fewer than four segments the
        missing trailing fields become empty strings instead of raising
        IndexError.
        """
        parts = [part.strip() for part in info.split("/")]
        if len(parts) >= 4:
            writer = parts[0]
            publication, year, price = parts[-3:]
        else:
            writer, publication, year, price = (parts + [""] * 4)[:4]
        # Drop the currency unit entirely rather than leaving a blank behind.
        return writer, publication, year, price.replace("元", "").strip()
    
    
    
    
    if __name__ == '__main__':
        # Run Edge headless so no browser window appears while scraping.
        opt = Options()
        opt.add_argument("--headless")
        # Pass the options by keyword: the first positional parameter of
        # webdriver.Edge has not always been the options object.
        driver = webdriver.Edge(options=opt)
        try:
            # The with-block closes the output file even if scraping raises.
            with open("./doubantushu.txt3", "w+", encoding="utf-8") as fp:
                # Fetch the book listing and write it to the file.
                all_bookonfo(driver, fp)
        finally:
            # Always shut the browser down so no headless Edge process is left behind.
            driver.quit()
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    

全部评论 (0)

还没有任何评论哟~