Advertisement

爬取京东图书Java实现

阅读量:

案例demo

使用的是jsoup.jar包

复制代码
 <dependency>

    
         <groupId>org.jsoup</groupId>
    
         <artifactId>jsoup</artifactId>
    
         <version>1.10.2</version>
    
     </dependency>
复制代码
  
    
 import org.jsoup.Jsoup;
    
 import org.jsoup.nodes.Document;
    
 import org.jsoup.nodes.Element;
    
 import org.jsoup.select.Elements;
    
  
    
 import java.io.IOException;
    
 import java.net.MalformedURLException;
    
 import java.net.URL;
    
  
    
 /**
  * Demo scraper: fetches a JD.com search-result page with jsoup and prints the
  * image URL, shop title, product name and price of every listed item to
  * standard output.
  */
 public class HtmlParseUtil {

     /**
      * Search page to scrape. Other products can be scraped by concatenating a
      * different search term after {@code keyword=}.
      */
     private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=java";

     /** Timeout passed to jsoup when fetching the page, in milliseconds. */
     private static final int TIMEOUT_MILLIS = 30000;

     /**
      * Fetches the search page, looks up the element with id {@code J_goodsList}
      * and prints, for each {@code li} beneath it, the first image's {@code src},
      * the shop title, the product name with the promo words removed, and the
      * price with the currency sign removed, followed by a separator line.
      *
      * @throws IOException if the page cannot be fetched, or if the page does
      *                     not contain the {@code J_goodsList} element
      */
     public void test1() throws IOException {
         Document page = Jsoup.parse(new URL(SEARCH_URL), TIMEOUT_MILLIS);

         // getElementById returns null when no element has that id; fail with a
         // descriptive error instead of a bare NullPointerException.
         Element goodsList = page.getElementById("J_goodsList");
         if (goodsList == null) {
             throw new IOException("Element with id J_goodsList not found in " + SEARCH_URL);
         }

         for (Element item : goodsList.getElementsByTag("li")) {
             String img = item.getElementsByTag("img").eq(0).attr("src");
             String price = item.getElementsByClass("p-price").eq(0).text().replace("¥", "");
             String title = item.getElementsByClass("curr-shop hd-shopname").eq(0).attr("title");
             String promoWords = item.getElementsByClass("promo-words").eq(0).text();
             String name = item.getElementsByClass("p-name").eq(0).text();

             System.out.println(img);
             System.out.println(title);
             // Literal replace: the promo text is scraped content and may contain
             // regex metacharacters, so it must not be treated as a pattern.
             System.out.println(name.replace(promoWords, ""));
             System.out.println(price);
             System.out.println("========================");
         }
     }

     /**
      * Runs the demo scrape once.
      *
      * @param args ignored
      * @throws IOException if the scrape fails
      */
     public static void main(String[] args) throws IOException {
         // test1 is an instance method, so it needs an instance to be invoked on.
         new HtmlParseUtil().test1();
     }
 }

爬取数据的过程就是先解析出 HTML,再根据 HTML 中元素的 id、class 属性以及标签名(如 div、li)进行筛选,提取所需内容。

全部评论 (0)

还没有任何评论哟~