// Crawls the JSON data of a page.
package pachong;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
/**
 * Small command-line fetcher: downloads one hard-coded URL and appends the
 * response text to a local file.
 */
public class Spider {

    /**
     * Fetches the configured URL, decodes the response as UTF-8 text and
     * passes it to {@link #saveHtml(String, String)}.
     *
     * <p>Failures are reported with a stack trace on standard error; the
     * method never throws.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String filepath = "d:/125.html";
        String url_str = "http://aqicn.org/forecast/beijing/cn/";

        URL url;
        try {
            url = new URL(url_str);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            // Nothing can be fetched without a valid URL; carrying on would
            // only dereference null further down.
            return;
        }

        String charSet = "utf-8";
        int sec_cont = 1000; // milliseconds per second

        try {
            URLConnection url_con = url.openConnection();
            // Retained from the original request configuration.
            url_con.setDoOutput(true);
            url_con.setReadTimeout(10 * sec_cont);
            url_con.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)");

            // try-with-resources releases the response stream even when
            // decoding or saving fails.
            try (InputStream htm_in = url_con.getInputStream()) {
                String htm_str = InputStream2String(htm_in, charSet);
                saveHtml(filepath, htm_str);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends a string to a file using UTF-8 and echoes it to standard output.
     *
     * <p>The file is opened in append mode, so repeated calls accumulate
     * content instead of overwriting it. An {@link IOException} is reported on
     * the console and is not propagated to the caller.
     *
     * @param filepath path of the file to append to
     * @param str      text to write
     */
    public static void saveHtml(String filepath, String str) {
        // try-with-resources closes (and thereby flushes) the writer on every
        // path, including a failed write.
        try (OutputStreamWriter outs =
                new OutputStreamWriter(new FileOutputStream(filepath, true), "utf-8")) {
            outs.write(str);
            System.out.print(str);
        } catch (IOException e) {
            System.out.println("Error at save html...");
            e.printStackTrace();
        }
    }

    /**
     * Reads an input stream to its end and returns the decoded text.
     *
     * <p>The stream is consumed line by line and the lines are concatenated
     * without their line terminators, so the result contains no line breaks.
     * The stream is not closed here; closing it is left to the caller.
     *
     * @param in_st   stream to read
     * @param charset name of the charset used to decode the bytes
     * @return the decoded content with line terminators removed
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String InputStream2String(InputStream in_st, String charset) throws IOException {
        BufferedReader buff = new BufferedReader(new InputStreamReader(in_st, charset));
        // StringBuilder suffices: the buffer never leaves this method.
        StringBuilder res = new StringBuilder();
        String line;
        while ((line = buff.readLine()) != null) {
            res.append(line);
        }
        return res.toString();
    }
}
