需求:
抓取一个网页(比如www.lianhehuishang.com)中的url地址,并存到F:\spider_url.txt文件中。
程序:
package com.zheng;
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class WebSpider {

    /**
     * Entry point: downloads the HTML of a hard-coded page, extracts every
     * substring matching a simple {@code http://} URL pattern, and writes each
     * match as one line to {@code F:\spider_url.txt}.
     *
     * <p>Network or file errors are reported via a stack trace and the program
     * exits normally (same best-effort behavior as the original).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // NOTE(review): this character class matches '+', '.', '?' literally
        // inside [...] — it is a crude URL matcher, kept as-is to preserve output.
        String regex = "http://[\\w+\\.?/?]+\\.[A-Za-z]+";
        // Fix: original had "Patternpile(regex)" (garbled "Pattern.compile").
        Pattern p = Pattern.compile(regex);
        try {
            // Fix: original URL "http://www.lianhehuishang/" lost its ".com" TLD.
            URL url = new URL("http://www.lianhehuishang.com/");
            URLConnection urlconn = url.openConnection();
            // try-with-resources closes both streams even on error; the original
            // finally block threw NPE if openConnection()/getInputStream() failed
            // before br/pw were assigned.
            try (PrintWriter pw =
                         new PrintWriter(new FileWriter("f:\\spider_url.txt"), true);
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(urlconn.getInputStream()))) {
                String buf;
                while ((buf = br.readLine()) != null) {
                    // Emit every URL found on this line, one per output line.
                    Matcher m = p.matcher(buf);
                    while (m.find()) {
                        pw.println(m.group());
                    }
                }
            }
            System.out.println("获取成功!");
        } catch (MalformedURLException e) {
            // Malformed hard-coded URL — cannot happen after the fix, kept for
            // interface compatibility with the original error handling.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
运行结果:
打开F:\spider_url.txt
更多推荐
Java实现网络爬虫入门Demo
发布评论