Web Crawling with the Jsoup library
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Fetches the Daum news front page and prints the title and link of every
 * anchor matched by the CSS selector {@code .item_issue a}.
 *
 * <p>Anchors whose text is empty are skipped. Each remaining anchor is
 * written to standard output as {@code title : <text> news link : <href>},
 * where {@code <href>} is the raw value of the anchor's {@code href}
 * attribute.
 *
 * @throws IOException if the page cannot be fetched; the failure is
 *     propagated to the caller rather than being printed and swallowed, so
 *     a failed fetch is not mistaken for a successful run
 */
public void WebCrawlingTest() throws IOException {
    final String url = "https://news.daum.net/";

    // Let any IOException from the fetch surface through the declared
    // throws clause instead of hiding it behind printStackTrace().
    final Document page = Jsoup.connect(url).get();
    final Elements anchors = page.select(".item_issue a");

    for (Element anchor : anchors) {
        // Read the text once; it is needed for both the check and the output.
        final String title = anchor.text();
        if (title.isEmpty()) {
            continue;
        }
        final String href = anchor.attr("href");
        System.out.println("title : " + title + " news link : " + href);
    }
}