webCrawling | JeongKeepsCalm

webCrawling

Web crawling with the Jsoup library

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Fetches the Daum news front page and prints every linked headline it finds.
 *
 * <p>Selects each anchor located under an element with the CSS class
 * {@code item_issue} and writes its text together with the raw value of its
 * {@code href} attribute to standard output. Anchors whose text is empty are
 * skipped.
 *
 * @throws IOException if the page cannot be fetched
 */
public void WebCrawlingTest() throws IOException {

  String URL = "https://news.daum.net/";

  // Let a fetch failure propagate through the declared IOException instead of
  // printing a stack trace and returning as if the crawl had succeeded.
  Document doc = Jsoup.connect(URL).get();
  Elements els = doc.select(".item_issue a");

  for (Element el : els) {
      String title = el.text();
      if (title.isEmpty()) {
          continue;
      }
      System.out.println("title : " + title + " news link : " + el.attr("href"));
  }

}