2018년 11월 9일 금요일

[Java] HTML Parser Example



1. HTML Parser Example (jsoup)
1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import java.io.File;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small convenience wrapper around jsoup: load one HTML document from a URL,
 * a file or a string, then query it for its title or for elements by tag name.
 *
 * <p>A document must be loaded with one of the {@code load*} methods before any
 * accessor is called; otherwise the accessor throws
 * {@link IllegalStateException}. Each successful load replaces the previously
 * loaded document.
 */
public class HtmlParser {

    /** The most recently loaded document; {@code null} until a load method succeeds. */
    Document doc;

    /** Creates a parser with no document loaded. */
    public HtmlParser() {
    }

    /**
     * Loads the document by connecting to the given URL via
     * {@code Jsoup.connect(url).get()}.
     *
     * @param url address of the page to load, e.g. {@code "http://example.com/"}
     * @throws IOException if the jsoup call fails; the previously loaded
     *         document (if any) is then left in place
     */
    public void loadUrl(String url) throws IOException {
        doc = Jsoup.connect(url).get();
    }

    /**
     * Loads the document from a local file, decoding it as UTF-8.
     *
     * @param file path of the HTML file, e.g. {@code "/tmp/input.html"}
     * @throws IOException if the jsoup call fails; the previously loaded
     *         document (if any) is then left in place
     */
    public void loadFile(String file) throws IOException {
        File input = new File(file);
        doc = Jsoup.parse(input, "UTF-8");
    }

    /**
     * Loads the document by parsing HTML markup held in memory.
     *
     * @param string the HTML markup to parse
     * @throws IOException never thrown by this implementation; the clause is
     *         retained so existing callers that catch it keep compiling
     */
    public void loadString(String string) throws IOException {
        doc = Jsoup.parse(string);
    }

    /**
     * Returns the title of the loaded document, as reported by jsoup.
     *
     * @return the document title
     * @throws IllegalStateException if no document has been loaded yet
     */
    public String getTitle() {
        return requireDocument().title();
    }

    /**
     * Returns all elements of the loaded document that have the given tag name.
     *
     * @param tag tag name to search for, e.g. {@code "div"}
     * @return the matching elements
     * @throws IllegalStateException if no document has been loaded yet
     */
    public Elements getElementsByTag(String tag) {
        return requireDocument().getElementsByTag(tag);
    }

    /**
     * Returns all {@code <a>} elements of the loaded document.
     *
     * @return the anchor elements
     * @throws IllegalStateException if no document has been loaded yet
     */
    public Elements getLinks() {
        return requireDocument().getElementsByTag("a");
    }

    /**
     * Returns all {@code <img>} elements of the loaded document.
     *
     * @return the image elements
     * @throws IllegalStateException if no document has been loaded yet
     */
    public Elements getImages() {
        return requireDocument().getElementsByTag("img");
    }

    /**
     * Returns the loaded document, failing with a descriptive error instead of
     * a bare NullPointerException when an accessor is used before any load.
     */
    private Document requireDocument() {
        if (doc == null) {
            throw new IllegalStateException(
                    "No document loaded; call loadUrl, loadFile or loadString first");
        }
        return doc;
    }

}

댓글 없음:

댓글 쓰기