1. HTML Parser Example (jsoup)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | import java.io.File; import java.io.IOException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class HtmlParser { Document doc; public HtmlParser() { } public void loadUrl(String url) throws IOException { doc = Jsoup.connect(url).get(); //"http://example.com/" } public void loadFile(String file) throws IOException { File input = new File(file); //"/tmp/input.html" doc = Jsoup.parse(input, "UTF-8"); //, baseUrl } public void loadString(String string) throws IOException { doc = Jsoup.parse(string); //, baseUrl } public String getTitle() { return doc.title(); } public Elements getElementsByTag(String tag) { return doc.getElementsByTag(tag); } public Elements getLinks() { Elements links = doc.getElementsByTag("a"); /* for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); System.out.println(linkHref + ", " + linkText); } */ return links; } public Elements getImages() { Elements links = doc.getElementsByTag("img"); /* for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); System.out.println(linkHref + ", " + linkText); } */ return links; } } |
댓글 없음:
댓글 쓰기