1. ToonKor Example
package com.zdiv.jlib.app.WebToon;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zdiv.jlib.base.Encoding;
import com.zdiv.jlib.base.FileUtility;
/**
 * Downloads the images of every episode of one web comic into
 * {@code baseDir/<comic title>/<episode title>/img_NNNN.jpg}.
 *
 * <p>The table-of-contents page and each episode page are fetched with jsoup. The
 * image list of an episode is the text that follows {@code var tnimg = '} in the
 * episode page; it is decoded with {@code Encoding.decodeBase64} and parsed as HTML.
 */
public class TookKor {
    /** When true, every fetched page is also written to {@code D:/aa*.html}. */
    final static boolean debug = true;

    /** Suffix an image URL must end with to be downloaded; {@code null} accepts all. */
    final static String filter = null; //"jpg";

    /** Text that immediately precedes the encoded image list in an episode page. */
    private static final String IMAGE_MARKER = "var tnimg = '";

    /**
     * Fetches and parses the page at {@code url}, retrying until it succeeds.
     *
     * <p>Every failure is printed and followed by a one-second pause. There is no retry
     * limit, so a URL that can never be fetched keeps this method looping until the
     * thread is interrupted.
     *
     * @param url address of the page to load
     * @return the parsed document
     * @throws InterruptedException if the thread is interrupted while pausing between attempts
     */
    public static Document getJsoupDocument(String url) throws InterruptedException {
        while( true ) {
            try {
                return Jsoup.connect(url).get();
            } catch ( Exception e ) {
                e.printStackTrace();
                Thread.sleep(1000);
            }
        }
    }

    /**
     * Downloads all episodes listed on the comic's table-of-contents page.
     *
     * <p>A failure inside one episode is printed and the loop moves on to the next
     * episode. Interruption is not treated as such a failure: it is rethrown so the
     * whole run stops.
     *
     * @param comicsUrl URL of the table-of-contents page
     * @param baseUrl   prefix for episode links and for image URLs that do not start with "http"
     * @param baseDir   directory under which the comic directory is created
     * @throws InterruptedException  if the thread is interrupted while a page fetch is being retried
     * @throws MalformedURLException declared for compatibility with existing callers
     * @throws IOException           if the table-of-contents page lacks the title or episode table
     */
    public static void getTookKor(String comicsUrl, String baseUrl, String baseDir)
            throws InterruptedException, MalformedURLException, IOException {
        Document doc_toc = getJsoupDocument(comicsUrl);
        if( debug ) {
            FileUtility.StringToFile("D:/aa.html", doc_toc.html());
        }

        // first() returns null when nothing matches; fail with a clear message instead of an NPE.
        Element title_table = doc_toc.select("table[class=bt_view2]").first();
        if( title_table == null ) {
            throw new IOException("table[class=bt_view2] not found in " + comicsUrl);
        }
        Element title_cell = title_table.select("td[class=bt_title]").first();
        if( title_cell == null ) {
            throw new IOException("td[class=bt_title] not found in " + comicsUrl);
        }
        String doc_title = title_cell.text();
        System.out.println(doc_title);

        Element table = doc_toc.select("table[class=web_list]").first();
        if( table == null ) {
            throw new IOException("table[class=web_list] not found in " + comicsUrl);
        }
        Elements list = table.select("td[class=content__title]");

        File dir = new File(baseDir, toDirName(doc_title));
        dir.mkdirs();

        int i = 1; // numbers the debug dumps of the episode pages
        for( Element e : list ) {
            // To resume a partial run, skip the already downloaded entries here.
            try {
                String url = baseUrl + e.attr("data-role");
                String img_title = e.attr("alt");
                System.out.println(img_title);

                Document doc_img = getJsoupDocument(url);
                String html_img = doc_img.html();
                if( debug ) {
                    FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img);
                }

                String data = extractImagePayload(html_img);
                // NOTE(review): new String(...) without a charset uses the platform default;
                // confirm what Encoding.decodeBase64 returns before pinning one.
                String img_list = new String(Encoding.decodeBase64(data));
                Elements imgs = Jsoup.parse(img_list).select("img");

                File subdir = new File(dir.getPath(), toDirName(img_title));
                subdir.mkdirs();

                int k = 1;
                for( Element img : imgs ) {
                    String img_url = img.attr("src");
                    if( filter == null || img_url.endsWith(filter) ) {
                        if( ! img_url.startsWith("http") ) {
                            img_url = baseUrl + img_url;
                        }
                        String file_name = String.format("img_%04d.jpg",k++);
                        System.out.println( img_url + " -> " + file_name );
                        FileUtility.urlToFile5(img_url,subdir.getPath() + "/" + file_name);
                    }
                }
            } catch( InterruptedException interrupted ) {
                // Must not be absorbed by the catch-all below, or the run cannot be stopped.
                throw interrupted;
            } catch( Exception e1 ){
                e1.printStackTrace();
            }
        }
    }

    /** Returns the text between the image marker and the next {@code ';} in {@code html}. */
    private static String extractImagePayload(String html) throws IOException {
        int begin = html.indexOf(IMAGE_MARKER);
        if( begin < 0 ) {
            throw new IOException("image marker not found: " + IMAGE_MARKER);
        }
        int start = begin + IMAGE_MARKER.length();
        int end = html.indexOf("';", start);
        if( end < 0 ) {
            throw new IOException("image list after marker is not terminated by ';");
        }
        return html.substring(start, end);
    }

    /** Replaces the characters {@code ?}, {@code /} and {@code :} with an underscore. */
    private static String toDirName(String title) {
        return title.replaceAll("[?/:]","_");
    }

    /**
     * Downloads each comic in the hard-coded URL list and prints "END" after each one.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException {
        String[] url = {
            "https://tkr034.com/webtoon/505",
            //"https://tkor.mobi/%EB%AA%A8%EA%B8%B0%EB%96%BC",
        };
        String iurl = "https://tkr034.com";
        String dir = "D:/Temp3/";
        for( String u : url ) {
            getTookKor(u, iurl, dir);
            System.out.println( "END" );
        }
    }
}
package com.zdiv.jlib.app.WebToon; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.zdiv.jlib.base.Encoding; import com.zdiv.jlib.base.FileUtility; public class TookKor { final static boolean debug = true; final static String filter = null; //"jpg"; public static Document getJsoupDocument(String url) throws InterruptedException { while( true ) { try { return Jsoup.connect(url).get(); } catch ( Exception e ) { e.printStackTrace(); Thread.sleep(1000); } } } public static void getTookKor(String comicsUrl, String baseUrl, String baseDir) throws InterruptedException, MalformedURLException, IOException { Document doc_toc = getJsoupDocument(comicsUrl); if( debug ) { String text = doc_toc.text(); String html = doc_toc.html(); System.out.println(html); FileUtility.StringToFile("D:/aa.html",html); } Element table = doc_toc.select("table[class=web_list]").first(); Elements list = table.select("td[class=episode__index]"); File dir = new File(baseDir,doc_toc.title().replaceAll("[?/:]","_")); dir.mkdirs(); System.out.println(doc_toc.title()); int i = 1; for( Element e : list ) { //if( ++i < 97 ) continue; try { String url = baseUrl + e.attr("data-role"); System.out.println(url); Document doc_img = getJsoupDocument(url); String html_img = doc_img.html(); if( debug ) { System.out.println(html_img); FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img); } int begin = html_img.indexOf("var toon_img = '"); int end = html_img.indexOf("';",begin); String data = html_img.substring(begin + 16, end); String img_list = new String(Encoding.decodeBase64(data)); Document doc_imgs = Jsoup.parse(img_list); Elements imgs = doc_imgs.select("img"); System.out.println(doc_img.title()); File subdir = new File(dir.getPath(),doc_img.title().replaceAll("[?/:]","_")); subdir.mkdirs(); 
System.out.println(doc_img.title()); int k = 1; for( Element img : imgs ) { String img_url = img.attr("src"); if( filter == null || img_url.endsWith(filter) ) { if( ! img_url.startsWith("http") ) { img_url = baseUrl + img_url; } String file_name = String.format("img_%04d.jpg",k++); System.out.println( img_url + " -> " + file_name ); FileUtility.urlToFile5(img_url,subdir.getPath() + "/" + file_name); } } } catch( Exception e1 ){ } } } public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException { String[] url = { //"https://tkor.lol/%EB%8F%84%EB%B0%95%EB%AC%B5%EC%8B%9C%EB%A1%9D_%EC%B9%B4%EC%9D%B4%EC%A7%80", //도박묵시록 카이지 "https://tkor.lol/%EC%A4%91%EA%B0%84%EA%B4%80%EB%A6%AC%EB%A1%9D_%ED%86%A0%EB%84%A4%EA%B0%80%EC%99%80", //토네가와 //"https://tkor.lol/%EC%97%B4%ED%98%88%EA%B0%95%ED%98%B8", //열혈강호 //"https://tkor.lol/%EC%9A%A9%EB%B9%84%EB%B6%88%ED%8C%A8", //용비불패 //"https://tkor.lol/%EB%93%9C%EB%9E%98%EA%B3%A4%EB%B3%BC", //드래곤볼 //"https://tkor.lol/%EB%93%9C%EB%9E%98%EA%B3%A4%EB%B3%BC-%EC%8A%88%ED%8D%BC", //드래곤볼 슈퍼 }; String iurl = "https://tkor.lol"; String dir = "D:/Temp2/"; for( String u : url ) { getTookKor(u, iurl, dir); } } }
package com.zdiv.jlib.app.WebToon; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.zdiv.jlib.base.Encoding; import com.zdiv.jlib.base.FileUtility; public class TookKor { final static boolean debug = false; final static String filter = null; //"jpg"; public static Document getJsoupDocument(String url) throws InterruptedException { while( true ) { try { return Jsoup.connect(url).get(); } catch ( Exception e ) { e.printStackTrace(); Thread.sleep(1000); } } } public static void getTookKor(String comicsUrl, String baseUrl, String baseDir) throws InterruptedException, MalformedURLException, IOException { Document doc_toc = getJsoupDocument(comicsUrl); if( debug ) { String text = doc_toc.text(); String html = doc_toc.html(); System.out.println(html); FileUtility.StringToFile("D:/aa.html",html); } Element table = doc_toc.select("table[class=web_list]").first(); Elements list = table.select("td[class=episode__index]"); File dir = new File(baseDir,doc_toc.title().replaceAll("[?/:]","_")); dir.mkdirs(); int i = 1; for( Element e : list ) { //if( i++ < 38 ) continue; try { String url = baseUrl + e.attr("data-role"); System.out.println(url); Document doc_img = getJsoupDocument(url); String html_img = doc_img.html(); if( debug ) { System.out.println(html_img); FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img); } int begin = html_img.indexOf("var toon_img = '"); int end = html_img.indexOf("';",begin); String data = html_img.substring(begin + 16, end); String img_list = new String(Encoding.decodeBase64(data)); Document doc_imgs = Jsoup.parse(img_list); Elements imgs = doc_imgs.select("img"); System.out.println(doc_img.title()); File subdir = new File(dir.getPath(),doc_img.title().replaceAll("[?/:]","_")); subdir.mkdirs(); int k = 1; for( Element img : imgs ) { String img_url = 
img.attr("src"); if( filter == null || img_url.endsWith(filter) ) { if( ! img_url.startsWith("http") ) { img_url = baseUrl + img_url; } String file_name = String.format("img_%04d.jpg",k++); System.out.println( img_url + " -> " + file_name ); FileUtility.urlToFile4(img_url,subdir.getPath() + "/" + file_name); } } } catch( Exception e1 ){ } } } public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException { String url = "https://tkor.lol/%EC%84%B1%EC%9D%B8%EC%9A%A9%ED%92%88%EC%A0%90-%EA%B7%B8%EB%85%80"; String iurl = "https://tkor.lol"; String dir = "D:/Temp2/"; getTookKor(url, iurl, dir); } }
package com.zdiv.jlib.app.WebToon; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.zdiv.jlib.base.Encoding; import com.zdiv.jlib.base.FileUtility; public class TookKor { public static Document getJsoupDocument(String url) throws InterruptedException { while( true ) { try { return Jsoup.connect(url).get(); } catch ( Exception e ) { e.printStackTrace(); Thread.sleep(1000); } } } public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException { String comics = "https://tkor.fit/%ED%8E%B8%EC%9D%98%EC%A0%90-%EC%83%9B%EB%B3%84%EC%9D%B4"; String baseDir = "D:/Temp2/"; String baseUrl = "https://tkor.fit"; Document doc_toc = getJsoupDocument(comics); String html = doc_toc.html(); //String text = doc_toc.text(); System.out.println(html); FileUtility.StringToFile("D:/aa.html",html); Element table = doc_toc.select("table[class=web_list]").first(); Elements list = table.select("td[class=episode__index]"); File dir = new File(baseDir,doc_toc.title()); dir.mkdirs(); int i = 1; for( Element e : list ) { //if( i++ < 96 ) continue; String url = baseUrl + e.attr("data-role"); System.out.println(url); Document doc_img = getJsoupDocument(url); String html_img = doc_img.html(); System.out.println(html_img); FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img); int begin = html_img.indexOf("var toon_img = '"); int end = html_img.indexOf("';",begin); String data = html_img.substring(begin + 16, end); System.out.println(data); String img_list = new String(Encoding.decodeBase64(data)); System.out.println(img_list); Document doc_imgs = Jsoup.parse(img_list); Elements imgs = doc_imgs.select("img"); System.out.println(doc_img.title()); File subdir = new File(dir.getPath(),doc_img.title().replaceAll("[?/:]","")); subdir.mkdirs(); int k = 1; for( Element img : imgs ) 
{ String img_url = img.attr("src"); if( ! img_url.startsWith("http") ) { img_url = baseUrl + img_url; } String file_name = String.format("img_%04d.jpg",k++); System.out.println( img_url ); FileUtility.urlToFile4(img_url,subdir.getPath() + "/" + file_name); } } } }
댓글 없음:
댓글 쓰기