2020년 6월 24일 수요일

[Java] JSoup - ToonKor Example


1. ToonKor Example

package com.zdiv.jlib.app.WebToon;
 
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
 
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
 
import com.zdiv.jlib.base.Encoding;
import com.zdiv.jlib.base.FileUtility;
 
public class TookKor {
  
  final static boolean debug = true;
  final static String filter = null; //"jpg";
  
  public static Document getJsoupDocument(String url) throws InterruptedException {
    whiletrue ) {
      try {
        return Jsoup.connect(url).get();
      } catch ( Exception e ) {
        e.printStackTrace();
        Thread.sleep(1000);
      }
    }
  }
 
  public static void getTookKor(String comicsUrl, String baseUrl, String baseDir)  
      throws InterruptedException, MalformedURLException, IOException {
 
    Document doc_toc = getJsoupDocument(comicsUrl);
    if( debug ) {
      String text = doc_toc.text();
      String html = doc_toc.html();
      //System.out.println(html);
      FileUtility.StringToFile("D:/aa.html",html);
    }
    Element title_table = doc_toc.select("table[class=bt_view2]").first();
    Elements title_list = title_table.select("td[class=bt_title]"); //"td[class=episode__index]"
    String doc_title = title_list.get(0).text();
    System.out.println(doc_title);
 
    Element table = doc_toc.select("table[class=web_list]").first();
    Elements list = table.select("td[class=content__title]"); //"td[class=episode__index]"
    File dir = new File(baseDir,doc_title.replaceAll("[?/:]","_"));
    dir.mkdirs();
    
    int i = 1;
    for( Element e : list ) {
      //if( ++i < 207 ) continue;
      try {
        String url = baseUrl + e.attr("data-role");
        String img_title = e.attr("alt");
        //System.out.println(url);
        System.out.println(img_title);
        
        Document doc_img = getJsoupDocument(url);
        String html_img = doc_img.html();
        if( debug ) {
          //System.out.println(html_img);
          FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img);
        }
        int begin = html_img.indexOf("var tnimg = '");
        int end = html_img.indexOf("';",begin);
        String data = html_img.substring(begin + 13, end);
        String img_list = new String(Encoding.decodeBase64(data));
 
        Document doc_imgs = Jsoup.parse(img_list);
        Elements imgs = doc_imgs.select("img");
        
        File subdir = new File(dir.getPath(),img_title.replaceAll("[?/:]","_"));
        subdir.mkdirs();
 
        int k = 1;
        for( Element img : imgs ) {
          String img_url = img.attr("src");
          if( filter == null || img_url.endsWith(filter) ) { 
            if( ! img_url.startsWith("http") ) {
              img_url = baseUrl + img_url;
            }
            String file_name = String.format("img_%04d.jpg",k++);
            System.out.println( img_url + " -> " + file_name );
            FileUtility.urlToFile5(img_url,subdir.getPath() + "/" + file_name);
          }
        }
      } catch( Exception e1 ){
        e1.printStackTrace();
      }
    }
  }
  
  public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException {
    String[] url = {
      "https://tkr034.com/webtoon/505",
      //"https://tkor.mobi/%EB%AA%A8%EA%B8%B0%EB%96%BC",
    };
    String iurl = "https://tkr034.com";
    String dir = "D:/Temp3/";
    for( String u : url ) {
      getTookKor(u, iurl, dir);
      System.out.println( "END" );
    }
  }
}



package com.zdiv.jlib.app.WebToon;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zdiv.jlib.base.Encoding;
import com.zdiv.jlib.base.FileUtility;

/**
 * Earlier variant of the downloader: episode links come from
 * {@code td[class=episode__index]} cells and the encoded image list is read from
 * the {@code var toon_img = '...'} script variable of each episode page.
 * Directory names are taken from the pages' {@code <title>}.
 */
public class TookKor {

  /** When true, every fetched HTML page is printed and written to {@code D:/aa*.html}. */
  final static boolean debug = true;
  /** Suffix an image URL must end with to be downloaded; {@code null} disables filtering. */
  final static String filter = null; //"jpg";

  /** Script fragment that precedes the encoded image list in an episode page. */
  private static final String IMG_MARKER = "var toon_img = '";

  /**
   * Fetches {@code url} with Jsoup, retrying until it succeeds.
   *
   * <p>Each failed attempt is printed and followed by a one second pause; there
   * is no retry limit.
   *
   * @param url address of the page to fetch
   * @return the parsed document
   * @throws InterruptedException if the thread is interrupted while pausing between attempts
   */
  public static Document getJsoupDocument(String url) throws InterruptedException {
    while( true ) {
      try {
        return Jsoup.connect(url).get();
      } catch ( Exception e ) {
        e.printStackTrace();
        Thread.sleep(1000);
      }
    }
  }

  /**
   * Returns the encoded image list found between {@link #IMG_MARKER} and the
   * following {@code ';}, or {@code null} when either delimiter is missing.
   */
  private static String extractImageData(String html) {
    int begin = html.indexOf(IMG_MARKER);
    if( begin < 0 ) {
      return null;
    }
    int start = begin + IMG_MARKER.length();
    int end = html.indexOf("';", start);
    return end < 0 ? null : html.substring(start, end);
  }

  /**
   * Downloads every episode of one comic into {@code baseDir/<page title>/<episode page title>/}.
   *
   * <p>A failure inside a single episode is printed and the loop continues with
   * the next one; an interruption is propagated so the run can be cancelled.
   *
   * @param comicsUrl address of the comic's table-of-contents page
   * @param baseUrl   site root, prepended to episode links and to image URLs not starting with "http"
   * @param baseDir   directory under which the comic's directory is created
   * @throws InterruptedException if the thread is interrupted while a page fetch is being retried
   * @throws IOException          if the episode table is missing from the page
   */
  public static void getTookKor(String comicsUrl, String baseUrl, String baseDir)
      throws InterruptedException, MalformedURLException, IOException {

    Document doc_toc = getJsoupDocument(comicsUrl);
    if( debug ) {
      String html = doc_toc.html();
      System.out.println(html);
      FileUtility.StringToFile("D:/aa.html",html);
    }

    // first() returns null when nothing matches; fail with a readable message instead of an NPE.
    Element table = doc_toc.select("table[class=web_list]").first();
    if( table == null ) {
      throw new IOException("episode table (table[class=web_list]) not found: " + comicsUrl);
    }
    Elements list = table.select("td[class=episode__index]");

    File dir = new File(baseDir,doc_toc.title().replaceAll("[?/:]","_"));
    dir.mkdirs();

    System.out.println(doc_toc.title());

    int i = 1; // sequence number for the debug HTML dumps
    for( Element e : list ) {

      //if( ++i < 97 ) continue;

      try {
        String url = baseUrl + e.attr("data-role");
        System.out.println(url);
        Document doc_img = getJsoupDocument(url);
        String html_img = doc_img.html();
        if( debug ) {
          System.out.println(html_img);
          FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img);
        }

        String data = extractImageData(html_img);
        if( data == null ) {
          System.err.println("image list marker not found, skipping: " + url);
          continue;
        }
        String img_list = new String(Encoding.decodeBase64(data));

        Document doc_imgs = Jsoup.parse(img_list);
        Elements imgs = doc_imgs.select("img");

        // The episode title was previously printed twice; once is enough.
        String episode_title = doc_img.title();
        System.out.println(episode_title);
        File subdir = new File(dir.getPath(),episode_title.replaceAll("[?/:]","_"));
        subdir.mkdirs();

        int k = 1; // images are numbered in document order
        for( Element img : imgs ) {
          String img_url = img.attr("src");
          if( filter == null || img_url.endsWith(filter) ) {
            if( ! img_url.startsWith("http") ) {
              img_url = baseUrl + img_url;
            }
            String file_name = String.format("img_%04d.jpg",k++);
            System.out.println( img_url + " -> " + file_name );
            FileUtility.urlToFile5(img_url,subdir.getPath() + "/" + file_name);
          }
        }
      } catch( InterruptedException ie ) {
        // Cancellation must not be treated as a per-episode failure.
        throw ie;
      } catch( Exception e1 ){
        // Report the failure instead of dropping it silently, then try the next episode.
        e1.printStackTrace();
      }
    }
  }

  /** Downloads the comics whose URLs are not commented out into {@code D:/Temp2/}. */
  public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException {
    String[] url = {
      //"https://tkor.lol/%EB%8F%84%EB%B0%95%EB%AC%B5%EC%8B%9C%EB%A1%9D_%EC%B9%B4%EC%9D%B4%EC%A7%80", // Gambling Apocalypse Kaiji
      "https://tkor.lol/%EC%A4%91%EA%B0%84%EA%B4%80%EB%A6%AC%EB%A1%9D_%ED%86%A0%EB%84%A4%EA%B0%80%EC%99%80", // Tonegawa
      //"https://tkor.lol/%EC%97%B4%ED%98%88%EA%B0%95%ED%98%B8", // Yeolhyeol Gangho
      //"https://tkor.lol/%EC%9A%A9%EB%B9%84%EB%B6%88%ED%8C%A8", // Yongbi Bulpae
      //"https://tkor.lol/%EB%93%9C%EB%9E%98%EA%B3%A4%EB%B3%BC", // Dragon Ball
      //"https://tkor.lol/%EB%93%9C%EB%9E%98%EA%B3%A4%EB%B3%BC-%EC%8A%88%ED%8D%BC", // Dragon Ball Super
    };
    String iurl = "https://tkor.lol";
    String dir = "D:/Temp2/";
    for( String u : url ) {
      getTookKor(u, iurl, dir);
    }
  }
}



package com.zdiv.jlib.app.WebToon;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zdiv.jlib.base.Encoding;
import com.zdiv.jlib.base.FileUtility;

/**
 * Variant of the downloader that handles a single comic URL and saves images
 * through {@code FileUtility.urlToFile4}. Episode links come from
 * {@code td[class=episode__index]} cells and the encoded image list is read from
 * the {@code var toon_img = '...'} script variable of each episode page.
 */
public class TookKor {

  /** When true, every fetched HTML page is printed and written to {@code D:/aa*.html}. */
  final static boolean debug = false;
  /** Suffix an image URL must end with to be downloaded; {@code null} disables filtering. */
  final static String filter = null; //"jpg";

  /** Script fragment that precedes the encoded image list in an episode page. */
  private static final String IMG_MARKER = "var toon_img = '";

  /**
   * Fetches {@code url} with Jsoup, retrying until it succeeds.
   *
   * <p>Each failed attempt is printed and followed by a one second pause; there
   * is no retry limit.
   *
   * @param url address of the page to fetch
   * @return the parsed document
   * @throws InterruptedException if the thread is interrupted while pausing between attempts
   */
  public static Document getJsoupDocument(String url) throws InterruptedException {
    while( true ) {
      try {
        return Jsoup.connect(url).get();
      } catch ( Exception e ) {
        e.printStackTrace();
        Thread.sleep(1000);
      }
    }
  }

  /**
   * Returns the encoded image list found between {@link #IMG_MARKER} and the
   * following {@code ';}, or {@code null} when either delimiter is missing.
   */
  private static String extractImageData(String html) {
    int begin = html.indexOf(IMG_MARKER);
    if( begin < 0 ) {
      return null;
    }
    int start = begin + IMG_MARKER.length();
    int end = html.indexOf("';", start);
    return end < 0 ? null : html.substring(start, end);
  }

  /**
   * Downloads every episode of one comic into {@code baseDir/<page title>/<episode page title>/}.
   *
   * <p>A failure inside a single episode is printed and the loop continues with
   * the next one; an interruption is propagated so the run can be cancelled.
   *
   * @param comicsUrl address of the comic's table-of-contents page
   * @param baseUrl   site root, prepended to episode links and to image URLs not starting with "http"
   * @param baseDir   directory under which the comic's directory is created
   * @throws InterruptedException if the thread is interrupted while a page fetch is being retried
   * @throws IOException          if the episode table is missing from the page
   */
  public static void getTookKor(String comicsUrl, String baseUrl, String baseDir)
      throws InterruptedException, MalformedURLException, IOException {

    Document doc_toc = getJsoupDocument(comicsUrl);
    if( debug ) {
      String html = doc_toc.html();
      System.out.println(html);
      FileUtility.StringToFile("D:/aa.html",html);
    }

    // first() returns null when nothing matches; fail with a readable message instead of an NPE.
    Element table = doc_toc.select("table[class=web_list]").first();
    if( table == null ) {
      throw new IOException("episode table (table[class=web_list]) not found: " + comicsUrl);
    }
    Elements list = table.select("td[class=episode__index]");

    File dir = new File(baseDir,doc_toc.title().replaceAll("[?/:]","_"));
    dir.mkdirs();

    int i = 1; // sequence number for the debug HTML dumps
    for( Element e : list ) {

      //if( i++ < 38 ) continue;

      try {
        String url = baseUrl + e.attr("data-role");
        System.out.println(url);
        Document doc_img = getJsoupDocument(url);
        String html_img = doc_img.html();
        if( debug ) {
          System.out.println(html_img);
          FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img);
        }

        String data = extractImageData(html_img);
        if( data == null ) {
          System.err.println("image list marker not found, skipping: " + url);
          continue;
        }
        String img_list = new String(Encoding.decodeBase64(data));

        Document doc_imgs = Jsoup.parse(img_list);
        Elements imgs = doc_imgs.select("img");

        String episode_title = doc_img.title();
        System.out.println(episode_title);
        File subdir = new File(dir.getPath(),episode_title.replaceAll("[?/:]","_"));
        subdir.mkdirs();

        int k = 1; // images are numbered in document order
        for( Element img : imgs ) {
          String img_url = img.attr("src");
          if( filter == null || img_url.endsWith(filter) ) {
            if( ! img_url.startsWith("http") ) {
              img_url = baseUrl + img_url;
            }
            String file_name = String.format("img_%04d.jpg",k++);
            System.out.println( img_url + " -> " + file_name );
            FileUtility.urlToFile4(img_url,subdir.getPath() + "/" + file_name);
          }
        }
      } catch( InterruptedException ie ) {
        // Cancellation must not be treated as a per-episode failure.
        throw ie;
      } catch( Exception e1 ){
        // Report the failure instead of dropping it silently, then try the next episode.
        e1.printStackTrace();
      }
    }
  }

  /** Downloads one hard-coded comic into {@code D:/Temp2/}. */
  public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException {
    String url = "https://tkor.lol/%EC%84%B1%EC%9D%B8%EC%9A%A9%ED%92%88%EC%A0%90-%EA%B7%B8%EB%85%80";
    String iurl = "https://tkor.lol";
    String dir = "D:/Temp2/";
    getTookKor(url, iurl, dir);
  }
}



package com.zdiv.jlib.app.WebToon;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zdiv.jlib.base.Encoding;
import com.zdiv.jlib.base.FileUtility;

/**
 * Oldest, single-method variant of the downloader: {@code main} fetches one
 * hard-coded comic, dumps every fetched page to {@code D:/aa*.html}, and saves
 * the images of each episode through {@code FileUtility.urlToFile4}.
 */
public class TookKor {

  /** Script fragment that precedes the encoded image list in an episode page. */
  private static final String IMG_MARKER = "var toon_img = '";

  /**
   * Fetches {@code url} with Jsoup, retrying until it succeeds.
   *
   * <p>Each failed attempt is printed and followed by a one second pause; there
   * is no retry limit.
   *
   * @param url address of the page to fetch
   * @return the parsed document
   * @throws InterruptedException if the thread is interrupted while pausing between attempts
   */
  public static Document getJsoupDocument(String url) throws InterruptedException {
    while( true ) {
      try {
        return Jsoup.connect(url).get();
      } catch ( Exception e ) {
        e.printStackTrace();
        Thread.sleep(1000);
      }
    }
  }

  /**
   * Downloads every episode of the hard-coded comic into
   * {@code D:/Temp2/<page title>/<episode page title>/}.
   *
   * @throws InterruptedException if the thread is interrupted while a page fetch is being retried
   * @throws IOException          if the episode table is missing from the table-of-contents page
   */
  public static void main(String[] args) throws InterruptedException, MalformedURLException, IOException {

    String comics = "https://tkor.fit/%ED%8E%B8%EC%9D%98%EC%A0%90-%EC%83%9B%EB%B3%84%EC%9D%B4";
    String baseDir = "D:/Temp2/";
    String baseUrl = "https://tkor.fit";
    Document doc_toc = getJsoupDocument(comics);
    String html = doc_toc.html();
    //String text = doc_toc.text();
    System.out.println(html);
    FileUtility.StringToFile("D:/aa.html",html);

    // first() returns null when nothing matches; fail with a readable message instead of an NPE.
    Element table = doc_toc.select("table[class=web_list]").first();
    if( table == null ) {
      throw new IOException("episode table (table[class=web_list]) not found: " + comics);
    }
    Elements list = table.select("td[class=episode__index]");

    // Strip the same characters from the comic title as from the episode titles below,
    // so that both directory levels are named by the same rule.
    File dir = new File(baseDir,doc_toc.title().replaceAll("[?/:]",""));
    dir.mkdirs();

    int i = 1; // sequence number for the HTML dumps
    for( Element e : list ) {

      //if( i++ < 96 ) continue;

      String url = baseUrl + e.attr("data-role");
      System.out.println(url);
      Document doc_img = getJsoupDocument(url);
      String html_img = doc_img.html();
      System.out.println(html_img);
      FileUtility.StringToFile(String.format("D:/aa_%03d.html",i++),html_img);

      // Locate the encoded image list; skip the episode when the page does not contain it
      // rather than slicing the HTML at a meaningless offset.
      int begin = html_img.indexOf(IMG_MARKER);
      int start = begin + IMG_MARKER.length();
      int end = begin < 0 ? -1 : html_img.indexOf("';",start);
      if( end < 0 ) {
        System.err.println("image list marker not found, skipping: " + url);
        continue;
      }
      String data = html_img.substring(start, end);
      System.out.println(data);

      String img_list = new String(Encoding.decodeBase64(data));
      System.out.println(img_list);

      Document doc_imgs = Jsoup.parse(img_list);
      Elements imgs = doc_imgs.select("img");

      System.out.println(doc_img.title());
      File subdir = new File(dir.getPath(),doc_img.title().replaceAll("[?/:]",""));
      subdir.mkdirs();

      int k = 1; // images are numbered in document order
      for( Element img : imgs ) {
        String img_url = img.attr("src");
        if( ! img_url.startsWith("http") ) {
          img_url = baseUrl + img_url;
        }
        String file_name = String.format("img_%04d.jpg",k++);
        System.out.println( img_url );
        FileUtility.urlToFile4(img_url,subdir.getPath() + "/" + file_name);
      }
    }
  }
}



댓글 없음:

댓글 쓰기