1. Wolf.com example
# Jython 2.7 script: downloads the images of every chapter linked from a
# comic's table-of-contents page, using jsoup for HTML parsing and
# java.net / java.nio for the file transfers.
import os
import re
import sys

# The jsoup jar has to be on sys.path before the org.jsoup imports below.
sys.path.append(r"d:\Lib\jar\jsoup-1.12.1.jar")

from java.io import *
from java.net import *
from java.util import *
from java.lang import *
from java.nio.file import *
from org.jsoup import *
from org.jsoup.nodes import *
from org.jsoup.select import *
# Explicit imports for the names the fixes below rely on.
from java.io import IOException
from java.util import Base64

# NOTE(review): the star imports above presumably shadow some Python builtins
# (e.g. `Exception` -> java.lang.Exception, `IOError` -> java.io.IOError), so
# the handlers below use a bare `except:` (which covers both Python and Java
# errors) instead of naming `Exception` -- confirm before narrowing them.

# When True, dump the fetched HTML and the resolved chapter URLs.
debug = False
# Optional suffix (e.g. "jpg") an image URL must end with to be downloaded;
# None downloads every image.  The name shadows the builtin but is kept
# because it is module-level configuration.
filter = None

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"

# Redirect statuses followed by hand in urlToFile (307/308 have no
# HttpURLConnection constant).
_REDIRECT_STATUSES = (
    HttpURLConnection.HTTP_MOVED_TEMP,
    HttpURLConnection.HTTP_MOVED_PERM,
    HttpURLConnection.HTTP_SEE_OTHER,
    307,
    308,
)
_MAX_REDIRECTS = 10
_RETRY_DELAY_MS = 1000
# Characters that are not allowed in Windows file and directory names.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def decodeBase64(data):
    """Return the bytes obtained by Base64-decoding *data*."""
    # The original referenced `java.util.Base64`, but the name `java` is
    # never imported in this file; use the imported class directly.
    return Base64.getDecoder().decode(data)


def _openConnection(url, referer):
    """Open a connection to *url* (java.net.URL) with browser-like headers."""
    hc = url.openConnection()
    hc.setRequestProperty("User-Agent", USER_AGENT)
    if referer is not None:
        hc.setRequestProperty("Referer", referer)
    return hc


def urlToFile(urlStr, fileName, referer):
    """Download *urlStr* into *fileName*, replacing any existing file.

    urlStr   -- absolute URL of the resource.
    fileName -- destination path.
    referer  -- value sent as the Referer header (also on redirects).

    Redirect responses are followed by hand, at most _MAX_REDIRECTS times.
    Raises java.io.IOException when the final status is not 200; the
    original looped forever on any non-redirect error status (e.g. 404).
    """
    url = URL(urlStr)
    hc = _openConnection(url, referer)
    status = hc.getResponseCode()

    hops = 0
    while status in _REDIRECT_STATUSES and hops < _MAX_REDIRECTS:
        location = hc.getHeaderField("Location")
        if location is None:
            break
        hops += 1
        # Resolve against the current URL so a relative Location works too.
        url = URL(url, location)
        hc = _openConnection(url, referer)
        status = hc.getResponseCode()

    if status != HttpURLConnection.HTTP_OK:
        raise IOException("HTTP %d for %s" % (status, url.toString()))

    stream = hc.getInputStream()
    try:
        Files.copy(stream, Paths.get(fileName), StandardCopyOption.REPLACE_EXISTING)
    finally:
        # Files.copy leaves the source stream open; close it ourselves.
        stream.close()


def getJsoupDocument(url, maxRetries=None):
    """Fetch *url* with jsoup and return the parsed document.

    A failed fetch is reported and retried after _RETRY_DELAY_MS
    milliseconds.  With maxRetries=None (the default) it retries forever,
    as the original did; otherwise the last error is re-raised once
    *maxRetries* retries have failed.
    """
    failures = 0
    while True:
        try:
            return Jsoup.connect(url).get()
        except KeyboardInterrupt:
            raise
        except:
            failures += 1
            print("fetch failed (attempt %d) for %s: %s" % (failures, url, sys.exc_info()[1]))
            if maxRetries is not None and failures > maxRetries:
                raise
            Thread.sleep(_RETRY_DELAY_MS)


def _safeName(title):
    """Return *title* with characters illegal in Windows names replaced by '_'."""
    return _UNSAFE_NAME_CHARS.sub("_", title)


def _ensureDir(path):
    """Create directory *path*, including missing parents, if it is absent."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _downloadChapter(item, comicsUrl, baseUrl, comicDir):
    """Download all images of the chapter linked from list element *item*."""
    anchor = item.select("a").first()
    if anchor is None:
        # List entry without a link: nothing to download.
        return
    chapterUrl = baseUrl + anchor.attr("href")
    if debug:
        print(chapterUrl)

    chapterDoc = getJsoupDocument(chapterUrl)
    images = chapterDoc.select("section.webtoon-body > div.group.image-view > img")
    print(chapterDoc.title())

    chapterDir = os.path.join(comicDir, _safeName(chapterDoc.title()))
    _ensureDir(chapterDir)

    index = 1
    for img in images:
        imgUrl = img.attr("src")
        if filter is not None and not imgUrl.endswith(filter):
            continue
        if not imgUrl.startswith("http"):
            imgUrl = baseUrl + imgUrl
        fileName = "img_%04d.jpg" % index
        index += 1
        print(imgUrl + " -> " + fileName)
        try:
            urlToFile(imgUrl, os.path.join(chapterDir, fileName), comicsUrl)
        except KeyboardInterrupt:
            raise
        except:
            # Best effort: report the failure and go on with the next image
            # instead of silently abandoning the rest of the chapter.
            print("download failed for %s: %s" % (imgUrl, sys.exc_info()[1]))


def getWolfCom(comicsUrl, baseUrl, baseDir):
    """Download every chapter listed on the table-of-contents page.

    comicsUrl -- URL of the table-of-contents page; also sent as the
                 Referer for the image requests.
    baseUrl   -- prefix prepended to chapter links and to image URLs that
                 do not start with "http".
    baseDir   -- directory under which <comic title>/<chapter title>/
                 folders holding img_NNNN.jpg files are created.

    A chapter that fails is reported and skipped; the remaining chapters
    are still processed.
    """
    tocDoc = getJsoupDocument(comicsUrl)
    if debug:
        print(tocDoc.html())

    chapters = tocDoc.select("div.box > div.group.left-box > div.webtoon-bbs-list.bbs-list > ul > li")
    if debug:
        print(chapters.html())

    comicDir = os.path.join(baseDir, _safeName(tocDoc.title()))
    _ensureDir(comicDir)

    for item in chapters:
        try:
            _downloadChapter(item, comicsUrl, baseUrl, comicDir)
        except KeyboardInterrupt:
            raise
        except:
            print("chapter skipped: %s" % (sys.exc_info()[1],))


if __name__ == "__main__":
    tocUrl = "https://wfwf133.com/list?toon=1052&title=%BA%AE%BF%A1%B3%A2%C0%CE%BF%A9%C0%DA"
    siteUrl = "https://wfwf133.com"
    outDir = "D:/Temp2/"
    getWolfCom(tocUrl, siteUrl, outDir)