# ToonKor Season 2 All
import bs4, codecs
import requests
import base64
import os
import io
# Cooperative stop flag polled by the download loops (getToonKor,
# getToonKorList, saveToonKorComics); signal_handler is meant to set it
# when the user presses Ctrl+C.
quit_flag = False
import signal
import sys
def signal_handler(sig, frame):
    """Handle SIGINT by asking the download loops to stop.

    Sets the module-level ``quit_flag`` to True; the loops that poll the
    flag stop at their next check instead of the process being killed.

    Args:
        sig: Signal number delivered to the handler (unused).
        frame: Stack frame that was interrupted (unused).
    """
    # Without this declaration the assignment below would only create a
    # local variable and the module-level flag would never change.
    global quit_flag
    quit_flag = True
    print('You pressed Ctrl+C!', quit_flag)
# Route Ctrl+C (SIGINT) to signal_handler instead of Python's default
# handler, which would raise KeyboardInterrupt.
signal.signal(signal.SIGINT, signal_handler)
#print('Press Ctrl+C')
#signal.pause()
# Folder that getToonKorList passes to getToonKor as the download root;
# get_week_webtoons rebinds it per week.
target_folder = r"D:/Temp6"
# Every request in this file is made with verify=False; silence the
# InsecureRequestWarning urllib3 would otherwise emit for each one.
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
# Suffix filter applied to image URLs in getToonKor; None accepts every image.
image_ext = None
# Browser-like User-Agent header used for episode page and file requests.
request_headers = {
    'User-Agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'),
}
def safeFileName(filename):
    """Return *filename* with characters unsafe in a file name replaced by "_".

    The characters ``: ? / * < >`` and tab are replaced, as are the
    backslash, ``|`` and double quote, which are also rejected in Windows
    file names.  Leading and trailing whitespace is stripped after the
    replacement.

    Args:
        filename: Candidate file or folder name (a single path component).

    Returns:
        The sanitized name.
    """
    unsafe_chars = ':?/*<>\t\\|"'
    # One translate() pass replaces every unsafe character.
    table = str.maketrans(unsafe_chars, "_" * len(unsafe_chars))
    return filename.translate(table).strip()
def getFile(url):
    """Parse a local UTF-8 HTML file and return it as a BeautifulSoup tree.

    Args:
        url: Path of the file to read (despite the parameter name, it is
            opened as a local file).

    Returns:
        The document parsed with the 'html.parser' backend.
    """
    with codecs.open(url, mode='r', encoding='utf8') as source:
        markup = source.read()
    soup = bs4.BeautifulSoup(markup, 'html.parser')
    return soup
def getUrl(url, headers={}, params=()):
    """Fetch *url* with an HTTP GET and return the parsed document.

    Args:
        url: Address to request.
        headers: Extra request headers, passed through to requests.get.
        params: Query parameters, passed through to requests.get.

    Returns:
        The response text parsed by BeautifulSoup with 'html.parser'.
    """
    # verify=False: TLS certificate verification is deliberately disabled.
    response = requests.get(url, params=params, headers=headers, verify=False)
    return bs4.BeautifulSoup(response.text, 'html.parser')
def getUrlHtml(url, headers={}, params=()):
    """Fetch *url* and return both the parsed document and the raw HTML.

    Args:
        url: Address to request.
        headers: Extra request headers, passed through to requests.get.
        params: Query parameters, passed through to requests.get.

    Returns:
        A ``(soup, html)`` tuple: the response text parsed with
        'html.parser', and the response body decoded as UTF-8.
    """
    # verify=False: TLS certificate verification is deliberately disabled.
    reply = requests.get(url, params=params, headers=headers, verify=False)
    soup = bs4.BeautifulSoup(reply.text, 'html.parser')
    raw_html = reply.content.decode('utf8')
    return soup, raw_html
def urlToFile(url, file_name):
    """Download *url* and write the response body to *file_name*.

    The request is sent with the module-level ``request_headers``; the
    body is written in binary mode, replacing any existing file.

    Args:
        url: Address of the resource to download.
        file_name: Destination path.
    """
    # verify=False: TLS certificate verification is deliberately disabled.
    download = requests.get(url, params=(), headers=request_headers, verify=False)
    with open(file_name, "wb") as target:
        target.write(download.content)
def extractTag(bs, tag):
    """Call ``extract()`` on every element returned by ``bs(tag)``.

    Args:
        bs: Callable document object; it is called with *tag* to get the
            elements to remove.
        tag: Selector passed to *bs*.
    """
    for node in bs(tag):
        node.extract()
def getToonKor(comicsUrl, baseUrl, baseDir):
    """Save one HTML viewer page per episode of the series at *comicsUrl*.

    The series page is fetched to read the series title and the episode
    list.  A folder named after the title is created under *baseDir*
    (missing parent folders are created too); if that folder already
    exists the function returns without doing anything.  For every
    episode, the episode page is fetched, the base64-encoded markup held
    in its ``var tnimg = '...'`` script variable is decoded, and an
    ``.html`` file with one ``<img>`` tag per image URL is written into
    the series folder.

    Page fetches are retried without limit until they can be parsed; a
    retry loop gives up (and the function returns) only when the
    module-level ``quit_flag`` is set.

    Args:
        comicsUrl: URL of the series page.
        baseUrl: Site root, prefixed to episode links and to image URLs
            that do not start with "http".
        baseDir: Folder under which the series folder is created.
    """
    # Fetch the series page until its title can be read.  Only Exception
    # is caught so KeyboardInterrupt / SystemExit are never swallowed.
    while True:
        try:
            doc = getUrl(comicsUrl)
            table = doc.select("table.bt_view2")[0]
            elist = table.select("td.bt_title")
            title = elist[0].text
            break
        except Exception:
            print(comicsUrl, "-> retry")
            if quit_flag:
                return

    table = doc.select("table.web_list")[0]
    elist = table.select("td.content__title")

    new_dir = os.path.join(baseDir, safeFileName(title))
    # NOTE(review): an existing folder skips the whole series, so a series
    # interrupted part-way is not resumed on the next run.
    if os.path.isdir(new_dir):
        return
    os.makedirs(new_dir)

    marker = "var tnimg = '"
    for count, e in enumerate(elist, start=1):
        url = baseUrl + e['data-role']
        title = e['alt']
        html_file = os.path.join(new_dir, safeFileName(title) + ".html")
        # Check before downloading so an existing episode costs no request.
        if os.path.isfile(html_file):
            print(html_file, "-> exists")
            continue

        # Fetch the episode page until the image payload marker is present.
        while True:
            try:
                _, html_img = getUrlHtml(url, request_headers)
                begin = html_img.index(marker)
                break
            except Exception:
                print(url, "-> retry")
                if quit_flag:
                    return

        # The payload is the base64 text between the marker and the
        # closing "';" of the JavaScript string literal.
        end = html_img.index("';", begin)
        data = html_img[begin + len(marker):end]
        img_list = base64.b64decode(data.encode("UTF-8")).decode("UTF-8")
        imgs = bs4.BeautifulSoup(img_list, 'html.parser').select("img")

        print(len(elist), count, html_file)
        # The context manager closes the file even if a write fails.
        with open(html_file, "w") as f:
            # Ask the browser not to send a Referer header for the images.
            f.write('<meta name="referrer" content="no-referrer" /><br>\n')
            for img in imgs:
                img_url = img.get('src')
                if not img_url:
                    continue
                if image_ext is not None and not img_url.endswith(image_ext):
                    continue
                if not img_url.startswith("http"):
                    img_url = baseUrl + img_url
                f.write('<img src="' + img_url + '" /><br>\n')
def saveToonKorComics():
    """Download a hand-picked list of series into ``D:/Temp2/``.

    Each listed series page is handed to getToonKor; the loop stops early
    when ``quit_flag`` is set.  "END" is printed when the loop is left.
    """
    site_root = "https://tkr035.com"
    output_root = "D:/Temp2/"
    series_pages = [
        "https://tkr035.com/webtoon/1061",
    ]
    for series_page in series_pages:
        getToonKor(series_page, site_root, output_root)
        if quit_flag:
            break
    print("END")
def getToonKorList(list_url, start=0):
    """Download every series linked from the listing page at *list_url*.

    Entries are the ``div.section-item-inner`` elements of the page,
    numbered from 1; the first link in each entry supplies the series
    name (``alt``) and URL (``href``).  Each series is saved with
    getToonKor into the module-level ``target_folder``.

    Args:
        list_url: URL of the listing page.
        start: Entries whose 1-based position is below this value are
            skipped.
    """
    listing = getUrl(list_url)
    entries = listing.select("div.section-item-inner")
    total = len(entries)
    for position, entry in enumerate(entries, start=1):
        if position < start:
            continue
        link = entry.select("a")[0]
        name, href = link['alt'], link['href']
        print(position, total, name, href)
        getToonKor(href, "https://tkr035.com", target_folder)
        # Stop after the current series once a stop has been requested.
        if quit_flag:
            break
def get_finished_webtoons():
    """Download the series listed on pages 1-13 of the "완결" listing.

    Page 1 is requested without a query string; later pages add
    ``?gbun=&wpage=&page=N``.  Pages 1, 4 and 6 start at a fixed entry
    position (36, 130 and 195); every other page starts from the top.
    The run stops between pages once ``quit_flag`` is set.
    """
    listing_url = "https://tkr035.com/wt/%EC%99%84%EA%B2%B0"
    # Entry position passed as ``start`` for specific pages.
    start_positions = {1: 36, 4: 130, 6: 195}
    for page in range(1, 14):
        if page == 1:
            page_url = listing_url
        else:
            page_url = listing_url + "?gbun=&wpage=&page=" + str(page)
        getToonKorList(page_url, start_positions.get(page, 0))
        # Without this check a stop request would only end the current
        # page and the next page would start downloading again.
        if quit_flag:
            return
def get_continue_webtoons():
    """Download every series on the single hard-coded listing page below."""
    listing_url = (
        "https://tkr035.com/wt/%EC%B5%9C%EC%8B%A0/0/all/"
        "%EC%9D%B8%EA%B8%B0//%EC%A0%84%EC%B2%B4"
    )
    getToonKorList(listing_url, 0)
def get_week_webtoons(week, page, start=0):
    """Download one listing page of the per-week series index.

    Rebinds the module-level ``target_folder`` to ``D:/Temp7/<week>`` so
    the series found on the page are saved there, then processes the page
    with getToonKorList.

    Args:
        week: Value placed in the week segment of the listing URL and
            used as the output sub-folder name.
        page: Listing page number.
        start: Entry position forwarded to getToonKorList.
    """
    global target_folder
    week_text = str(week)
    target_folder = r"D:/Temp7/" + week_text
    listing_url = "".join((
        "https://tkr035.com/wt/%EC%97%B0%EC%9E%AC%EC%A4%91/",
        week_text,
        "/all/%EC%9D%B8%EA%B8%B0/%EC%A0%84%EC%B2%B4?gbun=&wpage=&page=",
        str(page),
    ))
    getToonKorList(listing_url, start)
def get_week_webtoons_all():
    """Download the per-week listings for weeks 2 through 8.

    Weeks 2-7 are fetched as three listing pages each and week 8 as one.
    Week 1 is not fetched here (its calls were disabled in the original
    code).  The run stops between pages once ``quit_flag`` is set.
    """
    # (week, number of listing pages) pairs, processed in order.
    pages_per_week = ((2, 3), (3, 3), (4, 3), (5, 3), (6, 3), (7, 3), (8, 1))
    for week, page_count in pages_per_week:
        for page in range(1, page_count + 1):
            get_week_webtoons(week, page)
            # Without this check a stop request would only end the
            # current page and the next one would start downloading.
            if quit_flag:
                return
# Script entry point: only the per-week download runs; the alternative
# entry point below is left disabled.
if __name__ == "__main__":
    #get_continue_webtoons()
    get_week_webtoons_all()