1. WolfCom example
"""Download every episode image of one webtoon listing page.

Runs under IronPython: the page is fetched and queried with the NSoup
.NET library and the images are saved with System.Net.WebClient.

Layout on disk:
    BASE_DIR/<listing page title>/<episode page title>/img_0001.jpg ...
"""
import os
import re

import clr
clr.AddReferenceToFileAndPath("NSoup")  # must run before `import NSoup`
import NSoup
import System
from System.IO import *
from System.Net import *  # WebClient is expected to come from here

# Set to True to dump the HTML of the matched episode list items.
debug = False
# Optional suffix filter (e.g. ".jpg"); None downloads every <img> found.
image_ext = None

LIST_URL = "https://wfwf104.com/list?toon=1229"
BASE_URL = "https://wfwf104.com"
BASE_DIR = r"D:\Temp3"

EPISODE_SELECTOR = ("div.box > div.group.left-box > "
                    "div.webtoon-bbs-list.bbs-list > ul > li")
IMAGE_SELECTOR = "section.webtoon-body > div.group.image-view > img"

# Characters Windows does not allow in a file or directory name.
_INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_dir_name(title):
    """Return *title* with characters that are illegal in Windows names replaced by "_"."""
    # Windows also silently drops trailing dots/spaces, so remove them up front.
    name = _INVALID_NAME_CHARS.sub("_", title).rstrip(". ")
    return name or "_"


def _ensure_dir(path):
    """Create *path* (including missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _absolute_url(link):
    """Return *link* unchanged if it starts with "http", else prefix BASE_URL."""
    if link.startswith("http"):
        return link
    return BASE_URL + link


def _download_episode(client, episode_url, parent_dir):
    """Save all images of one episode page into a sub-directory of *parent_dir*.

    client      -- WebClient used for the downloads (owned by the caller).
    episode_url -- absolute URL of the episode page.
    parent_dir  -- existing directory that receives the episode folder.
    """
    doc = NSoup.NSoupClient.Connect(episode_url).Get()
    print(doc.Title)

    sub_dir = os.path.join(parent_dir, _safe_dir_name(doc.Title))
    _ensure_dir(sub_dir)

    index = 1  # only advanced for images that are actually downloaded
    for img in doc.Select(IMAGE_SELECTOR):
        img_url = img.Attr('src')
        if not img_url:
            continue
        if image_ext is not None and not img_url.endswith(image_ext):
            continue
        img_url = _absolute_url(img_url)
        # NOTE: files are always named *.jpg, whatever the source extension is.
        file_name = "img_%04d.jpg" % index
        client.DownloadFile(img_url, os.path.join(sub_dir, file_name))
        print(img_url + " -> " + file_name)
        index += 1


def main():
    """Fetch the listing page and download every episode linked from it."""
    doc = NSoup.NSoupClient.Connect(LIST_URL).Get()
    items = doc.Select(EPISODE_SELECTOR)
    if debug:
        print(items.Html())
    print(doc.Title)

    new_dir = os.path.join(BASE_DIR, _safe_dir_name(doc.Title))
    _ensure_dir(new_dir)
    print(new_dir)

    # One client is reused for all downloads and released when done.
    client = WebClient()
    try:
        for item in items:
            # Presumably `First` is null (None) when the item has no <a>,
            # as in jsoup -- verify against the NSoup build in use.
            anchor = item.Select("a").First
            if anchor is None:
                continue
            href = anchor.Attr("href")
            if not href:
                continue
            _download_episode(client, _absolute_url(href), new_dir)
    finally:
        client.Dispose()


if __name__ == "__main__":
    main()
댓글 없음:
댓글 쓰기