2022년 10월 20일 목요일

[python] Directory / Zip to PDF

from fpdf import FPDF
from PIL import Image
import zipfile
import sys
import os
import io

# A4 page size expressed in PDF points (the Pdf class below is created with unit='pt').
a4_w = 595.28
a4_h = 841.89
# Height limit used to decide when a single image is cut into horizontal strips.
max_img_h = a4_h * 4
# Upper bound on the accumulated image height that page_info puts on one page.
max_page_h = a4_h * 12

class Pdf(FPDF):
    """FPDF document with zero margins and a few convenience wrappers.

    The constructor defaults to portrait A4 measured in points, removes all
    margins and selects the default Courier 12 font. ``header`` and ``footer``
    are overridden with no-ops so pages carry no automatic decoration.
    """

    def __init__(self, orientation='P', unit='pt', pagesize='A4'):  # A4 is (595.28, 841.89) pt
        super().__init__(orientation=orientation, unit=unit, format=pagesize)
        self.set_margins(0, 0, 0)
        self.set_default_font()

    def set_info(self, title=None, subject=None, author=None, creator=None):
        """Set the document metadata fields that were given (falsy values are skipped)."""
        if title:
            self.set_title(title)
        if subject:
            self.set_subject(subject)
        if author:
            self.set_author(author)
        if creator:
            self.set_creator(creator)

    def set_default_font(self):
        """Select Courier, regular style, size 12."""
        self.set_font("Courier", "", 12)

    def set_hangul_font(self):
        """Register 'malgun.ttf' as the 'malgun' family and select it at size 12."""
        self.add_font('malgun', '', 'malgun.ttf', uni=True)
        self.set_font('malgun', '', 12)

    def header(self, title=None, logo=None):
        """Intentionally empty: no page header is drawn."""
        pass

    def footer(self):
        """Intentionally empty: no page footer is drawn."""
        pass

    def page_wdith(self):
        """Return the page width (misspelled name kept for existing callers)."""
        return self.w

    def page_width(self):
        """Return the page width; correctly spelled alias of ``page_wdith``."""
        return self.w

    def page_height(self):
        """Return the page height."""
        return self.h

    def save(self, filename):
        """Write the document to *filename*."""
        self.output(name=filename, dest='F')

    def save_tostring(self):
        """Return the result of ``output(dest='S')`` (the document as in-memory data)."""
        return self.output(dest='S')

    def add_outline(self, text):
        """Start a new document section (outline entry) named *text*."""
        self.start_section(text)

    def print_text(self, text, align='L', style="", fontsize=12):
        """Print *text* in a borderless cell exactly as wide as the text.

        *fontsize* is used as the cell height. *style* is accepted for
        signature compatibility but is not applied by this method.
        """
        self.cell(self.get_string_width(text),
                  fontsize, txt=text, ln=0,
                  align=align, border=0, fill=False)

    def print_textln(self, text="", style="", fontsize=12):
        """Print *text* with ``print_text`` and then advance 20 units to the next line."""
        # Pass by keyword: print_text's second positional parameter is `align`,
        # so positional passing would send `style` into `align` and
        # `fontsize` into `style`.
        self.print_text(text, style=style, fontsize=fontsize)
        self.ln(20)

    def print_multiline_text(self, text, border=0, align="J", fill=False):
        """Print *text* as a multi-line cell spanning the printable page width.

        border is one of 0, 1 or a combination of L, T, R, B; align is one of
        L, C, R, J. Each line is 20 units high.
        """
        width = self.w - self.l_margin - self.r_margin
        height = 20
        self.multi_cell(width, height, text, border, align, fill)

def key_str_num_2(path, pad=4):
    """Return a natural-sort key for *path* made only of characters.

    Every run of decimal digits is converted to an integer and rendered
    zero-padded to at least *pad* digits; all other text is kept as is. The
    result is the list of characters of that padded string, so keys are
    always mutually comparable (unlike a mixed int/str key).

    Args:
        path: The name or path to build a key for.
        pad: Minimum number of digits each number is padded to. Numbers with
            more digits than this are not padded further, so names mixing
            numbers longer and shorter than *pad* digits need a larger value
            to sort numerically.

    Returns:
        A list of single-character strings.
    """
    import re
    chars = []
    # The capturing group makes re.split keep the digit runs in the result.
    for part in re.split(r'(\d+)', path):
        if not part:
            continue
        # \d and str.isdecimal() both mean "Unicode decimal digit", so this
        # test selects exactly the pieces the pattern captured.
        if part.isdecimal():
            part = str(int(part)).zfill(pad)
        chars.extend(part)
    return chars
    
def key_str_num(strItem):
    """Split *strItem* into a list alternating text pieces and integers.

    Runs of decimal digits become ``int`` values and everything else stays a
    ``str``; empty pieces are dropped. For example ``"p10.jpg"`` becomes
    ``['p', 10, '.jpg']``.

    Note: when used as a sort key, two names whose digit runs sit at different
    positions (e.g. ``"a1"`` and ``"1a"``) end up comparing an int with a str,
    which raises ``TypeError`` in Python 3.

    Returns:
        A list of ``str`` and ``int`` items in their original order.
    """
    import re
    # The capturing group makes re.split keep the digit runs in the result.
    return [int(piece) if piece.isdecimal() else piece
            for piece in re.split(r'(\d+)', strItem)
            if piece]
    
def image_split_h(img, max_h):
    """Cut *img* into horizontal strips that are at most *max_h* high.

    Args:
        img: Object exposing ``width``, ``height`` and ``crop(box)`` (a PIL
            image in this file).
        max_h: Maximum strip height, in the same unit as ``img.height``.

    Returns:
        The list of cropped strips from top to bottom; empty when the image
        has zero height.

    Raises:
        ValueError: If *max_h* is not positive (the loop could never advance).
    """
    if max_h <= 0:
        raise ValueError("max_h must be positive, got %r" % (max_h,))
    print('image_split_h', img.width, img.height)
    strips = []
    top = 0
    while top < img.height:
        # Clamp the last strip to the image bottom. Using min() instead of
        # adding the remainder keeps the end exactly at img.height even when
        # max_h is a float.
        bottom = min(top + max_h, img.height)
        print(top, bottom)
        strips.append(img.crop((0, top, img.width, bottom)))
        top = bottom
    return strips
   
def images_info(path, files):
    """Open each file in *files* and describe how it is laid out at A4 width.

    Every image is scaled to the page width ``a4_w``. An image whose scaled
    height exceeds ``max_img_h`` is cut into horizontal strips with
    ``image_split_h`` so that each strip's scaled height stays within that
    limit.

    Args:
        path: Directory containing the files.
        files: File names relative to *path*; each must be openable by PIL.

    Returns:
        A list of dicts with keys ``'width'`` and ``'height'`` (scaled size in
        page units), ``'image'`` (the PIL image or strip) and ``'filename'``.
    """
    info = []
    for file in files:
        img = Image.open(os.path.join(path, file))
        scaled_h = a4_w * img.height / img.width
        if scaled_h > max_img_h:
            # max_img_h is a height in page units, while image_split_h works
            # in pixels: convert the limit to pixels for this image's width
            # (rounded down so a strip never exceeds the limit).
            max_px = max(1, int(max_img_h * img.width / a4_w))
            parts = image_split_h(img, max_px)
        else:
            parts = [img]
        for part in parts:
            info.append({'width': a4_w,
                         'height': a4_w * part.height / part.width,
                         'image': part,
                         'filename': file})
    return info
    
def page_info(infos, max_h=None):
    """Group image descriptions into pages whose total height stays bounded.

    Images are taken in order; a new page is started whenever adding the next
    image would push the running height above the limit. An image that is
    taller than the limit on its own still gets a page to itself rather than
    producing an empty page before it.

    Args:
        infos: Iterable of dicts that each carry a ``'height'`` entry.
        max_h: Maximum accumulated height per page. Defaults to the
            module-level ``max_page_h``.

    Returns:
        A list of ``{'height': total_height, 'images': [info, ...]}`` dicts.
    """
    if max_h is None:
        max_h = max_page_h
    pages = []
    current = []
    total = 0
    for info in infos:
        # Only flush a page that already holds something; otherwise an
        # oversize first image would emit an empty page.
        if current and total + info['height'] > max_h:
            pages.append({'height': total, 'images': current})
            current = []
            total = 0
        current.append(info)
        total += info['height']
    if current:
        pages.append({'height': total, 'images': current})
    return pages
    
def dir2pdf_dynamic(folder):
    """Build ``<folder>.pdf`` from the images in each sub-directory of *folder*.

    Each immediate sub-directory becomes one outline entry. Its images are
    laid out by ``images_info`` / ``page_info`` and every resulting page is
    created with a height that fits its images. Sub-directories and files are
    processed in natural order (``key_str_num_2``) so that "2" comes before
    "10", matching the other converters in this file.

    Images that cannot be encoded or placed are skipped; the failures are
    printed as they occur and again as a summary at the end.
    """
    error = []
    pdf = Pdf()
    for subdir in sorted(os.listdir(folder), key=key_str_num_2):
        path = os.path.join(folder, subdir)
        if not os.path.isdir(path):
            continue
        print(path)
        files = sorted(os.listdir(path), key=key_str_num_2)
        pages = page_info(images_info(path, files))

        for index, page in enumerate(pages):
            print(len(page['images']), page['height'])
            # NOTE(review): the extra 100 units of page height look like slack
            # so the last image does not trigger a page break - confirm.
            pdf.add_page(format=(a4_w, page['height'] + 100))
            if index == 0:
                pdf.add_outline(subdir)
            for info in page['images']:
                try:
                    image = info['image']
                    # JPEG cannot store palette or alpha modes (P, RGBA, LA,
                    # ...); convert those to RGB instead of losing the image.
                    if image.mode not in ('RGB', 'L'):
                        image = image.convert('RGB')
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format='jpeg')
                    pdf.image(img_byte_arr, w=a4_w)
                except Exception as e:
                    error.append((subdir, info['filename'], e))
                    print(e)
    pdf.save(folder + ".pdf")
    for e in error:
        print(e)

def dir2pdf(folder, width=None):
    """Convert every image found under *folder* (recursively) into one PDF.

    Files are visited in natural order (``key_str_num_2``); files PIL cannot
    open are skipped silently. Each directory that contributes an image gets
    an outline entry named after it. The PDF is written next to *folder* as
    ``<folder name>.pdf``.

    Args:
        folder: Directory to convert. A trailing path separator is tolerated.
        width: Optional maximum pixel width; wider images are scaled down
            proportionally before being embedded.
    """
    def get_listdir(folder):
        # Recursively collect the paths of all non-directory entries.
        file_list = []
        for file in os.listdir(folder):
            path = os.path.join(folder, file)
            if os.path.isdir(path):
                file_list += get_listdir(path)
            else:
                file_list.append(path)
        return file_list

    error = []
    pdf = Pdf()
    pdf.add_page(format=(a4_w, a4_h*10))
    namelist = sorted(get_listdir(folder), key=key_str_num_2)
    prev_dir = None
    for name in namelist:
        if os.path.isdir(name):
            continue
        print(name)
        curr_dir = os.path.dirname(name)
        try:
            image = Image.open(name)
            if width and image.width > width:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                image = image.resize((width, int(width*image.height/image.width)), Image.LANCZOS)
        except Exception:
            # Best effort: anything PIL cannot open or resize is not treated
            # as an image and is skipped.
            continue
        if image.height > max_img_h:
            images = image_split_h(image, max_img_h)
        else:
            images = [image]
        try:
            if prev_dir != curr_dir:
                pdf.add_outline(os.path.basename(curr_dir))
                prev_dir = curr_dir
            for part in images:
                # JPEG cannot store palette or alpha modes (P, RGBA, LA, ...);
                # convert those to RGB instead of failing on them.
                if part.mode not in ('RGB', 'L'):
                    part = part.convert('RGB')
                image_bytes = io.BytesIO()
                part.save(image_bytes, format='jpeg', quality=70)
                pdf.image(image_bytes, w=a4_w)
        except Exception as e:
            error.append((name, e))
    # Normalise first so "dir/" yields "dir.pdf" rather than "dir/.pdf".
    target = os.path.normpath(folder)
    pdf.save(os.path.join(os.path.dirname(target), os.path.basename(target) + '.pdf'))
    for e in error:
        print(e)
    
    
def zip2pdf(filename, width=None):
    """Convert the images stored in the zip archive *filename* into a PDF.

    Members ending in .png, .jpg or .jpeg (any letter case) are processed in
    natural order (``key_str_num_2``). Each archive directory that
    contributes an image gets an outline entry. The PDF is written to
    ``filename + '.pdf'``. Members that cannot be read, decoded or embedded
    are skipped; embedding and read failures are printed at the end.

    Args:
        filename: Path of the zip archive.
        width: Optional maximum pixel width; wider images are scaled down
            proportionally before being embedded.
    """
    def is_dir(name):
        return name.endswith(('/', '\\'))

    def is_image(name):
        return name.lower().endswith(('.png', '.jpg', '.jpeg'))

    def fix_name(text, info):
        # zipfile decodes names as cp437 unless the UTF-8 flag (0x800) is set.
        # Names without the flag are re-read as EUC-KR, which is what this
        # script expects such archives to contain.
        if info.flag_bits & 0x800:
            return text
        try:
            return text.encode('cp437').decode('euc-kr', 'ignore')
        except UnicodeEncodeError:
            return text

    error = []
    pdf = Pdf()
    pdf.add_page(format=(a4_w, a4_h*10))
    prev_dir = None
    # The context manager closes the archive even if an error escapes.
    with zipfile.ZipFile(filename, mode="r") as zf:
        members = sorted(zf.infolist(), key=lambda zi: key_str_num_2(zi.filename))
        for member in members:
            name = member.filename
            if is_dir(name) or not is_image(name):
                continue
            curr_dir = fix_name(os.path.dirname(name), member)
            try:
                image_bytes = zf.read(member)
            except Exception as e:
                # A damaged or encrypted member should not abort the archive.
                error.append((name, e))
                continue
            try:
                image = Image.open(io.BytesIO(image_bytes))
                if width and image.width > width:
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                    # same filter under its current name.
                    image = image.resize((width, int(width*image.height/image.width)), Image.LANCZOS)
            except Exception as e:
                print(len(image_bytes), e)
                continue
            print(fix_name(name, member))
            if image.height > max_img_h:
                try:
                    images = image_split_h(image, max_img_h)
                except Exception as e:
                    print(e)
                    continue
            else:
                images = [image]
            try:
                if prev_dir != curr_dir:
                    pdf.add_outline(os.path.basename(curr_dir))
                    prev_dir = curr_dir
                for part in images:
                    # JPEG cannot store palette or alpha modes (P, RGBA, LA,
                    # ...); convert those to RGB instead of failing on them.
                    if part.mode not in ('RGB', 'L'):
                        part = part.convert('RGB')
                    encoded = io.BytesIO()
                    part.save(encoded, format='jpeg', quality=70)
                    pdf.image(encoded, w=a4_w)
            except Exception as e:
                error.append((name, e))
    pdf.save(filename + '.pdf')
    for e in error:
        print(e)

def file2pdf(filename, width=None):
    """Dispatch *filename* to the matching converter.

    A directory goes to ``dir2pdf`` and a zip archive to ``zip2pdf``, both
    receiving *width* unchanged. Anything else only produces a message on
    standard output.
    """
    if os.path.isdir(filename):
        dir2pdf(filename, width)
        return
    if zipfile.is_zipfile(filename):
        zip2pdf(filename, width)
        return
    print(filename, 'is neigher direcotry nor zipfile')
            
def show_gui():
    """Open a small window for converting dropped files to PDF.

    Files dropped on the table are remembered in the module-level
    ``file_list``; pressing "Make" converts them one by one with ``file2pdf``
    on a background thread and updates each row's status column.
    """
    global file_list
    # Start with an empty list so "Make" is harmless before anything is dropped.
    file_list = []

    def file_drop(files):
        global file_list
        file_list = []
        table.DeleteAllItems()
        for file in files:
            table.Add((os.path.basename(file), "Ready"))
            file_list.append(file)

    def make_handler(ev):
        import threading
        # Snapshot the list so a drop during conversion cannot change the rows
        # the worker is iterating over.
        pending = list(file_list)

        def thread_handler():
            for index, file in enumerate(pending):
                # Bind the row as a default argument: a plain closure would
                # read `index` only when the callback finally runs, by which
                # time the loop may have moved on to another row.
                # NOTE(review): RunLater presumably defers the call to the GUI
                # thread - confirm against wxez.
                w.RunLater(lambda row=index: table.SetItem(row, 1, "Proc"))
                file2pdf(file)
                w.RunLater(lambda row=index: table.SetItem(row, 1, "OK"))

        thread = threading.Thread(target=thread_handler, args=())
        thread.daemon = False
        thread.start()

    def get_panel(parent):
        global table
        panel = w.VBox(parent)
        table = w.Table(panel, (('File', 500, -1), ('Status', 70, 0)), drop=file_drop)
        button = w.Button(panel, "Make", make_handler)
        panel.Add(table, expand=True, fill=True)
        panel.Add(button, expand=False, fill=False, right=True)
        return panel

    # Imported here so the command-line mode works without the GUI package.
    import wxez as w
    win = w.WxWin("Image to PDF", 600, 400)
    panel = get_panel(win)
    win.Add(panel, expand=True, fill=True)
    win.Run()
    
if __name__ == "__main__":
    # No argument: open the GUI. One argument: convert that path.
    # Two arguments: convert the path with the second as maximum image width.
    # Any other argument count does nothing.
    cli_args = sys.argv[1:]
    if not cli_args:
        show_gui()
    elif len(cli_args) == 1:
        file2pdf(cli_args[0])
    elif len(cli_args) == 2:
        file2pdf(cli_args[0], int(cli_args[1]))

댓글 없음:

댓글 쓰기