2018년 8월 30일 목요일

[Java] Extracting Images from a PDF


1. Extracting PDF images with Apache PDFBox 2.0.x

Source code (PdfTools.java):

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;

/**
 * Collects the images embedded in a PDF (via Apache PDFBox 2.0.x) and writes them
 * to a folder as PNG or JPEG files.
 *
 * <p>An instance owns the {@link PDDocument} it loaded. Call {@link #close()} or use
 * try-with-resources once the images are no longer needed so the document is released.
 */
public class PdfTools implements AutoCloseable {

    /** Currently loaded document, or {@code null} when nothing is loaded. */
    PDDocument document;

    /** Creates an instance with no document; call {@link #load(String)} before extracting. */
    public PdfTools() {
    }

    /**
     * Creates an instance and loads the given PDF.
     *
     * @param fileName path of the PDF file to open
     * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened without a password
     * @throws IOException if the file cannot be read or parsed
     */
    public PdfTools(String fileName) throws InvalidPasswordException, IOException {
        load(fileName);
    }

    /**
     * Loads a PDF, replacing (and closing) any document loaded earlier.
     *
     * @param fileName path of the PDF file to open
     * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened without a password
     * @throws IOException if the file cannot be read or parsed, or the previous document fails to close
     */
    public void load(String fileName) throws InvalidPasswordException, IOException {
        // Load first so a failed load leaves the previously loaded document untouched.
        PDDocument loaded = PDDocument.load(new File(fileName));
        PDDocument previous = document;
        document = loaded;
        if (previous != null) {
            previous.close();
        }
    }

    /**
     * Closes the loaded document, if any. Safe to call more than once.
     *
     * @throws IOException if PDFBox fails to close the document
     */
    @Override
    public void close() throws IOException {
        PDDocument current = document;
        document = null;
        if (current != null) {
            current.close();
        }
    }

    /**
     * Writes every image of the loaded PDF into {@code folder} as {@code image_NNNN.png}.
     *
     * @param folder output directory; created if it does not exist
     * @throws IOException if the folder cannot be created or an image cannot be written
     */
    public void extractImagesToPng(File folder) throws IOException {
        writeImages(folder, "png", "png");
    }

    /**
     * Writes every image of the loaded PDF into {@code folder} as {@code image_NNNN.jpg}.
     * Images with an alpha channel are flattened onto a white background first.
     *
     * @param folder output directory; created if it does not exist
     * @throws IOException if the folder cannot be created or an image cannot be written
     */
    public void extractImagesToJpeg(File folder) throws IOException {
        writeImages(folder, "jpeg", "jpg");
    }

    /**
     * Returns the images of all pages in page order, descending into form XObjects.
     *
     * @return a new list of decoded images (possibly empty)
     * @throws IOException if an image cannot be decoded
     * @throws IllegalStateException if no document has been loaded
     */
    public List<RenderedImage> getAllImages() throws IOException {
        if (document == null) {
            throw new IllegalStateException("No PDF loaded; call load(String) first");
        }
        List<RenderedImage> images = new ArrayList<>();
        for (PDPage page : document.getPages()) {
            collectImages(page.getResources(), images);
        }
        return images;
    }

    /** Shared writer loop for both output formats. */
    private void writeImages(File folder, String formatName, String extension) throws IOException {
        // The second isDirectory() check covers the folder being created concurrently.
        if (!folder.isDirectory() && !folder.mkdirs() && !folder.isDirectory()) {
            throw new IOException("Cannot create output folder: " + folder);
        }
        boolean needsOpaque = "jpeg".equals(formatName);
        List<RenderedImage> images = getAllImages();
        for (int i = 0; i < images.size(); i++) {
            RenderedImage image = needsOpaque ? flattenAlpha(images.get(i)) : images.get(i);
            File target = new File(folder, String.format("image_%04d.%s", i, extension));
            // ImageIO.write reports "no suitable writer" by returning false, not by throwing.
            if (!ImageIO.write(image, formatName, target)) {
                throw new IOException(
                        "No ImageIO writer for format '" + formatName + "' could encode " + target);
            }
        }
    }

    /** Appends the images reachable from {@code resources} to {@code sink}, depth-first. */
    private void collectImages(PDResources resources, List<RenderedImage> sink) throws IOException {
        // A page or form XObject without a resource dictionary yields null here.
        if (resources == null) {
            return;
        }
        for (COSName name : resources.getXObjectNames()) {
            PDXObject obj = resources.getXObject(name);
            if (obj instanceof PDFormXObject) {
                collectImages(((PDFormXObject) obj).getResources(), sink);
            } else if (obj instanceof PDImageXObject) {
                sink.add(((PDImageXObject) obj).getImage());
            }
        }
    }

    /**
     * Returns an RGB copy painted over white when {@code image} is a {@link BufferedImage}
     * with an alpha channel; otherwise returns {@code image} unchanged.
     */
    private static RenderedImage flattenAlpha(RenderedImage image) {
        if (!(image instanceof BufferedImage) || !image.getColorModel().hasAlpha()) {
            return image;
        }
        BufferedImage source = (BufferedImage) image;
        BufferedImage opaque = new BufferedImage(
                source.getWidth(), source.getHeight(), BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = opaque.createGraphics();
        try {
            graphics.drawImage(source, 0, 0, Color.WHITE, null);
        } finally {
            graphics.dispose();
        }
        return opaque;
    }

    public static void main(String[] args) throws InvalidPasswordException, IOException {
        // try-with-resources releases the PDF even if extraction fails part-way.
        try (PdfTools pdfTools = new PdfTools("D:/Ebook/python.pdf")) {
            pdfTools.extractImagesToPng(new File("D:/Temp/png"));
            pdfTools.extractImagesToJpeg(new File("D:/Temp/jpg"));
        }
    }
}




댓글 없음:

댓글 쓰기