1. Extracting Images from a PDF with Apache PDFBox 2.0.x
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | import java.awt.image.RenderedImage; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.imageio.ImageIO; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; public class PdfTools { PDDocument document; public PdfTools() { } public PdfTools(String fileName) throws InvalidPasswordException, IOException { load(fileName); } public void load(String fileName) throws InvalidPasswordException, IOException { document = PDDocument.load(new File(fileName)); } public void extractImagesToPng(File folder) throws IOException { if( folder.exists() == false ) { folder.mkdirs(); } List<RenderedImage> images = getAllImages(); for( int i = 0; i < images.size(); i++ ) { ImageIO.write( images.get(i), "png", new File(folder, String.format( "image_%04d.png", i))); } } public void extractImagesToJpeg(File folder) throws IOException { if( folder.exists() == false ) { folder.mkdirs(); } List<RenderedImage> images = getAllImages(); for( int i = 0; i < images.size(); i++ ) { ImageIO.write( images.get(i), "jpeg", new File(folder, String.format( "image_%04d.jpg", i))); } } public List<RenderedImage> getAllImages() throws IOException { List<RenderedImage> images = new ArrayList<>(); for( PDPage page : document.getPages() ) { images.addAll(getImagesFromResources(page.getResources())); } return images; } private List<RenderedImage> 
getImagesFromResources(PDResources resources) throws IOException { List<RenderedImage> images = new ArrayList<>(); for( COSName name : resources.getXObjectNames() ) { PDXObject obj = resources.getXObject(name); if (obj instanceof PDFormXObject) { images.addAll(getImagesFromResources(((PDFormXObject) obj).getResources())); } else if (obj instanceof PDImageXObject) { images.add(((PDImageXObject) obj).getImage()); } } return images; } public static void main(String[] args) throws InvalidPasswordException, IOException { PdfTools pdfTools = new PdfTools("D:/Ebook/python.pdf"); pdfTools.extractImagesToPng(new File("D:/Temp/png")); pdfTools.extractImagesToJpeg(new File("D:/Temp/jpg")); } } |
댓글 없음:
댓글 쓰기