Extract images from PDF

Content extraction, Generate PDF, Images, Shapes
4/15/2011

Downloads

This sample shows how to extract images from a PDF document.

Extract images from an existing PDF document

This code sample illustrates how to iterate through the existing content of a PDF document and save each image found on every page as a new image file.

/// <summary>
/// Opens PackingLightBrochure.pdf, walks through all of its pages and passes the
/// shapes of each page to saveImageShapes so that the images can be written to disk.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(String[] args)
{
   using (FileStream pdfStream = new FileStream("PackingLightBrochure.pdf", FileMode.Open, FileAccess.Read))
   {
      // the document reads from the stream, so all work happens while the stream is open
      Document pdf = new Document(pdfStream);

      // zero-based index of the page being processed; saveImageShapes uses it in the output file name
      int pageIndex = 0;
      foreach (Page currentPage in pdf.Pages)
      {
         // extract the shapes of this page and save every image among them
         saveImageShapes(currentPage.CreateShapes(), pageIndex);
         pageIndex += 1;
      }
   }
}

/// <summary>
/// Saves every image found in <paramref name="shapes"/>, including images nested in
/// child shape collections, as a separate PNG file.
/// </summary>
/// <param name="shapes">The shapes to scan for images.</param>
/// <param name="i">
/// Index identifying the source of the shapes (the page index when called from Main).
/// It is the first number in each output file name.
/// </param>
/// <remarks>
/// Files are written as ..\..\Image_{i}_{n}.png, where n starts at 0 and increases by one
/// for every image saved during this call. Numbering the images per call keeps two images
/// from the same collection from overwriting each other.
/// </remarks>
static void saveImageShapes(ShapeCollection shapes, int i)
{
	int imageIndex = 0;
	saveImageShapesRecursive(shapes, i, ref imageIndex);
}

/// <summary>
/// Walks <paramref name="shapes"/> depth-first and saves each image shape it encounters.
/// </summary>
/// <param name="shapes">The shapes to scan at the current nesting level.</param>
/// <param name="pageIndex">First number of the output file name; constant for the whole walk.</param>
/// <param name="imageIndex">
/// Running count of images saved so far; shared by reference across all nesting levels so
/// that every saved image receives a distinct number.
/// </param>
static void saveImageShapesRecursive(ShapeCollection shapes, int pageIndex, ref int imageIndex)
{
	foreach (Shape shape in shapes)
	{
		ImageShape imageShape = shape as ImageShape;

		if (imageShape != null)
		{
			// Bitmap holds GDI+ resources; release them as soon as the file is written
			using (System.Drawing.Bitmap bitmap = imageShape.CreateBitmap())
			{
				bitmap.Save(string.Format(@"..\..\Image_{0}_{1}.png", pageIndex, imageIndex));
			}

			imageIndex++;
			continue;
		}

		// not an image: if it is a nested collection, search it as well
		ShapeCollection nestedShapes = shape as ShapeCollection;
		if (nestedShapes != null)
		{
			saveImageShapesRecursive(nestedShapes, pageIndex, ref imageIndex);
		}
	}
}
''' <summary>
''' Opens PackingLightBrochure.pdf, walks through all of its pages and passes the
''' shapes of each page to saveImageShapes so that the images can be written to disk.
''' </summary>
Sub Main()
    Using pdfStream As New FileStream("..\..\..\inputdocuments/PackingLightBrochure.pdf", FileMode.Open, FileAccess.Read)
        ' the document reads from the stream, so all work happens while the stream is open
        Dim pdf As New Document(pdfStream)

        ' zero-based index of the page being processed; saveImageShapes uses it in the output file name
        Dim pageIndex As Integer = 0
        For Each currentPage As Page In pdf.Pages
            ' extract the shapes of this page and save every image among them
            saveImageShapes(currentPage.CreateShapes(), pageIndex)
            pageIndex = pageIndex + 1
        Next
    End Using
End Sub

''' <summary>
''' Saves every image found in <paramref name="shapes"/>, including images nested in
''' child shape collections, as a separate PNG file.
''' </summary>
''' <param name="shapes">The shapes to scan for images.</param>
''' <param name="i">
''' Index identifying the source of the shapes (the page index when called from Main).
''' It is the first number in each output file name.
''' </param>
''' <remarks>
''' Files are written as ..\..\Image_{i}_{n}.png, where n starts at 0 and increases by one
''' for every image saved during this call. Numbering the images per call keeps two images
''' from the same collection from overwriting each other.
''' </remarks>
Private Sub saveImageShapes(shapes As ShapeCollection, i As Integer)
    Dim imageIndex As Integer = 0
    saveImageShapesRecursive(shapes, i, imageIndex)
End Sub

''' <summary>
''' Walks <paramref name="shapes"/> depth-first and saves each image shape it encounters.
''' </summary>
''' <param name="shapes">The shapes to scan at the current nesting level.</param>
''' <param name="pageIndex">First number of the output file name; constant for the whole walk.</param>
''' <param name="imageIndex">
''' Running count of images saved so far; passed ByRef so that all nesting levels share it
''' and every saved image receives a distinct number.
''' </param>
Private Sub saveImageShapesRecursive(shapes As ShapeCollection, pageIndex As Integer, ByRef imageIndex As Integer)
    For Each shape As Shape In shapes
        Dim imageShape As ImageShape = TryCast(shape, ImageShape)

        If imageShape IsNot Nothing Then
            ' Bitmap holds GDI+ resources; release them as soon as the file is written
            Using bitmap As Drawing.Bitmap = imageShape.CreateBitmap()
                bitmap.Save(String.Format("..\..\Image_{0}_{1}.png", pageIndex, imageIndex))
            End Using

            imageIndex += 1
        Else
            ' not an image: if it is a nested collection, search it as well
            Dim nestedShapes As ShapeCollection = TryCast(shape, ShapeCollection)
            If nestedShapes IsNot Nothing Then
                saveImageShapesRecursive(nestedShapes, pageIndex, imageIndex)
            End If
        End If
    Next
End Sub