Determine if a PDF only contains images

Content extraction, Convert PDF, Images, Shapes, Manipulate PDF
10/14/2011

Downloads

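The following sample code determines whether a PDF document contains only images (and no other shapes, such as text fragments). Nested shape collections are examined recursively, so an image inside a group still counts as an image.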

C# code sample

/// <summary>
/// Entry point: reports on the console whether the sample PDF consists solely of images.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string path = @"..\..\input.pdf";

    // "File " already ends with a space, so the alternatives carry only a
    // trailing space; the words are separated by exactly one space.
    System.Console.WriteLine("File " + (OnlyImages(path) ? "contains " : "does not contain ") + "only images");
}

/// <summary>
/// Determines whether every page of the PDF file at <paramref name="path"/> contains only images.
/// </summary>
/// <param name="path">Path of the PDF file; the file is opened read-only.</param>
/// <returns>
/// <c>false</c> as soon as a page yields a shape that is not an image;
/// <c>true</c> otherwise (including when the document has no pages).
/// </returns>
public static bool OnlyImages(string path)
{
    // The stream stays open for as long as the pages are being examined.
    using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        Document document = new Document(file);
        foreach (Page page in document.Pages)
        {
            ShapeCollection shapes = page.CreateShapes();
            if (!OnlyImages(shapes))
            {
                return false;
            }
        }
    }

    return true;
}

/// <summary>
/// Determines whether a shape collection contains only images, descending into nested collections.
/// </summary>
/// <param name="shapes">The shapes to examine.</param>
/// <returns>
/// <c>true</c> if every shape is an <c>ImageShape</c> or a nested <c>ShapeCollection</c>
/// that itself contains only images (an empty collection yields <c>true</c>);
/// otherwise <c>false</c>.
/// </returns>
public static bool OnlyImages(ShapeCollection shapes)
{
    foreach (Shape shape in shapes)
    {
        if (shape is ImageShape)
        {
            continue;
        }

        // A nested collection is acceptable only if everything inside it is an image.
        ShapeCollection nested = shape as ShapeCollection;
        if (nested != null && OnlyImages(nested))
        {
            continue;
        }

        // Any other kind of shape, or a nested collection holding one.
        return false;
    }

    return true;
}

VB.NET code sample

''' <summary>
''' Entry point: reports on the console whether the sample PDF consists solely of images.
''' </summary>
Sub Main()
    Dim path As String = "..\..\input.pdf"

    ' "File " already ends with a space, so the alternatives carry only a
    ' trailing space; the words are separated by exactly one space.
    System.Console.WriteLine("File " & If(OnlyImages(path), "contains ", "does not contain ") & "only images")
End Sub

''' <summary>
''' Determines whether every page of the PDF file at <paramref name="path"/> contains only images.
''' </summary>
''' <param name="path">Path of the PDF file; the file is opened read-only.</param>
''' <returns>
''' <c>False</c> as soon as a page yields a shape that is not an image;
''' <c>True</c> otherwise (including when the document has no pages).
''' </returns>
Public Function OnlyImages(path As String) As Boolean
    ' The stream stays open for as long as the pages are being examined.
    Using file As New FileStream(path, FileMode.Open, FileAccess.Read)
        Dim document As New Document(file)
        For Each page As Page In document.Pages
            Dim shapes As ShapeCollection = page.CreateShapes()
            If Not OnlyImages(shapes) Then
                Return False
            End If
        Next
    End Using

    Return True
End Function

''' <summary>
''' Determines whether a shape collection contains only images, descending into nested collections.
''' </summary>
''' <param name="shapes">The shapes to examine.</param>
''' <returns>
''' <c>True</c> if every shape is an <c>ImageShape</c> or a nested <c>ShapeCollection</c>
''' that itself contains only images (an empty collection yields <c>True</c>);
''' otherwise <c>False</c>.
''' </returns>
Public Function OnlyImages(shapes As ShapeCollection) As Boolean
    For Each shape As Shape In shapes
        If TypeOf shape Is ImageShape Then
            Continue For
        End If

        ' A nested collection is acceptable only if everything inside it is an image.
        Dim nested As ShapeCollection = TryCast(shape, ShapeCollection)
        If nested IsNot Nothing AndAlso OnlyImages(nested) Then
            Continue For
        End If

        ' Any other kind of shape, or a nested collection holding one.
        Return False
    Next

    Return True
End Function