Products
PDFKit.NET 4.0
Created
5/8/2012
Tags
Content extraction

This code sample shows how to use Page.CreateShapes to determine whether a PDF document has searchable text. Searchable text is text that is saved in the PDF as glyphs and not, for example, as an image.

C# code sample:

bool searchable = false; foreach (Page page in document.Pages) { ShapeCollection shapes = page.CreateShapes(); searchable = hasText(shapes); if (searchable) break; } Console.WriteLine("\"{0}\" {1} text.", filename, searchable ? "has" : "does not have"); }

}

/// <summary>
/// Determines whether a shape collection contains at least one
/// <c>TextShape</c>, either directly or inside a nested
/// <c>ShapeCollection</c> at any depth.
/// </summary>
/// <param name="shapes">The shape collection to inspect.</param>
/// <returns>
/// <c>true</c> as soon as a <c>TextShape</c> is encountered;
/// <c>false</c> when the whole tree has been walked without finding one.
/// </returns>
static bool hasText(ShapeCollection shapes)
{
    foreach (Shape candidate in shapes)
    {
        // A text shape at this level settles the question immediately.
        if (candidate is TextShape)
        {
            return true;
        }

        // Anything that is itself a collection is searched recursively;
        // the cast yields null for every other kind of shape.
        ShapeCollection nested = candidate as ShapeCollection;
        if (nested != null && hasText(nested))
        {
            return true;
        }
    }

    return false;
}

]]>

VB.NET code sample:

Dim searchable As Boolean = False For Each page As Page In document.Pages Dim shapes As ShapeCollection = page.CreateShapes() searchable = hasText(shapes) If searchable Then Exit For End If Next Console.WriteLine("""{0}"" {1} text.", filename, If(searchable, "has", "does not have")) End Using

End Sub

''' <summary>
''' Determines whether a shape collection contains at least one TextShape,
''' either directly or inside a nested ShapeCollection at any depth.
''' </summary>
''' <param name="shapes">The shape collection to inspect.</param>
''' <returns>
''' True as soon as a TextShape is encountered; False when the whole tree
''' has been walked without finding one.
''' </returns>
Private Function hasText(shapes As ShapeCollection) As Boolean
    For Each candidate As Shape In shapes
        ' A text shape at this level settles the question immediately.
        If TypeOf candidate Is TextShape Then
            Return True
        End If

        ' Anything that is itself a collection is searched recursively;
        ' TryCast yields Nothing for every other kind of shape.
        Dim nested As ShapeCollection = TryCast(candidate, ShapeCollection)
        If nested IsNot Nothing AndAlso hasText(nested) Then
            Return True
        End If
    Next

    Return False
End Function