Determine if a PDF is searchable

  • Reference KB000168
  • Type Code sample
  • Product PDFKit.NET
  • Category Content extraction
  • Created 5/8/2012

This code sample shows how to use Page.CreateShapes to determine if a PDF document has searchable text.


/// <summary>
/// Opens the sample PDF, walks its pages, and prints whether any page
/// produces text shapes (i.e. whether the document has searchable text).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
  string fileName = "report.pdf";
  // The sample file is looked up two directories above the working directory.
  string path = string.Format(@"..\..\{0}", fileName);

  using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
  {
    Document document = new Document(file);

    // One page with text is enough; stop scanning as soon as it is found.
    bool searchable = false;
    foreach (Page page in document.Pages)
    {
      if (hasText(page.CreateShapes()))
      {
        searchable = true;
        break;
      }
    }

    string verdict = searchable ? "has" : "does not have";
    Console.WriteLine("\"{0}\" {1} text.", fileName, verdict);
  }
}

/// <summary>
/// Reports whether a shape collection contains at least one
/// <c>TextShape</c>, either directly or inside a nested collection.
/// </summary>
/// <param name="shapes">The collection of shapes to inspect.</param>
/// <returns>
/// <c>true</c> as soon as a <c>TextShape</c> is encountered;
/// <c>false</c> if the whole collection is traversed without finding one.
/// </returns>
static bool hasText(ShapeCollection shapes)
{
  foreach (Shape candidate in shapes)
  {
    if (candidate is TextShape)
    {
      return true;
    }

    // A shape may itself be a collection; descend into it recursively.
    ShapeCollection nested = candidate as ShapeCollection;
    if (nested != null && hasText(nested))
    {
      return true;
    }
  }
  return false;
}
 1 static void Main(string[] args)
 2 {
 3   string fileName = "report.pdf";
 4   using (FileStream file = new FileStream(
 5     string.Format(@"..\..\{0}", fileName), 
 6     FileMode.Open, FileAccess.Read))
 7   {
 8     Document document = new Document(file);
 9 
10     bool searchable = false;
11     foreach (Page page in document.Pages)
12     {
13       ShapeCollection shapes = page.CreateShapes();
14       searchable = hasText(shapes);
15       if (searchable) break;
16     }
17     Console.WriteLine("\"{0}\" {1} text.", fileName, searchable ? "has" : "does not have");
18   }
19 }
20 
21 static bool hasText(ShapeCollection shapes)
22 {
23   foreach (Shape shape in shapes)
24   {
25     TextShape text = shape as TextShape;
26     if (null != text) return true;
27 
28     ShapeCollection innerShapes = shape as ShapeCollection;
29     if (null != innerShapes)
30     {
31       if (hasText(innerShapes)) return true;
32     }
33   }
34   return false;
35 }