Extract content based on bookmarks

  • Reference KB000161
  • Type Code sample
  • Product PDFKit.NET
  • Categories Content extraction, Manipulate PDF
  • Created 3/5/2012

This article shows how to split a document into several parts, using its bookmarks as the splitting points.

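We walk through the bookmarks and calculate the page range of each one: a range starts at the page the bookmark points to and ends just before the page the next bookmark points to (the last range runs to the end of the document). This assumes that each bookmark points to a different page than the previous bookmark.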

C#


/// <summary>
/// Splits "myfile.pdf" into several PDF files, one per child bookmark of the
/// first top-level bookmark. Each part starts at the page its bookmark points
/// to and ends just before the page the next bookmark points to; the last part
/// runs to the end of the document. Each part is written to the current
/// directory as "&lt;bookmark title&gt;.pdf".
/// </summary>
/// <remarks>
/// Bookmarks without a resolvable page range (no internal GoTo destination, or
/// a next bookmark that does not point to a later page) are skipped.
/// </remarks>
private static void ProcessDocument()
        {
            string inputFilePath = "myfile.pdf";

            // Request read access only, so that read-only input files can be opened as well.
            using (FileStream fs = new FileStream(inputFilePath, FileMode.Open, FileAccess.Read))
            {
                Document document = new Document(fs);

                // nothing to split when the document has no bookmarks at all
                if (document.Bookmarks.Count == 0)
                {
                    return;
                }

                BookmarkCollection bookmarks = document.Bookmarks[0].Bookmarks;
                for (int index = 0; index < bookmarks.Count; index++)
                {
                    // current bookmark
                    Bookmark bookmark = bookmarks[index];
                    if (null == bookmark)
                    {
                        continue;
                    }

                    // first page of this part (-1 if the bookmark has no internal destination)
                    int startIndex = GetDestinationPageIndex(bookmark);

                    // exclusive end of this part: either the end of the document (last
                    // bookmark) or the page the next bookmark points to
                    int endIndex = (index == bookmarks.Count - 1)
                        ? document.Pages.Count
                        : GetDestinationPageIndex(bookmarks[index + 1]);

                    // Skip unresolved destinations and empty or reversed ranges; indexing
                    // Pages with -1 or writing a page-less part makes no sense.
                    if (startIndex < 0 || endIndex <= startIndex)
                    {
                        continue;
                    }

                    // create a new part
                    Document part = new Document();
                    for (int pageIndex = startIndex; pageIndex < endIndex; pageIndex++)
                    {
                        part.Pages.Add(document.Pages[pageIndex].Clone());
                    }

                    // Bookmark titles are free text: replace characters that are not
                    // allowed in file names and fall back to a generated name when
                    // the title is empty.
                    string title = bookmark.Title;
                    string baseName = string.IsNullOrEmpty(title)
                        ? "part" + index
                        : string.Join("_", title.Split(Path.GetInvalidFileNameChars()));

                    using (
                        FileStream fileOut = new FileStream(
                            baseName + ".pdf", FileMode.Create, FileAccess.Write))
                    {
                        part.Write(fileOut);
                    }
                }
            }
        }

        /// <summary>
        /// Returns the index of the page that the first action of
        /// <paramref name="bookmark"/> jumps to.
        /// </summary>
        /// <param name="bookmark">The bookmark to inspect.</param>
        /// <returns>
        /// The destination page index, or -1 when the bookmark is null, has no
        /// actions, its first action is not a <c>GoToAction</c>, or the action's
        /// destination is not an <c>InternalDestination</c> with a page.
        /// </returns>
        private static int GetDestinationPageIndex(Bookmark bookmark)
        {
            // A bookmark without any action would make Actions[0] throw.
            if (null == bookmark || bookmark.Actions.Count == 0)
            {
                return -1;
            }

            // only the first action is considered
            GoToAction action = bookmark.Actions[0] as GoToAction;
            if (null == action)
            {
                return -1;
            }

            InternalDestination destination = action.Destination as InternalDestination;
            if (null == destination || null == destination.Page)
            {
                return -1;
            }

            return destination.Page.Index;
        }
 1 private static void ProcessDocument() // splits "myfile.pdf" into one PDF per child bookmark of the first top-level bookmark
 2         {
 3             string inputFilePath = "myfile.pdf";
 4 
 5             Document document = new Document();
 6 
 7             using (FileStream fs = new FileStream(inputFilePath, FileMode.Open))
 8             {
 9                 document = new Document(fs);
10 
11 
12                 BookmarkCollection bookmarks = document.Bookmarks[0].Bookmarks;
13                 for (int index = 0; index < bookmarks.Count; index++)
14                 {
15                     // current bookmark; its title becomes the output file name
16                     Bookmark bookmark = bookmarks[index];
17 
18                     // determine starting page of the current bookmark (first page of this part)
19                     int startIndex = GetDestinationPageIndex(bookmark);
20 
21                     // determine starting page of next part; used below as the exclusive end of this part
22                     int endIndex = -1;
23 
24                     if (index == bookmarks.Count - 1)
25                     {
26                         // last bookmark - append all remaining pages
27                         endIndex = document.Pages.Count;
28                     }
29                     else
30                     {
31                         // not the last bookmark - append up to (not including) the next bookmark's page
32                         endIndex = GetDestinationPageIndex(bookmarks[index + 1]);
33                     }
34 
35                     // create a new part, unless the end page could not be determined (-1)
36                     if (-1 != endIndex && null != bookmark)
37                     {
38                         Document part = new Document();
39                         for (int pageIndex = startIndex; pageIndex < endIndex; pageIndex++)
40                         {
41                             part.Pages.Add(document.Pages[pageIndex].Clone());
42                         }
43 
44                         using (
45                             FileStream fileOut = new FileStream(
46                                 bookmark.Title + ".pdf", FileMode.Create, FileAccess.Write))
47                         {
48                             part.Write(fileOut);
49                         }
50                     }
51                 }
52             }
53         }
54 
55         private static int GetDestinationPageIndex(Bookmark bookmark) // page index targeted by the bookmark's first action, or -1
56         {
57             int destinationPageIndex = -1; // stays -1 when no internal destination is found
58 
59             GoToAction action = bookmark.Actions[0] as GoToAction; // only the first action is inspected
60             if (null != action)
61             {
62                 InternalDestination destination = action.Destination as InternalDestination;
63 
64                 if (null != destination)
65                 {
66                     destinationPageIndex = destination.Page.Index;
67                 }
68             }
69 
70             return destinationPageIndex;
71         }