Products
PDFKit.NET 4.0
Created
3/21/2014
Tags
Content extraction

This sample demonstrates how to extract glyph boxes.

This sample creates a bitmap for each page and draws boxes for each glyph. It takes into account the orientation of the page, as well as its cropbox and mediabox so that the bitmap resembles the page as shown by a PDF viewer. The main routine here is CreateBoxesBitmap. It takes a page as an argument and returns a Bitmap with drawn boxes.

For the following PDF page:

page.png

We get the following result:

result.png

C# code sample

/// <summary>
/// Opens the sample PDF and writes one PNG per page, each showing the glyph
/// boxes of that page.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    using (FileStream fileIn = new FileStream(@"..\..\..\inputdocuments\R0.pdf", FileMode.Open, FileAccess.Read))
    {
        //create document
        Document document = new Document(fileIn);

        // 1-based page counter: every page gets its own output file instead of
        // all pages overwriting a single out.png.
        int pageNumber = 0;
        foreach (Page page in document.Pages)
        {
            pageNumber++;

            // The bitmap is owned by this method once returned, so release it
            // as soon as it has been written to disk.
            using (System.Drawing.Bitmap bitmap = CreateBoxesBitmap(page))
            {
                bitmap.Save(@"..\..\out" + pageNumber + ".png", System.Drawing.Imaging.ImageFormat.Png);
            }
        }
    }
}

/// <summary>
/// Creates a white bitmap the size of the visible part of <paramref name="page"/>
/// and outlines every glyph on that page in red.
/// </summary>
/// <param name="page">The page whose glyphs are drawn.</param>
/// <returns>
/// A new bitmap containing the glyph boxes. The caller owns the bitmap and is
/// responsible for disposing it.
/// </returns>
private static System.Drawing.Bitmap CreateBoxesBitmap(Page page)
{
    // Compute the part of the page that is visible in a viewer.
    Rectangle visibleRectangle = GetVisibleRectangle(page);

    // determine the size taking the orientation into account
    int width = (int)Math.Round(visibleRectangle.Width);
    int height = (int)Math.Round(visibleRectangle.Height);

    Orientation orientation = page.Orientation;
    if (orientation == Orientation.Rotate90 || orientation == Orientation.Rotate270)
    {
        // a quarter turn exchanges the bitmap's width and height.
        int temp = width;
        width = height;
        height = temp;
    }

    // create the resulting bitmap
    System.Drawing.Bitmap bitmap = new System.Drawing.Bitmap(width, height);
    try
    {
        using (System.Drawing.Graphics graphics = System.Drawing.Graphics.FromImage(bitmap))
        using (System.Drawing.Pen pen = new System.Drawing.Pen(System.Drawing.Color.Red))
        {
            graphics.Clear(System.Drawing.Color.White);

            // retrieve all glyphs on the current page and draw a rectangle for each.
            foreach (Glyph glyph in page.Glyphs)
            {
                // each corner is converted into a GDI coordinate; the four corners
                // are kept separate (rather than a width/height box) so the outline
                // follows whatever corner positions the glyph reports.
                System.Drawing.PointF[] points = new[]
                {
                    PDFPointToGDI(glyph.BottomLeft, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.BottomRight, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.TopRight, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.TopLeft, visibleRectangle, orientation)
                };

                // draw glyph box
                graphics.DrawPolygon(pen, points);
            }
        }
    }
    catch
    {
        // On failure the bitmap never reaches the caller, so nobody else can
        // dispose it; release it here before propagating the exception.
        bitmap.Dispose();
        throw;
    }

    return bitmap;
}
''' <summary>
''' Opens the sample PDF and writes one PNG per page, each showing the glyph
''' boxes of that page.
''' </summary>
''' <param name="args">Command-line arguments; not used.</param>
Private Sub Main(args As String())
    Using fileIn As New FileStream("..\..\..\inputdocuments\R0.pdf", FileMode.Open, FileAccess.Read)
        'create document
        Dim document As New Document(fileIn)

        ' 1-based page counter: every page gets its own output file instead of
        ' all pages overwriting a single out.png.
        Dim pageNumber As Integer = 0
        For Each page As Page In document.Pages
            pageNumber += 1

            ' The bitmap is owned by this method once returned, so release it
            ' as soon as it has been written to disk.
            Using bitmap As System.Drawing.Bitmap = CreateBoxesBitmap(page)
                bitmap.Save("..\..\out" & pageNumber.ToString() & ".png", System.Drawing.Imaging.ImageFormat.Png)
            End Using
        Next
    End Using
End Sub

''' <summary>
''' Creates a white bitmap the size of the visible part of <paramref name="page"/>
''' and outlines every glyph on that page in red.
''' </summary>
''' <param name="page">The page whose glyphs are drawn.</param>
''' <returns>
''' A new bitmap containing the glyph boxes. The caller owns the bitmap and is
''' responsible for disposing it.
''' </returns>
Private Function CreateBoxesBitmap(page As Page) As System.Drawing.Bitmap
    ' Compute the part of the page that is visible in a viewer.
    Dim visibleRectangle As Rectangle = GetVisibleRectangle(page)

    ' determine the size taking the orientation into account
    Dim width As Integer = CInt(Math.Round(visibleRectangle.Width))
    Dim height As Integer = CInt(Math.Round(visibleRectangle.Height))

    Dim pageOrientation As Orientation = page.Orientation
    If pageOrientation = Orientation.Rotate90 OrElse pageOrientation = Orientation.Rotate270 Then
        ' a quarter turn exchanges the bitmap's width and height.
        Dim temp As Integer = width
        width = height
        height = temp
    End If

    ' create the resulting bitmap (explicitly typed so it does not depend on Option Infer)
    Dim bitmap As New System.Drawing.Bitmap(width, height)
    Try
        Using graphics As System.Drawing.Graphics = System.Drawing.Graphics.FromImage(bitmap)
            Using pen As New System.Drawing.Pen(System.Drawing.Color.Red)
                graphics.Clear(System.Drawing.Color.White)

                ' retrieve all glyphs on the current page and draw a rectangle for each.
                For Each glyph As Glyph In page.Glyphs
                    ' each corner is converted into a GDI coordinate; the four corners
                    ' are kept separate (rather than a width/height box) so the outline
                    ' follows whatever corner positions the glyph reports.
                    Dim points As System.Drawing.PointF() = {
                        PDFPointToGDI(glyph.BottomLeft, visibleRectangle, pageOrientation),
                        PDFPointToGDI(glyph.BottomRight, visibleRectangle, pageOrientation),
                        PDFPointToGDI(glyph.TopRight, visibleRectangle, pageOrientation),
                        PDFPointToGDI(glyph.TopLeft, visibleRectangle, pageOrientation)
                    }

                    ' draw glyph box
                    graphics.DrawPolygon(pen, points)
                Next
            End Using
        End Using
    Catch
        ' On failure the bitmap never reaches the caller, so nobody else can
        ' dispose it; release it here before propagating the exception.
        bitmap.Dispose()
        Throw
    End Try

    Return bitmap
End Function

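Note that we need to convert each coordinate into a GDI coordinate, because PDF has its origin at the bottom left of the page, whereas GDI has its origin at the top left, and the page may be rotated as well. Below is the code of the PDFPointToGDI routine, together with the GetVisibleRectangle and Intersection helpers used by CreateBoxesBitmap.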

C# code sample

/// <summary>
/// Computes the rectangle of <paramref name="page"/> that is used as the
/// drawing area: the page rectangle clipped by the media box and then by the
/// crop box, whenever those boxes are present.
/// </summary>
/// <param name="page">The page to inspect.</param>
/// <returns>The clipped rectangle.</returns>
static Rectangle GetVisibleRectangle(Page page)
{
    // Start from a rectangle at the origin with the page's width and height.
    Rectangle visible = new Rectangle(0, 0, page.Width, page.Height);

    // Clip by the media box when the page reports one.
    Rectangle media = page.MediaBox;
    if (media != null)
    {
        visible = Intersection(visible, media);
    }

    // Clip by the crop box when the page reports one.
    Rectangle crop = page.CropBox;
    if (crop != null)
    {
        visible = Intersection(visible, crop);
    }

    return visible;
}

/// <summary>
/// Maps a point given in PDF page coordinates to a point on the bitmap.
/// </summary>
/// <param name="point">The point in PDF coordinates.</param>
/// <param name="rectangle">The visible rectangle of the page; its left/bottom corner becomes the local origin.</param>
/// <param name="orientation">The orientation of the page.</param>
/// <returns>
/// The mapped point, or <paramref name="point"/> unchanged when the
/// orientation is none of the four handled values.
/// </returns>
static System.Drawing.PointF PDFPointToGDI(System.Drawing.PointF point, Rectangle rectangle, Orientation orientation)
{
    // Express the point relative to the visible rectangle, whose corner may not be at (0,0).
    double localX = point.X - rectangle.Left;
    double localY = point.Y - rectangle.Bottom;

    double gdiX;
    double gdiY;

    if (orientation == Orientation.Rotate0)
    {
        // x is kept; y is measured from the top edge instead of the bottom edge.
        gdiX = localX;
        gdiY = rectangle.Height - localY;
    }
    else if (orientation == Orientation.Rotate90)
    {
        // x and y trade places and both are measured from the opposite edge.
        // NOTE(review): whether this matches the library's Rotate90 direction
        // cannot be verified from this sample alone.
        gdiX = rectangle.Height - localY;
        gdiY = rectangle.Width - localX;
    }
    else if (orientation == Orientation.Rotate180)
    {
        // x is measured from the right edge; y is used as is.
        gdiX = rectangle.Width - localX;
        gdiY = localY;
    }
    else if (orientation == Orientation.Rotate270)
    {
        // x and y trade places without any mirroring.
        gdiX = localY;
        gdiY = localX;
    }
    else
    {
        // Unknown orientation: hand the input back untouched.
        return point;
    }

    return new System.Drawing.PointF((float)gdiX, (float)gdiY);
}

/// <summary>
/// Computes the overlap of two rectangles.
/// </summary>
/// <param name="rect1">The first rectangle.</param>
/// <param name="rect2">The second rectangle.</param>
/// <returns>
/// A new rectangle spanning the overlap. No check is made for rectangles that
/// do not overlap; in that case the resulting width and/or height is negative.
/// </returns>
static Rectangle Intersection(Rectangle rect1, Rectangle rect2)
{
    // The overlap starts at the larger of the two left / bottom edges...
    double left = Math.Max(rect1.Left, rect2.Left);
    double bottom = Math.Max(rect1.Bottom, rect2.Bottom);

    // ...and ends at the smaller of the two right / top edges.
    double right = Math.Min(rect1.Left + rect1.Width, rect2.Left + rect2.Width);
    double top = Math.Min(rect1.Bottom + rect1.Height, rect2.Bottom + rect2.Height);

    return new Rectangle(left, bottom, right - left, top - bottom);
}
''' <summary>
''' Computes the rectangle of <paramref name="page"/> that is used as the
''' drawing area: the page rectangle clipped by the media box and then by the
''' crop box, whenever those boxes are present.
''' </summary>
''' <param name="page">The page to inspect.</param>
''' <returns>The clipped rectangle.</returns>
Private Function GetVisibleRectangle(page As Page) As Rectangle
    ' Start from a rectangle at the origin with the page's width and height.
    Dim visible As New Rectangle(0, 0, page.Width, page.Height)

    ' Clip by the media box when the page reports one.
    Dim media As Rectangle = page.MediaBox
    If media IsNot Nothing Then visible = Intersection(visible, media)

    ' Clip by the crop box when the page reports one.
    Dim crop As Rectangle = page.CropBox
    If crop IsNot Nothing Then visible = Intersection(visible, crop)

    Return visible
End Function

''' <summary>
''' Maps a point given in PDF page coordinates to a point on the bitmap.
''' </summary>
''' <param name="point">The point in PDF coordinates.</param>
''' <param name="rectangle">The visible rectangle of the page; its left/bottom corner becomes the local origin.</param>
''' <param name="orientation__1">The orientation of the page.</param>
''' <returns>
''' The mapped point, or <paramref name="point"/> unchanged when the
''' orientation is none of the four handled values.
''' </returns>
Private Function PDFPointToGDI(point As System.Drawing.PointF, rectangle As Rectangle, orientation__1 As Orientation) As System.Drawing.PointF
    ' Express the point relative to the visible rectangle, whose corner may not be at (0,0).
    Dim localX As Double = point.X - rectangle.Left
    Dim localY As Double = point.Y - rectangle.Bottom

    Dim gdiX As Double
    Dim gdiY As Double

    If orientation__1 = Orientation.Rotate0 Then
        ' x is kept; y is measured from the top edge instead of the bottom edge.
        gdiX = localX
        gdiY = rectangle.Height - localY
    ElseIf orientation__1 = Orientation.Rotate90 Then
        ' x and y trade places and both are measured from the opposite edge.
        ' NOTE(review): whether this matches the library's Rotate90 direction
        ' cannot be verified from this sample alone.
        gdiX = rectangle.Height - localY
        gdiY = rectangle.Width - localX
    ElseIf orientation__1 = Orientation.Rotate180 Then
        ' x is measured from the right edge; y is used as is.
        gdiX = rectangle.Width - localX
        gdiY = localY
    ElseIf orientation__1 = Orientation.Rotate270 Then
        ' x and y trade places without any mirroring.
        gdiX = localY
        gdiY = localX
    Else
        ' Unknown orientation: hand the input back untouched.
        Return point
    End If

    Return New System.Drawing.PointF(CSng(gdiX), CSng(gdiY))
End Function

''' <summary>
''' Computes the overlap of two rectangles.
''' </summary>
''' <param name="rect1">The first rectangle.</param>
''' <param name="rect2">The second rectangle.</param>
''' <returns>
''' A new rectangle spanning the overlap. No check is made for rectangles that
''' do not overlap; in that case the resulting width and/or height is negative.
''' </returns>
Private Function Intersection(rect1 As Rectangle, rect2 As Rectangle) As Rectangle
    ' The overlap starts at the larger of the two left / bottom edges...
    Dim left As Double = Math.Max(rect1.Left, rect2.Left)
    Dim bottom As Double = Math.Max(rect1.Bottom, rect2.Bottom)

    ' ...and ends at the smaller of the two right / top edges.
    Dim right As Double = Math.Min(rect1.Left + rect1.Width, rect2.Left + rect2.Width)
    Dim top As Double = Math.Min(rect1.Bottom + rect1.Height, rect2.Bottom + rect2.Height)

    Return New Rectangle(left, bottom, right - left, top - bottom)
End Function