Extract Table Structure from PDF

Extract Table Structure from PDF | C#

ByteScout PDF Extractor SDK

Free Trial | Web API version | Licensing | Request A Quote

HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM or write an email to SUPPORT@BYTESCOUT.COM

Extract Table Structure from PDF | C#

Program.cs:

using Bytescout.PDFExtractor;
using System.Diagnostics;
using System;

namespace TableStructure
{
    /// <summary>
    /// Console sample that walks every page of a PDF document with
    /// <c>StructuredExtractor</c> and prints each detected cell value
    /// on its own line.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads <c>.\sample3.pdf</c>, iterates its pages, rows and columns,
        /// and writes every cell value to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.StructuredExtractor instance
            StructuredExtractor extractor = new StructuredExtractor();

            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\sample3.pdf");

                // The document does not change while we read it, so query the page count once.
                int pageCount = extractor.GetPageCount();

                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                {
                    Console.WriteLine("Starting extraction from page #" + pageIndex);
                    Console.WriteLine();

                    // Must be called before querying rows/columns of this page.
                    extractor.PrepareStructure(pageIndex);

                    int rowCount = extractor.GetRowCount(pageIndex);

                    for (int row = 0; row < rowCount; row++)
                    {
                        // Column count is queried per row, so rows may differ in width.
                        int columnCount = extractor.GetColumnCount(pageIndex, row);

                        for (int col = 0; col < columnCount; col++)
                        {
                            Console.WriteLine(extractor.GetCellValue(pageIndex, row, col));
                        }
                    }
                }
            }
            finally
            {
                // Cleanup: release the extractor even if loading or extraction throws.
                extractor.Dispose();
            }

            Console.WriteLine();
            Console.WriteLine("Press any key..");
            Console.ReadKey();
        }
    }
}