Extract Text From PDF By Pages | C# | ByteScout PDF Extractor SDK

Extract Text From PDF By Pages | C#

Program.cs:

C#
using System;
using Bytescout.PDFExtractor;
using System.Diagnostics;

namespace ExtractTextByPages
{
    /// <summary>
    /// Console sample: writes the text of each page of a PDF document to its own
    /// text file, then opens the first of those files.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the output file name used for the page at <paramref name="pageIndex"/>.
        /// Kept in one place so the writer loop and the "open first file" step
        /// can never disagree about the naming scheme.
        /// </summary>
        /// <param name="pageIndex">Index of the page as passed to the extractor.</param>
        /// <returns>A name of the form <c>page{index}.txt</c>.</returns>
        private static string GetPageFileName(int pageIndex)
        {
            return "page" + pageIndex + ".txt";
        }

        /// <summary>
        /// Loads <c>.\sample2.pdf</c>, saves every page's text to
        /// <c>page0.txt</c>, <c>page1.txt</c>, ... in the working directory and
        /// opens the first output file in its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int pageCount;

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(@".\sample2.pdf");

                // Get page count
                pageCount = extractor.GetPageCount();

                // Page indices passed to the extractor run from 0 to pageCount - 1
                for (int i = 0; i < pageCount; i++)
                {
                    // Save extracted page text to file
                    extractor.SavePageTextToFile(i, GetPageFileName(i));
                }
            }
            finally
            {
                // Cleanup, even if loading or saving throws
                extractor.Dispose();
            }

            // Open first output file in default associated application.
            // The first file is the one written for index 0; skip this step when
            // the document has no pages, because no file was written.
            if (pageCount > 0)
            {
                ProcessStartInfo processStartInfo = new ProcessStartInfo(@".\" + GetPageFileName(0));
                processStartInfo.UseShellExecute = true;
                Process.Start(processStartInfo);
            }
        }
    }
}