Extract Text from Foldable Brochure Booklet | C# | ByteScout PDF Extractor SDK

Extract Text from Foldable Brochure Booklet | C#


using Bytescout.PDFExtractor;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Text;

namespace ExtractTextFromFoldableBrochure
{
    /// <summary>
    /// Console sample: extracts the text of each panel of a foldable brochure
    /// from the first page of a PDF and writes the combined text to a file.
    /// </summary>
    class Program
    {
        // NOTE(review): the input file name is not shown in the original sample;
        // "sample.pdf" is an assumed default and can be overridden via args[0].
        private const string DefaultInputFile = "sample.pdf";

        /// <summary>
        /// Entry point. Extracts text from each configured region, writes it to
        /// "result.txt" and opens that file in the default associated application.
        /// </summary>
        /// <param name="args">Optional: args[0] is the path of the PDF to process.</param>
        static void Main(string[] args)
        {
            var inputFile = (args != null && args.Length > 0) ? args[0] : DefaultInputFile;

            // Set extraction regions (x, y, width, height), one per brochure panel.
            // Use Bytescout Template Editor / Bytescout PDF Multitool or another tool to find region coordinates.
            var extractionRegions = new List<RectangleF>
            {
                new RectangleF(7.5f, 33.8f, 244.5f, 353.3f),
                new RectangleF(273.8f, 201.8f, 247.5f, 198.0f),
                new RectangleF(537.8f, 27.0f, 246.0f, 268.5f)
            };

            // Output file
            var resFile = "result.txt";
            var result = new StringBuilder();

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using block performs the cleanup (Dispose) even if extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load the PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Loop through all extraction regions and collect their text.
                // Each region's text is appended; otherwise the output file would be empty.
                foreach (var region in extractionRegions)
                {
                    result.AppendLine(GetTextFromRegion(extractor, region));
                }
            }

            // Write all results to the output file
            File.WriteAllText(resFile, result.ToString());

            // Open result file in default associated application.
            // UseShellExecute lets the OS pick the application registered for .txt files.
            ProcessStartInfo processStartInfo = new ProcessStartInfo(Path.GetFullPath(resFile));
            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }

        /// <summary>
        /// Get text from a particular region of a page.
        /// </summary>
        /// <param name="textExtractor">Extractor with a document already loaded.</param>
        /// <param name="extractionRegion">Rectangle on the page to extract text from.</param>
        /// <param name="pageIndex">Index of the page to read; defaults to 0.</param>
        /// <returns>The text returned by the extractor for the given page and region.</returns>
        /// <remarks>
        /// The extraction area is set on the passed-in extractor and is not reset
        /// afterwards, so it stays in effect for later calls on the same instance.
        /// </remarks>
        private static string GetTextFromRegion(TextExtractor textExtractor, RectangleF extractionRegion, int pageIndex = 0)
        {
            // Set extraction area so only text inside the region is returned
            textExtractor.SetExtractionArea(extractionRegion);

            // Get text from that region
            return textExtractor.GetTextFromPage(pageIndex);
        }
    }
}