Free Trial
Web API version
Licensing
Request A Quote
HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM or write an email to SUPPORT@BYTESCOUT.COM
OCR Modes | C#
Program.cs:
C#
using System;
using Bytescout.PDFExtractor;

// To make OCR work you should add the following references to your project:
// 'Bytescout.PDFExtractor.dll', 'Bytescout.PDFExtractor.OCRExtension.dll'.

namespace OCRModes
{
    class Program
    {
        // Horizontal rule printed above and below each "Extraction Mode" heading.
        private const string Separator = "---------------------------------";

        /// <summary>
        /// Extracts text from the sample document once per OCR mode, prints each
        /// result under a heading naming the mode, then waits for a line of
        /// console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Input document containing vector, image and font
            string inputDocument = @".\SampleWith_Vector_Image_Font.pdf";

            // Modes are processed in this order:
            // 1-2 are the "plain" modes, 3-4 the "combined" modes.
            OCRMode[] ocrModes =
            {
                OCRMode.TextFromImagesOnly,
                OCRMode.TextFromVectorsOnly,
                OCRMode.TextFromImagesAndFonts,
                OCRMode.TextFromImagesAndVectorsAndFonts
            };

            foreach (OCRMode ocrMode in ocrModes)
            {
                _PrintExtractionResult(inputDocument, ocrMode);
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Prints a heading naming the OCR mode, followed by the text extracted
        /// from the document with that mode.
        /// </summary>
        /// <param name="inputDocument">Path of the PDF document to read.</param>
        /// <param name="ocrMode">OCR mode to extract with; its enum name is used as the heading label.</param>
        private static void _PrintExtractionResult(string inputDocument, OCRMode ocrMode)
        {
            // The label comes from the enum value itself so the heading always
            // matches the mode that is actually applied.
            Console.WriteLine(Separator + "\nExtraction Mode: " + ocrMode + " \n" + Separator);

            string resultText = _ExtractTextWithSpecificOCRMode(inputDocument, ocrMode);
            Console.WriteLine(resultText);
        }

        /// <summary>
        /// Extract text from document with specific Ocr Mode
        /// </summary>
        /// <param name="inputDocument">Path of the PDF document to load.</param>
        /// <param name="ocrMode">OCR mode assigned to the extractor before the text is read.</param>
        /// <returns>The string returned by <c>TextExtractor.GetText()</c> for the loaded document.</returns>
        private static string _ExtractTextWithSpecificOCRMode(string inputDocument, OCRMode ocrMode)
        {
            // Location of language data files
            string ocrLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

            // OCR language
            // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc -
            // according to files in "ocrdata" folder.
            // Find more language files at https://github.com/bytescout/ocrdata
            string ocrLanguage = "eng";

            // Create TextExtractor instance; the using statement disposes it on exit.
            // NOTE(review): the two "demo" arguments are presumably the registration
            // name and key - confirm against the SDK documentation.
            using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
            {
                // Load document to TextExtractor
                textExtractor.LoadDocumentFromFile(inputDocument);

                // Specify Ocr Mode
                textExtractor.OCRMode = ocrMode;

                // Ocr language data folder path and language
                textExtractor.OCRLanguageDataFolder = ocrLanguageDataFolder;
                textExtractor.OCRLanguage = ocrLanguage;

                // Return extracted text
                return textExtractor.GetText();
            }
        }
    }
}