Free Trial
Web API version
Licensing
Request A Quote
HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM OR WRITE AN EMAIL TO SUPPORT@BYTESCOUT.COM
Parse Simple Document | C#
Program.cs:
C#
using System;
using System.Text;
using ByteScout.DocumentParser;

// This example demonstrates document data parsing to JSON, YAML and CSV formats.
namespace GeneralExample
{
    class Program
    {
        /// <summary>
        /// Loads the three invoice templates into a single <c>DocumentParser</c>,
        /// then parses one sample PDF per output format (JSON, YAML, CSV) and
        /// writes each result to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var templates = new string[]
            {
                "DigitalOcean.yml",
                "AmazonAWS.yml",
                "Google.yml"
            };

            string inputDocument1 = @".\DigitalOcean.pdf";
            string inputDocument2 = @".\AmazonAWS.pdf";
            string inputDocument3 = @".\Google.pdf";

            // Create DocumentParser instance
            using (DocumentParser documentParser = new DocumentParser("demo", "demo"))
            {
                Console.WriteLine("Loading templates...");

                foreach (var template in templates)
                {
                    documentParser.AddTemplate(template);
                }

                Console.WriteLine("Templates loaded.");
                Console.WriteLine();

                // Parse document data in JSON format
                AnnounceParsing(inputDocument1);
                string jsonString = documentParser.ParseDocument(inputDocument1, OutputFormat.JSON);
                PrintResults("JSON", jsonString);
                Console.WriteLine();

                // Parse document data in YAML format
                AnnounceParsing(inputDocument2);
                string yamlString = documentParser.ParseDocument(inputDocument2, OutputFormat.YAML);
                PrintResults("YAML", yamlString);
                Console.WriteLine();

                // Parse document data in CSV format
                AnnounceParsing(inputDocument3);
                string csvString = documentParser.ParseDocument(
                    inputDocument3,
                    OutputFormat.CSV,
                    new CSVOptions
                    {
                        GenerateColumnHeaders = true,
                        SeparatorCharacter = ",",
                        QuotationCharacter = "\"",
                        Encoding = Encoding.UTF8
                    });

                // The result is CSV (see OutputFormat.CSV above), so it is
                // labelled as CSV rather than XML.
                PrintResults("CSV", csvString);
            }

            Console.WriteLine();
            // Console.ReadLine() waits for Enter, so the prompt says so.
            Console.WriteLine("Press Enter to continue...");
            Console.ReadLine();
        }

        /// <summary>
        /// Writes the "Parsing ..." progress line for a document, followed by a blank line.
        /// </summary>
        /// <param name="documentPath">Path of the document about to be parsed.</param>
        private static void AnnounceParsing(string documentPath)
        {
            Console.WriteLine($"Parsing \"{documentPath}\"...");
            Console.WriteLine();
        }

        /// <summary>
        /// Writes a heading naming the output format, a blank line, and the parsed data.
        /// </summary>
        /// <param name="formatName">Display name of the output format, e.g. "JSON".</param>
        /// <param name="parsedData">Text returned by the parser for that format.</param>
        private static void PrintResults(string formatName, string parsedData)
        {
            Console.WriteLine($"Parsing results in {formatName} format:");
            Console.WriteLine();
            Console.WriteLine(parsedData);
        }
    }
}
AmazonAWS.yml:
# Parsing template "Amazon Web Services Invoice" (templateVersion 4).
# Detection keywords: "Amazon Web Services", "ATTN", "Invoice".
# Field objects: total, subTotal, dateIssued, invoiceId, companyName,
# companyWebsite, billTo (rectangle on pageIndex 0), currency.
# Table object: table1, from a "Detail" line to {{EndOfPage}}; each row captures
# "description" and "unitPrice" (unitPrice typed as decimal).
# NOTE(review): the template is shown flattened onto a single line; the original
# line breaks and indentation (YAML nesting) must be restored before use - confirm
# against the shipped AmazonAWS.yml.
templateName: Amazon Web Services Invoice templateVersion: 4 templatePriority: 0 detectionRules: keywords: - Amazon Web Services - ATTN - Invoice objects: - name: total objectType: field fieldProperties: fieldType: macros expression: TOTAL AMOUNT DUE ON{{Anything}}{{Dollar}}({{Number}}) regex: true dataType: decimal - name: subTotal objectType: field fieldProperties: fieldType: macros expression: '{{LineStart}}{{Spaces}}Charges{{Spaces}}{{Dollar}}({{Number}})' regex: true dataType: decimal - name: dateIssued objectType: field fieldProperties: fieldType: macros expression: Invoice Date:{{Spaces}}({{Anything}}){{LineEnd}} regex: true dataType: date dateFormat: MMMM d , yyyy - name: invoiceId objectType: field fieldProperties: fieldType: macros expression: Invoice Number:{{Spaces}}({{Digits}}) regex: true - name: companyName objectType: field fieldProperties: fieldType: static expression: Amazon Web Services, Inc. regex: true - name: companyWebsite objectType: field fieldProperties: fieldType: static expression: aws.amazon.com regex: true - name: billTo objectType: field fieldProperties: fieldType: rectangle expression: Bill to Address:{{ToggleSingleLineMode}}({{AnythingGreedy}}) regex: true rectangle: - 33 - 115.5 - 213.75 - 72.75 pageIndex: 0 - name: currency objectType: field fieldProperties: fieldType: static expression: USD regex: true - name: table1 objectType: table tableProperties: start: expression: '{{LineStart}}{{Spaces}}Detail{{LineEnd}}' regex: true end: expression: '{{EndOfPage}}' regex: true row: expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}}){{LineEnd}}' regex: true columns: - name: unitPrice dataType: decimal
DigitalOcean.yml:
# Parsing template "DigitalOcean Invoice" (templateVersion 4).
# Detection keywords: "DigitalOcean", "101 Avenue of the Americas", "Invoice Number".
# Field objects: companyName, invoiceId, dateIssued (dateFormat auto-mdy),
# total, currency.
# Table object: table1, from the "Description ... Hours" header to "Total:"; each
# row captures description, hours, start, end and unitPrice (hours typed as
# integer, unitPrice as decimal).
# NOTE(review): the template is shown flattened onto a single line; the original
# line breaks and indentation (YAML nesting) must be restored before use - confirm
# against the shipped DigitalOcean.yml.
templateName: DigitalOcean Invoice templateVersion: 4 templatePriority: 0 detectionRules: keywords: - DigitalOcean - 101 Avenue of the Americas - Invoice Number objects: - name: companyName objectType: field fieldProperties: fieldType: static expression: DigitalOcean regex: true - name: invoiceId objectType: field fieldProperties: fieldType: macros expression: 'Invoice Number: ({{Digits}})' regex: true - name: dateIssued objectType: field fieldProperties: fieldType: macros expression: 'Date Issued: ({{SmartDate}})' regex: true dataType: date dateFormat: auto-mdy - name: total objectType: field fieldProperties: fieldType: macros expression: 'Total: {{Dollar}}({{Number}})' regex: true dataType: decimal - name: currency objectType: field fieldProperties: fieldType: static expression: USD regex: true - name: table1 objectType: table tableProperties: start: expression: Description{{Spaces}}Hours regex: true end: expression: 'Total:' regex: true row: expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})' regex: true columns: - name: hours dataType: integer - name: unitPrice dataType: decimal
Google.yml:
# Parsing template "Google Invoice" (templateVersion 4).
# Detection keywords: "Google", "77-0493581", "Invoice".
# Field objects: invoiceId, dateIssued (dateFormat "MMM d, yyyy"), total, subTotal,
# taxRate, tax, companyName, billTo (rectangle on pageIndex 0, no expression),
# billingId, currency.
# Table object: table1, from the "Description ... Interval ... Quantity ... Amount"
# header to "Subtotal in USD"; each row captures description, interval, quantity
# and amount (quantity typed as integer, amount as decimal).
# NOTE(review): total, subTotal, taxRate and tax have no (...) capture group around
# the value macro, unlike the numeric fields in the other two templates - confirm
# this is intentional.
# NOTE(review): the template is shown flattened onto a single line; the original
# line breaks and indentation (YAML nesting) must be restored before use - confirm
# against the shipped Google.yml.
templateName: Google Invoice templateVersion: 4 templatePriority: 0 detectionRules: keywords: - Google - 77-0493581 - Invoice objects: - name: invoiceId objectType: field fieldProperties: expression: Invoice number:{{Spaces}}({{Digits}}) regex: true - name: dateIssued objectType: field fieldProperties: expression: Issue date:{{Spaces}}({{SmartDate}}) regex: true dataType: date dateFormat: MMM d, yyyy - name: total objectType: field fieldProperties: expression: Amount due in USD:{{Spaces}}{{Number}} regex: true dataType: decimal - name: subTotal objectType: field fieldProperties: expression: Subtotal in USD:{{Spaces}}{{Number}} regex: true dataType: decimal - name: taxRate objectType: field fieldProperties: expression: State sales tax {{OpeningParenthesis}}{{Digits}}{{Percent}}{{ClosingParenthesis}} regex: true dataType: integer - name: tax objectType: field fieldProperties: expression: State sales tax{{Anything}}{{Number}}{{LineEnd}} regex: true dataType: decimal - name: companyName objectType: field fieldProperties: fieldType: static expression: Google LLC regex: true - name: billTo objectType: field fieldProperties: fieldType: rectangle regex: true rectangle: - 0 - 152 - 280 - 72 pageIndex: 0 - name: billingId objectType: field fieldProperties: expression: Billing ID:{{Spaces}}({{DigitsOrSymbols}}) regex: true - name: currency objectType: field fieldProperties: fieldType: static expression: USD regex: true - name: table1 objectType: table tableProperties: start: expression: Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount regex: true end: expression: Subtotal in USD regex: true row: expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})' regex: true columns: - name: quantity dataType: integer - name: amount dataType: decimal