Free Trial
Web API version
Licensing
Request A Quote
HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM or send an email to SUPPORT@BYTESCOUT.COM
Parse Multipage Table | C#
Program.cs:
C#
using System;
using ByteScout.DocumentParser;

// This example demonstrates parsing of multipage tables in two different approaches.
// See comments in the code of templates.
namespace ParseMultipageTable
{
    class Program
    {
        /// <summary>
        /// Parses the sample PDF twice, once with each template, and writes each
        /// result to its own JSON file (result1.json and result2.json).
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string inputDocument = @".\MultiPageTable.pdf";
            string template1 = @".\MultiPageTable-template1.yml";
            string template2 = @".\MultiPageTable-template2.yml";

            ParseWithTemplate(inputDocument, template1, 1, "result1.json");
            ParseWithTemplate(inputDocument, template2, 2, "result2.json");

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            // Keeps the console window open; ReadLine returns once a line is entered.
            Console.ReadLine();
        }

        /// <summary>
        /// Loads a single template into a new parser instance, parses the document
        /// with it, and saves the result as JSON, reporting progress on the console.
        /// </summary>
        /// <param name="inputDocument">Path of the document to parse.</param>
        /// <param name="templatePath">Path of the YAML template to load.</param>
        /// <param name="templateNumber">Ordinal of the template, used only in console messages.</param>
        /// <param name="outputPath">Path of the JSON file the results are written to.</param>
        private static void ParseWithTemplate(string inputDocument, string templatePath, int templateNumber, string outputPath)
        {
            // A separate parser is created per run so that only one template is loaded at a time;
            // the using statement disposes it as soon as the run finishes.
            using (DocumentParser documentParser = new DocumentParser("demo", "demo"))
            {
                Console.WriteLine($"Loading template {templateNumber}...");
                documentParser.AddTemplate(templatePath);
                Console.WriteLine($"Template {templateNumber} loaded.");
                Console.WriteLine();

                Console.WriteLine($"Parsing \"{inputDocument}\"...");
                Console.WriteLine();

                // Parse document data in JSON format
                documentParser.ParseDocument(inputDocument, outputPath, OutputFormat.JSON);

                Console.WriteLine($"Parsing results saved to `{outputPath}`.");
                Console.WriteLine();
            }
        }
    }
}
MultiPageTable-template1.yml:
# Template 1: the table rows are described by a single row expression whose
# named groups (itemNo, description, price, qty, extPrice) map to the columns.
# NOTE(review): the block nesting below was reconstructed from a
# whitespace-collapsed source; confirm that `multipage` belongs under
# `tableProperties`.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Single value captured from the "TOTAL <number>" line.
- name: total
  objectType: field
  fieldProperties:
    fieldType: macros
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
- name: table1
  objectType: table
  tableProperties:
    # Table boundaries: the column header line and the TOTAL line.
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    end:
      expression: TOTAL{{Spaces}}{{Number}}
      regex: true
    # One expression per row; each named group feeds the column of the same name.
    row:
      expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})'
      regex: true
    columns:
    - name: itemNo
      dataType: integer
    - name: description
      dataType: string
    - name: price
      dataType: decimal
    - name: qty
      dataType: integer
    - name: extPrice
      dataType: decimal
    multipage: true
MultiPageTable-template2.yml:
# Template 2: the table columns are described by x positions between the
# `left` and `right` bounds instead of a row expression.
# NOTE(review): the block nesting below was reconstructed from a
# whitespace-collapsed source; confirm that `left`, `right` and `multipage`
# belong under `tableProperties`.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
detectionRules:
  keywords:
  - Sample document with multi-page table
objects:
# Single value captured from the "TOTAL <number>" line.
- name: total
  objectType: field
  fieldProperties:
    fieldType: regex
    expression: TOTAL{{Spaces}}({{Number}})
    regex: true
    dataType: decimal
- name: table1
  objectType: table
  tableProperties:
    start:
      expression: Item{{Spaces}}Description{{Spaces}}Price
      regex: true
    # The end expression matches either a "Page N of M" line or the TOTAL line.
    end:
      expression: (Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})
      regex: true
    left: 51
    right: 528
    # Each column is declared with its x position.
    columns:
    - x: 51
      name: itemNo
      dataType: integer
    - x: 102
      name: description
      dataType: string
    - x: 324
      name: price
      dataType: decimal
    - x: 396
      name: qty
      dataType: integer
    - x: 441
      name: extPrice
      dataType: decimal
    multipage: true