Free Trial
Web API version
Licensing
Request A Quote
HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM OR SEND AN EMAIL TO SUPPORT@BYTESCOUT.COM
Parse Multipage Table | C#
Program.cs:
C#
using System;
using ByteScout.DocumentParser;

// This example demonstrates parsing of multipage tables in two different approaches.
// See comments in the code of templates.
namespace ParseMultipageTable
{
    class Program
    {
        /// <summary>
        /// Parses the same input PDF twice, once with each template, and writes
        /// the results to <c>result1.json</c> and <c>result2.json</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string inputDocument = @".\MultiPageTable.pdf";
            string template1 = @".\MultiPageTable-template1.yml";
            string template2 = @".\MultiPageTable-template2.yml";

            ParseWithTemplate(1, template1, inputDocument, "result1.json");
            ParseWithTemplate(2, template2, inputDocument, "result2.json");

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            // Note: ReadLine() actually waits for Enter, not for an arbitrary key.
            Console.ReadLine();
        }

        /// <summary>
        /// Loads a single template into a fresh parser instance, parses the
        /// input document with it and saves the result as JSON.
        /// </summary>
        /// <param name="templateNumber">Ordinal used only in the console messages.</param>
        /// <param name="templatePath">Path of the YAML template to load.</param>
        /// <param name="inputDocument">Path of the document to parse.</param>
        /// <param name="outputFile">Path of the JSON file to write.</param>
        private static void ParseWithTemplate(int templateNumber, string templatePath, string inputDocument, string outputFile)
        {
            // A separate parser per template keeps the two runs independent.
            // The two "demo" arguments are presumably the SDK registration name and key - confirm against the SDK docs.
            using (DocumentParser documentParser = new DocumentParser("demo", "demo"))
            {
                Console.WriteLine($"Loading template {templateNumber}...");
                documentParser.AddTemplate(templatePath);
                Console.WriteLine($"Template {templateNumber} loaded.");
                Console.WriteLine();

                Console.WriteLine($"Parsing \"{inputDocument}\"...");
                Console.WriteLine();

                // Parse document data in JSON format
                documentParser.ParseDocument(inputDocument, outputFile, OutputFormat.JSON);
                Console.WriteLine($"Parsing results saved to `{outputFile}`.");
                Console.WriteLine();
            }
        }
    }
}
MultiPageTable-template1.yml:
# Template 1: table rows are recognized with a regular expression whose
# named groups (itemNo, description, price, qty, extPrice) match the column names.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
detectionRules:
  keywords:
    - Sample document with multi-page table
objects:
  # Single value captured by the parenthesized group after the word TOTAL.
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: TOTAL{{Spaces}}({{Number}})
      regex: true
      dataType: decimal
  - name: table1
    objectType: table
    tableProperties:
      # The table begins at the header line...
      start:
        expression: Item{{Spaces}}Description{{Spaces}}Price
        regex: true
      # ...and ends at the TOTAL line.
      end:
        expression: TOTAL{{Spaces}}{{Number}}
        regex: true
      # One row per line matching this expression.
      row:
        expression: '{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})'
        regex: true
      columns:
        - name: itemNo
          dataType: integer
        - name: description
          dataType: string
        - name: price
          dataType: decimal
        - name: qty
          dataType: integer
        - name: extPrice
          dataType: decimal
      # NOTE(review): nesting under tableProperties is inferred from key order - confirm against the SDK template schema.
      multipage: true
MultiPageTable-template2.yml:
# Template 2: table columns are defined by explicit x positions instead of a
# row regular expression.
templateName: Multipage Table Test
templateVersion: 4
templatePriority: 0
detectionRules:
  keywords:
    - Sample document with multi-page table
objects:
  # Single value captured by the parenthesized group after the word TOTAL.
  - name: total
    objectType: field
    fieldProperties:
      fieldType: regex
      expression: TOTAL{{Spaces}}({{Number}})
      regex: true
      dataType: decimal
  - name: table1
    objectType: table
    tableProperties:
      # The table begins at the header line.
      start:
        expression: Item{{Spaces}}Description{{Spaces}}Price
        regex: true
      # The table ends at either a "Page N of M" line or the TOTAL line.
      end:
        expression: (Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})
        regex: true
      # Horizontal bounds of the table; units are presumably PDF points - confirm against the SDK docs.
      left: 51
      right: 528
      # Each column is declared with an x position.
      columns:
        - x: 51
          name: itemNo
          dataType: integer
        - x: 102
          name: description
          dataType: string
        - x: 324
          name: price
          dataType: decimal
        - x: 396
          name: qty
          dataType: integer
        - x: 441
          name: extPrice
          dataType: decimal
      # NOTE(review): nesting under tableProperties is inferred from key order - confirm against the SDK template schema.
      multipage: true