Get Word Coordinates in XML | VB.NET | ByteScout PDF Extractor SDK

Get Word Coordinates in XML | VB.NET

Module1.vb:

VB
Imports Bytescout.PDFExtractor

Namespace PDF2XML

    ''' <summary>
    ''' Console sample that converts "sample3.pdf" to "output.XML" with
    ''' Bytescout.PDFExtractor.XMLExtractor, configured to emit word-level nodes.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point: loads the sample PDF, configures the extractor, saves the
        ''' XML output, releases the extractor and waits for a key press.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args As String())

            ' Create Bytescout.PDFExtractor.XMLExtractor instance
            Dim extractor As New XMLExtractor()

            Try
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf")

                ' Add the following params to get clean data with word nodes only
                extractor.DetectNewColumnBySpacesRatio = 0.1  ' this splits all text into words
                extractor.PreserveFormattingOnTextExtraction = False  ' get rid of empty nodes

                extractor.SaveXMLToFile("output.XML")
            Finally
                ' Cleanup: runs even when loading or saving throws, so the
                ' extractor is always released.
                extractor.Dispose()
            End Try

            Console.WriteLine()
            Console.WriteLine("Data has been extracted to 'output.XML' file.")
            Console.WriteLine()
            Console.WriteLine("Press any key...")
            Console.ReadKey()

        End Sub

    End Class

End Namespace