Extract CSV from PDF and Fill Database in SQL Server | VB.NETByteScout PDF Extractor SDK

Extract CSV from PDF and Fill Database in SQL Server | VB.NET

Program.vb:

VB
Imports System.Data.SqlClient
Imports Bytescout.PDFExtractor

Module Program

    Sub Main()

        Try

            ' Step-1: Get Datatable
            Dim oDataTable = GetDataTableFromDocument("sample.pdf")

            ' PLEASE NOTE Please Replace With your connection String, You need to have "PersonData" table into your database.
            ' You can find that table from Scripts.sql file
            Dim connectionString As String = "Data Source=DESKTOP-92VMCQG\SQLEXPRESS;Initial Catalog=SampleDatabase;Persist Security Info=True;User ID=sa;Password=Hiren@009"

            ' Step-2: Insert into database
            InsertIntoSqlServerDatabase(oDataTable, connectionString)

            ' Step-3: Fetch from database and display results
            DisplayDatabaseResults(connectionString)

        Catch ex As Exception
            Console.WriteLine(ex.Message)
        End Try

        Console.WriteLine("Press enter key to exit...")
        Console.ReadLine()
    End Sub

    ''' <summary>
    ''' Inserts into Sql Server database
    ''' </summary>
    Private Sub InsertIntoSqlServerDatabase(ByVal oDataTable As DataTable, ByVal connectionString As String)

        Using con As SqlConnection = New SqlConnection(connectionString)

            ' Open connection
            con.Open()

            ' Sql query to insert data
            Dim cmdInsert As String = "Insert into PersonData (id, first_name, last_name, email, gender, ip_address) values (@id, @first_name, @last_name, @email, @gender, @ip_address)"

            For Each itmRow As DataRow In oDataTable.Rows

                ' Prepare sql command
                Dim cmd As SqlCommand = New SqlCommand(cmdInsert, con)
                cmd.CommandType = CommandType.Text

                cmd.Parameters.Add(New SqlParameter("@id", Convert.ToString(itmRow("id"))))
                cmd.Parameters.Add(New SqlParameter("@first_name", Convert.ToString(itmRow("first_name"))))
                cmd.Parameters.Add(New SqlParameter("@last_name", Convert.ToString(itmRow("last_name"))))
                cmd.Parameters.Add(New SqlParameter("@email", Convert.ToString(itmRow("email"))))
                cmd.Parameters.Add(New SqlParameter("@gender", Convert.ToString(itmRow("gender"))))
                cmd.Parameters.Add(New SqlParameter("@ip_address", Convert.ToString(itmRow("ip_address"))))

                ' Execute sql command
                cmd.ExecuteNonQuery()
            Next

            ' Close connection
            con.Close()

        End Using

    End Sub

    ' Displays inserted database results
    Private Sub DisplayDatabaseResults(ByVal connectionString As String)

        ' Person data holder
        Dim personDataTable As DataTable = New DataTable()

        Using con As SqlConnection = New SqlConnection(connectionString)

            ' Sql query to fetch data
            Dim cmdInsert As String = "SELECT id, first_name, last_name, email, gender, ip_address FROM PersonData;"

            ' Prepare sql command
            Dim cmd As SqlCommand = New SqlCommand(cmdInsert, con)
            cmd.CommandType = CommandType.Text

            ' Prepare DataAdapter 
            Dim dataAdapter As SqlDataAdapter = New SqlDataAdapter(cmd)

            ' Fill person dataTable
            dataAdapter.Fill(personDataTable)

        End Using

        ' Display all person data if any
        If personDataTable IsNot Nothing AndAlso personDataTable.Rows.Count > 0 Then

            ' Print all columns
            For Each column As DataColumn In personDataTable.Columns
                Console.Write("{0} | ", column.ColumnName)
            Next
            Console.WriteLine()

            ' Print all data
            For Each dataRow As DataRow In personDataTable.Rows

                For Each column As DataColumn In personDataTable.Columns
                    Console.Write("{0} | ", dataRow(column.ColumnName))
                Next

                Console.WriteLine()
            Next
        Else
            Console.WriteLine("No data retrieved..")
        End If
    End Sub

    ''' <summary>
    ''' Get DataTable from Document
    ''' </summary>
    Private Function GetDataTableFromDocument(ByVal fileName As String) As DataTable
        Dim oDataTable As DataTable = Nothing

        ' Initialise table detector
        Using tableDetector As TableDetector = New TableDetector("demo", "demo")

            Using CSVExtractor As CSVExtractor = New CSVExtractor("demo", "demo")

                ' Set table detection mode to "bordered tables" - best for tables with closed solid borders.
                tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables

                ' We should define what kind of tables we should detect.
                ' So we set min required number of columns to 2 ...
                tableDetector.DetectionMinNumberOfColumns = 2
                ' ... and we set min required number of rows to 2
                tableDetector.DetectionMinNumberOfRows = 2

                ' Load PDF document
                tableDetector.LoadDocumentFromFile(fileName)
                CSVExtractor.LoadDocumentFromFile(fileName)

                ' Get page count
                Dim pageCount As Integer = tableDetector.GetPageCount()

                If tableDetector.FindTable(0) Then
                    ' Set extraction area for CSV extractor to rectangle received from the table detector
                    CSVExtractor.SetExtractionArea(tableDetector.FoundTableLocation)

                    ' Generate CSV data
                    Dim allCsvData = CSVExtractor.GetCSV()

                    ' Generate Datatable
                    oDataTable = GetDataTableFromCSV(allCsvData)
                End If
            End Using
        End Using

        Return oDataTable
    End Function

    ''' <summary>
    ''' Get Datatable from CSV
    ''' </summary>
    Private Function GetDataTableFromCSV(ByVal allCsvData As String) As DataTable

        Dim oRetDataTable = New DataTable()
        oRetDataTable.Columns.Add("id")
        oRetDataTable.Columns.Add("first_name")
        oRetDataTable.Columns.Add("last_name")
        oRetDataTable.Columns.Add("email")
        oRetDataTable.Columns.Add("gender")
        oRetDataTable.Columns.Add("ip_address")

        Dim rows = allCsvData.Split(vbLf)

        For iRow As Integer = 1 To rows.Length - 1

            ' Get all column data
            Dim columns = rows(iRow).Split(","c)

            If columns.Length >= 5 Then
                ' Prepare new row
                Dim oRow = oRetDataTable.NewRow()
                oRow("id") = columns(0)
                oRow("first_name") = columns(1)
                oRow("last_name") = columns(2)
                oRow("email") = columns(3)
                oRow("gender") = columns(4)
                oRow("ip_address") = columns(5)

                ' Add row back to datatable
                oRetDataTable.Rows.Add(oRow)
            End If
        Next

        ' Return DataTable
        Return oRetDataTable
    End Function

End Module