Free Trial
Web API version
Licensing
Request A Quote
HAVE QUESTIONS OR NEED HELP? SUBMIT THE SUPPORT REQUEST FORM or write email to SUPPORT@BYTESCOUT.COM
Advanced Conversion Options With Rotated Input | VB.NET | PDF To XML API
Module1.vb:
VB
Imports System.IO Imports System.Net Imports Newtonsoft.Json Imports Newtonsoft.Json.Linq Module Module1 ' The authentication key (API Key). ' Get your own by registering at https://app.pdf.co Const API_KEY As String = "******************************" ' Source PDF file Const SourceFile As String = ".\sample-rotated.pdf" ' Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'. Const Pages As String = "" ' PDF document password. Leave empty for unprotected documents. Const Password As String = "" ' Destination XML file name Const DestinationFile As String = ".\result.xml" ' Some of advanced options available through profiles: ' (it can be single/double-quoted and contain comments.) ' { ' "profiles": [ ' { ' "profile1": { ' "SaveImages": "None", // Whether to extract images. Values: "None", "Embed". ' "ImageFormat": "PNG", // Image format for extracted images. Values: "PNG", "JPEG", "GIF", "BMP". ' "SaveVectors": false, // Whether to extract vector objects (vertical and horizontal lines). Values: true / false ' "ExtractInvisibleText": true, // Invisible text extraction. Values: true / false ' "ExtractShadowLikeText": true, // Shadow-like text extraction. Values: true / false ' "LineGroupingMode": "None", // Values: "None", "GroupByRows", "GroupByColumns", "JoinOrphanedRows" ' "ColumnDetectionMode": "ContentGroupsAndBorders", // Values: "ContentGroupsAndBorders", "ContentGroups", "Borders", "BorderedTables" ' "Unwrap": false, // Unwrap grouped text in table cells. Values: true / false ' "ShrinkMultipleSpaces": false, // Shrink multiple spaces in table cells that affect column detection. Values: true / false ' "DetectNewColumnBySpacesRatio": 1, // Spacing ratio that affects column detection. ' "CustomExtractionColumns": [ 0, 50, 150, 200, 250, 300 ], // Explicitly specify columns coordinates for table extraction. ' "CheckPermissions": true, // Ignore document permissions. Values: true / false ' } ' } ' ] ' } ' Sample profile that sets advanced conversion options. ' Advanced options are properties of XMLExtractor class from ByteScout PDF Extractor SDK used in the back-end: ' https://cdn.bytescout.com/help/BytescoutPDFExtractorSDK/html/87ce5fa6-3143-167d-abbd-bc7b5e160fe5.htm 'Valid RotationAngle values '0 - no rotation '1 - 90 degrees '2 - 180 degrees '3 - 270 degrees ReadOnly Profiles As String = File.ReadAllText("profile.json") Sub Main() ' Create standard .NET web client instance Dim webClient As WebClient = New WebClient() ' Set API Key webClient.Headers.Add("x-api-key", API_KEY) ' 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE. ' * If you already have a direct file URL, skip to the step 3. ' Prepare URL for `Get Presigned URL` API call Dim query As String = Uri.EscapeUriString(String.Format( "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name={0}", Path.GetFileName(SourceFile))) Try ' Execute request Dim response As String = webClient.DownloadString(query) ' Parse JSON response Dim json As JObject = JObject.Parse(response) If json("error").ToObject(Of Boolean) = False Then ' Get URL to use for the file upload Dim uploadUrl As String = json("presignedUrl").ToString() ' Get URL of uploaded file to use with later API calls Dim uploadedFileUrl As String = json("url").ToString() ' 2. UPLOAD THE FILE TO CLOUD. webClient.Headers.Add("content-type", "application/octet-stream") webClient.UploadFile(uploadUrl, "PUT", SourceFile) ' You can use UploadData() instead if your file is byte array or Stream ' Set JSON content type webClient.Headers.Add("Content-Type", "application/json") ' 3. CONVERT UPLOADED PDF FILE TO XML ' Prepare URL for `PDF To XML` API call Dim url As String = "https://api.pdf.co/v1/pdf/convert/to/xml" ' Prepare requests params as JSON ' See documentation: https : //apidocs.pdf.co Dim parameters As New Dictionary(Of String, Object) parameters.Add("name", Path.GetFileName(DestinationFile)) parameters.Add("password", Password) parameters.Add("pages", Pages) parameters.Add("url", uploadedFileUrl) parameters.Add("profiles", Profiles) ' Convert dictionary of params to JSON Dim jsonPayload As String = JsonConvert.SerializeObject(parameters) ' Execute POST request with JSON payload response = webClient.UploadString(url, jsonPayload) ' Parse JSON response json = JObject.Parse(response) If json("error").ToObject(Of Boolean) = False Then ' Get URL of generated XML file Dim resultFileUrl As String = json("url").ToString() ' Download XML file webClient.DownloadFile(resultFileUrl, DestinationFile) Console.WriteLine("Generated XML file saved as ""{0}"" file.", DestinationFile) Else Console.WriteLine(json("message").ToString()) End If End If Catch ex As WebException Console.WriteLine(ex.ToString()) End Try webClient.Dispose() Console.WriteLine() Console.WriteLine("Press any key...") Console.ReadKey() End Sub End Module