Free Trial
Web API version
Licensing
Request A Quote
Have questions or need help? Submit the support request form or write an email to support@bytescout.com
PDF To JSON | PowerShell
pdf-to-json.bat:
@echo off
rem ---------------------------------------------------------------------------
rem pdf-to-json.bat
rem
rem Launches pdf-to-json.ps1 for the folder given as the first argument.
rem
rem Usage:   pdf-to-json.bat folder_path
rem Example: pdf-to-json.bat "c:\documents"
rem
rem Environment:
rem   NOPAUSE=1  - skip every "pause" (useful for unattended runs).
rem
rem Exit code: 1 when no folder is given, otherwise the exit code returned
rem            by powershell.exe.
rem ---------------------------------------------------------------------------
setlocal

rem %~1 is the first argument with surrounding quotes removed, so the check
rem works for both quoted and unquoted input.
if "%~1"=="" (
    echo -----------------------------------------------------
    echo Invalid parameter!
    echo -----------------------------------------------------
    echo Usage: pdf-to-json.bat folder_path
    echo Example: pdf-to-json.bat "c:\documents"
    echo -----------------------------------------------------
    if not "%NOPAUSE%"=="1" pause
    exit /b 1
)

rem %~dp0 is the folder of this batch file, so the .ps1 is found regardless of
rem the caller's current directory. -File passes "%~1" as a single argument
rem (paths with spaces stay intact) and makes the script's exit code the exit
rem code of powershell.exe.
powershell -NoProfile -ExecutionPolicy Bypass -File "%~dp0pdf-to-json.ps1" "%~1"

rem Save the exit code before any other command can overwrite it.
set "RC=%errorlevel%"

echo Script finished with errorlevel=%RC%
if not "%NOPAUSE%"=="1" pause
exit /b %RC%
pdf-to-json.ps1:
# pdf-to-json.ps1
#
# Recursively finds *.pdf files under -InputFolder and, for each one, writes a
# JSON file next to it (same name, "json" extension) using the Bytescout PDF
# Extractor SDK JSONExtractor.
#
# Parameters:
#   -InputFolder  Folder to scan for PDF files (mandatory).
#
# Exit codes:
#   0  all files were processed without an exception
#   1  the input folder does not exist, or an exception was thrown while
#      processing (processing stops at the first exception)
Param(
    [Parameter(Mandatory = $true)]
    [string] $InputFolder
)

# Check the input folder exists (and really is a folder) before doing any work
if (-not (Test-Path -Path $InputFolder -PathType Container)) {
    Write-Host "Target folder does not exist." -ForegroundColor Red
    # Non-zero so the calling batch file / CI can detect the failure
    exit 1
}

# Add reference to Bytescout.PDFExtractor.dll assembly.
# -ErrorAction Stop: do not continue without the SDK assembly loaded.
Add-Type -Path "c:\Program Files\Bytescout PDF Extractor SDK\net4.00\Bytescout.PDFExtractor.dll" -ErrorAction Stop

# Create and activate JSONExtractor instance
$jsonExtractor = New-Object Bytescout.PDFExtractor.JSONExtractor
$jsonExtractor.RegistrationName = "demo"
$jsonExtractor.RegistrationKey = "demo"

$exitCode = 0

try {
    # Get PDF files from input folder; -File skips directories whose name
    # happens to end in ".pdf"
    $files = Get-ChildItem -Path $InputFolder -Recurse -Include "*.pdf" -File

    foreach ($file in $files) {
        Write-Host "Input file" $file.FullName

        # Construct output file name
        $jsonFileName = [System.IO.Path]::ChangeExtension($file.FullName, "json")
        Write-Host " Output file" $jsonFileName

        # Load PDF document
        $jsonExtractor.LoadDocumentFromFile($file.FullName)

        # Disable the formatting reconstruction
        $jsonExtractor.PreserveFormattingOnTextExtraction = $false

        # Extract first page (page index 0) to JSON
        $jsonExtractor.SaveJSONToFile(0, $jsonFileName)

        # Reset extractor so the next document starts from a clean state
        $jsonExtractor.Reset()
    }
}
catch {
    Write-Host $_.Exception.Message
    # Report the failure to the caller instead of exiting with success
    $exitCode = 1
}
finally {
    # Always release the extractor, even when an exception was thrown
    $jsonExtractor.Dispose()
}

exit $exitCode