forked from jourdant/powershell-paperless
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtesseractlib.psm1
61 lines (49 loc) · 1.86 KB
/
tesseractlib.psm1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#
# Title: tesseractlib.psm1
# Author: Jourdan Templeton
# Email: [email protected]
# Modified: 04/01/2015 08:30PM NZDT
#
# Load the System.Drawing imaging types and the Tesseract .NET wrapper shipped in this module's Lib folder.
Add-Type -AssemblyName "System.Drawing"
Add-Type -Path "$PSScriptRoot\Lib\Tesseract.dll"

# Module-wide OCR engine used by Get-TessTextFromImage.
# The tessdata folder is resolved relative to the module file ($PSScriptRoot), the same way the DLL
# is located above, so importing the module works regardless of the caller's current directory.
$tessDataPath = (Get-Item (Join-Path $PSScriptRoot "Lib\tessdata")).FullName
$tesseract = New-Object Tesseract.TesseractEngine($tessDataPath, "eng", [Tesseract.EngineMode]::Default, $null)
<#
.SYNOPSIS
Runs OCR on an image and returns the recognised text together with the mean confidence.
.DESCRIPTION
Accepts either an image that is already loaded into memory or the path to an image file,
processes it with the module-level Tesseract engine and returns an object with two properties:
Text (the recognised text) and Confidence (the mean confidence reported for the page).
You can pipe in a System.Drawing.Image, a path string, or an object exposing a FullName
property (for example the output of Get-ChildItem or Get-Item).
.PARAMETER Image
The image already loaded into memory. The caller keeps ownership: it is not disposed by this cmdlet.
.PARAMETER Path
The path to the image file to be processed (alias: FullName). The bitmap loaded from this path
is disposed before the cmdlet returns.
.EXAMPLE
$image = New-Object System.Drawing.Bitmap("c:\test.jpg")
Get-TessTextFromImage -Image $image
.EXAMPLE
New-Object System.Drawing.Bitmap("C:\test.jpg") | Get-TessTextFromImage
.EXAMPLE
Get-TessTextFromImage -Path "c:\test.jpg"
.EXAMPLE
Get-ChildItem "c:\scans\*.jpg" | Get-TessTextFromImage
#>
Function Get-TessTextFromImage()
{
    Param(
        [Parameter(Mandatory=$true, ValueFromPipeline=$true, ParameterSetName="ImageObject")][System.Drawing.Image]$Image,
        # ValueFromPipelineByPropertyName lets piped file objects bind through the FullName alias
        # instead of being coerced to a string via ToString().
        [Parameter(Mandatory=$true, ValueFromPipeline=$true, ValueFromPipelineByPropertyName=$true, ParameterSetName="FilePath")][Alias("FullName")][String]$Path
    )

    Process {
        # Only a bitmap that this cmdlet loaded itself may be disposed here.
        $ownsImage = $PsCmdlet.ParameterSetName -eq "FilePath"
        $pix = $null
        $page = $null

        try {
            #load image if path is a param
            If ($ownsImage) { $Image = New-Object System.Drawing.Bitmap((Get-Item $Path -ErrorAction Stop).FullName) }

            #perform OCR on image
            $pix = [Tesseract.PixConverter]::ToPix($Image)
            $page = $tesseract.Process($pix)

            #build return object
            $ret = New-Object PSObject -Property @{"Text"= $page.GetText();
                                                   "Confidence"= $page.GetMeanConfidence()}
            return $ret
        }
        finally {
            #clean up references, even when OCR fails: the shared engine cannot start a new page
            #until the previous one has been disposed, and the Pix wraps a native resource.
            If ($null -ne $page) { $page.Dispose() }
            If ($null -ne $pix) { $pix.Dispose() }
            If ($ownsImage -and ($null -ne $Image)) { $Image.Dispose() }
        }
    }
}