-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathOCR.ahk
177 lines (154 loc) · 5.5 KB
/
OCR.ahk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/**
* OCR library by camerb
* v0.96 - 2013-03-21
*
* This OCR lib provides an easy way to check a part of the screen for
* machine-readable text. You should note that OCR isn't a perfect technology,
* and will frequently make mistakes, but it can give you a general idea of
* what text is in a given area. For example, a common mistake that this OCR
* function makes is that it frequently interprets slashes, lowercase L,
* lowercase I, and the number 1 interchangeably. Results can also vary
* greatly based upon where the outer bounds of the area to scan are placed.
*
* Future plans include a function that will check if a given string is
* displayed within the given coordinates on the screen.
*
* Home thread: http://www.autohotkey.com/forum/viewtopic.php?t=74227
* With inspiration from: http://www.autohotkey.com/forum/viewtopic.php?p=93526#93526
*/
#Include GDIp.ahk
; the options parameter is a string and can contain any combination of the following:
; debug - for use to show errors that GOCR spits out (not helpful for daily use)
; numeric (or numeral, or number) - the text being scanned should be limited to
; numbers only (no letters or special characters)
GetOCR(topLeftX="", topLeftY="", widthToScan="", heightToScan="", options="")
{
;TODO validate to ensure that the coords are numbers
prevBatchLines := A_BatchLines
SetBatchlines, -1 ;cuts the average time down from 140ms to 115ms for small areas
;set defaults
isActiveWindowMode := false
if (topLeftY == "" AND widthToScan == "" AND heightToScan == "")
{
;no coordinates were provided
isSingleParamMode := true
options := topLeftX
}
;process options from the options param, if they are there
if options
{
if InStr(options, "debug")
isDebugMode:=true
if InStr(options, "numeral")
isNumericMode:=true
if InStr(options, "numeric")
isNumericMode:=true
if InStr(options, "lighter")
isLighterMode:=true
if InStr(options, "activeWindow")
isActiveWindowMode:=true
;if InStr(options, "screenCoord")
; isActiveWindowMode:=false
}
if isSingleParamMode
{
;TODO throw error if not in the right coordmode
;or perhaps we can just process the entire screen
topLeftX := 0
topLeftY := 0
if isActiveWindowMode
{
WinGetActiveStats, no, winWidth, winHeight, no, no
widthToScan := winWidth
heightToScan := winHeight
}
else
{
;TODO fix this so that it gets the full width and full height across all monitors
widthToScan := A_ScreenWidth
heightToScan := A_ScreenHeight
}
}
if isActiveWindowMode
{
WinGetPos, xOffset, yOffset, no, no, A
topLeftX += xOffset
topLeftY += yOffset
}
;need to figure out if changing the coordmode will mess things up
;CoordMode, Mouse, Window
;else
;CoordMode, Mouse, Screen
filenameJpg := "in.jpg"
filenameTif := "in.tif"
filenameTxt := "in.txt"
jpegQuality := 100
convertPath=convert.exe
tesseractPath=%A_ScriptDir%\Tesseract-OCR\tesseract.exe
;take a screenshot of the specified area
pToken:=Gdip_Startup()
pBitmap:=Gdip_BitmapFromScreen(topLeftX "|" topLeftY "|" widthToScan "|" heightToScan)
Gdip_SaveBitmapToFile(pBitmap, filenameJpg, jpegQuality)
Gdip_Shutdown(pToken)
; Wait for jpg file to exist
while NOT FileExist(filenameJpg)
Sleep, 10
;ensure the exes are there
if NOT FileExist(convertPath)
return "ERROR: convert.exe not found in expected location"
if NOT FileExist(tesseractPath)
return "ERROR: tesseract.exe not found in expected location"
;convert the jpg file to tiff
;NOTE maybe converting to greyscale isn't the best idea
; ... does it increase reliability or speed?
if(isLighterMode = true) {
convertCmd=convert.exe %filenameJpg% -colorspace gray +dither -threshold 75`% -normalize -negate -bordercolor white -border 20x20 -resize 200`% %filenameTif%
} else {
convertCmd=convert.exe %filenameJpg% -colorspace gray +dither -threshold 85`% -normalize -negate -bordercolor white -border 20x20 -resize 200`% %filenameTif%
}
Runwait, %comspec% /c %convertCmd%,, Hide
; Wait for tif file to exist
while NOT FileExist(filenameTif)
Sleep, 10
;run the OCR command using my mixed cmdret hack
if isNumericMode
additionalParams .= "-C 0-9 "
runCmd=%tesseractPath% %filenameTif% in digits
Runwait, %comspec% /c %runCmd% ,, Hide
while NOT FileExist(filenameTxt)
Sleep, 10
FileRead, result, %filenameTxt%
FileDelete, %filenameTxt%
;suppress warnings from GOCR (we don't care, give us nothing)
if InStr(result, "NOT NORMAL")
gocrError:=true
if InStr(result, "strong rotation angle detected")
gocrError:=true
if InStr(result, "# no boxes found - stopped") ;multiple warnings show up with this in the string
gocrError:=true
if gocrError
{
if NOT isDebugMode
result=
}
if isNumericMode
{
result := RegExReplace(result, "[ _]+", " ")
}
if NOT isDebugMode
{
; Cleanup (preserve the files if in debug mode)
FileDelete, %filenameTif%
FileDelete, %filenameJpg%
}
else
{
;copy to an archive folder if in debug mode
FileCreateDir, ocr-archive
FormatTime, timestamp,, yyyy-MM-dd_HH-mm-ss
FileCopy, %filenameJpg%, ocr-archive\%timestamp%.jpg
}
;return to previous speed
SetBatchlines, %prevBatchLines%
return result
}