[Share] OCR math to latex from image annotations #220
Replies: 5 comments 21 replies
-
请问讯飞的设置是把脚本中的XUNFEI对应的ID、SECRET和KEY替换成自己的就可以了吗?我替换之后每次操作都显示OCR failed,我要怎么才能知道为什么不成功呢?我在讯飞网页端试了一下是可以识别的,那么或许不是讯飞的问题?
Beta Was this translation helpful? Give feedback.
-
Beta Was this translation helpful? Give feedback.
-
It works nicely on macOS, but when I use it under Windows, nothing happens when I click on 'OCR math'. Is there any way to solve this?
Beta Was this translation helpful? Give feedback.
-
How can I insert the script into the Better Notes add-on for Zotero as a template, and which parts of the code do I have to change for SimpleTex OCR?
Beta Was this translation helpful? Give feedback.
-
Hey everyone! 🎉 Just switched the script to the Gemini API, leaving Bing behind and diving into the LLM era! 🚀 Gemini's OCR is top-notch, plus there's a free plan! Use it for fun! /**
* OCR math to latex from image annotations
* @author windingwind, invisprints
* @usage in annotation menu
* @link https://github.com/windingwind/zotero-actions-tags/discussions/220
* @see https://github.com/windingwind/zotero-actions-tags/discussions/220
*/
/** { 👍 "mathpix" | 👍 "simpletex" | "xunfei" | "gemini" } service provider */
// Name of the OCR backend to call. It is matched against the `case` labels in
// OCRImageAnnotation(); any other value yields "OCR Engine Not Found".
const SERVICE = "gemini";
// Mathpix credentials (https://mathpix.com/).
// Sent as the `app_id` / `app_key` request headers by mathpix().
const MATHPIX = {
APP_ID: "NOT SET",
APP_KEY: "NOT SET",
};
// iFLYTEK (Xunfei) formula recognition credentials
// (https://www.xfyun.cn/service/formula-discern).
// Used by xunfei() for the request body app id and for signing the request.
const XUNFEI = {
APP_ID: "NOT SET",
API_KEY: "NOT SET",
API_SECRET: "NOT SET",
};
// SimpleTex credentials (https://simpletex.cn/).
// Used by simpletex() to build the `app-id` header and the signed `sign` header.
const SIMPLETEX = {
APP_ID: "NOT SET",
APP_SECRET: "NOT SET",
};
// Google Gemini API key.
// gemini() appends it to the request URL as the `key` query parameter.
const GEMINI = {
API_KEY: "NOT SET",
};
// `item` is not declared in this script; presumably it is injected by the
// host that runs the script — TODO confirm. Nothing to do without it.
if (!item) {
return;
}
// Take the FormData constructor from Zotero's main window; simpletex() uses
// it to build the multipart upload.
const FormData = Zotero.getMainWindow().FormData;
// The script's result is the status message produced by OCRImageAnnotation().
return await OCRImageAnnotation(item);
/**
 * Run the configured OCR provider on an image annotation and, on success,
 * append the recognized text to the annotation's comment.
 *
 * Any exception raised while loading the image or calling the provider is
 * converted into an "OCR Error: ..." message instead of escaping, so every
 * provider reports failures the same way.
 *
 * @param {object} annotationItem - Item exposing `isAnnotation()`,
 *   `annotationType` and a writable `annotationComment`.
 * @returns {Promise<string>} Human-readable status message.
 */
async function OCRImageAnnotation(annotationItem) {
  if (
    !annotationItem.isAnnotation() ||
    annotationItem.annotationType !== "image"
  ) {
    return "Not an image annotation item";
  }

  let result;
  let success;
  try {
    // Providers receive the cached annotation image as a PNG data URI.
    const src = await Zotero.File.generateDataURI(
      Zotero.Annotations.getCacheImagePath(annotationItem),
      "image/png"
    );
    switch (SERVICE) {
      case "mathpix":
        ({ result, success } = await mathpix(src));
        break;
      case "xunfei":
        ({ result, success } = await xunfei(src));
        break;
      case "simpletex":
        ({ result, success } = await simpletex(src));
        break;
      case "gemini":
        ({ result, success } = await gemini(src));
        break;
      default:
        result = "OCR Engine Not Found";
        success = false;
    }
  } catch (error) {
    // Surface the failure reason to the user rather than letting it propagate.
    result = error?.message || String(error);
    success = false;
  }

  if (!success) {
    return `OCR Error: ${result}`;
  }

  // Keep any existing comment and put the OCR result on a new line after it.
  const previous = annotationItem.annotationComment
    ? `${annotationItem.annotationComment}\n`
    : "";
  annotationItem.annotationComment = `${previous}${result}`;
  return `OCR Result: ${result}`;
}
/**
 * Ask the Google Gemini API to transcribe the equation in an image as LaTeX.
 *
 * @param {string} src - Image data URI; the part after the last comma is sent
 *   as the base64 payload.
 * @returns {Promise<{success: boolean, result: string}>} The LaTeX text on
 *   success, otherwise an error description.
 */
async function gemini(src) {
  const failure = (message) => ({ success: false, result: message });

  try {
    // Strip the data URI prefix, leaving only the base64 payload.
    const base64Image = src.split(",").pop();
    const prompt = "Render this mathematical equation in LaTeX. Use `$` delimiters for inline mode and `$$` delimiters for display mode. Do not include any explanation or surrounding text.";
    const payload = {
      contents: [
        {
          parts: [
            { text: prompt },
            { inline_data: { mime_type: "image/png", data: base64Image } },
          ],
        },
      ],
    };
    const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=${GEMINI.API_KEY}`;

    const response = await Zotero.HTTP.request("POST", endpoint, {
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
      responseType: "json",
    });

    const answer = response?.response?.candidates?.[0]?.content?.parts?.[0]?.text;
    if (response?.status !== 200 || !answer) {
      return failure("Failed to parse Gemini API response");
    }

    // Wrap the answer in `$...$` only when it does not already start with a delimiter.
    const trimmed = answer.trim();
    const alreadyDelimited =
      trimmed.startsWith("$") || trimmed.startsWith("\\[");
    return {
      success: true,
      result: alreadyDelimited ? trimmed : `$${trimmed}$`,
    };
  } catch (error) {
    return failure(`Gemini API Error: ${error.message || error}`);
  }
}
/**
 * Send the image to the Mathpix `v3/text` endpoint and return the recognized
 * text, using `$` / `$$` as inline / display math delimiters.
 *
 * @param {string} src - Image data URI, forwarded unchanged as `src`.
 * @returns {Promise<{success: boolean, result: string}>} Recognized text on
 *   success, otherwise an error description.
 */
async function mathpix(src) {
  const xhr = await Zotero.HTTP.request(
    "POST",
    "https://api.mathpix.com/v3/text",
    {
      headers: {
        "Content-Type": "application/json; charset=utf-8",
        app_id: MATHPIX.APP_ID,
        app_key: MATHPIX.APP_KEY,
      },
      body: JSON.stringify({
        src,
        math_inline_delimiters: ["$", "$"],
        math_display_delimiters: ["$$", "$$"],
        rm_spaces: true,
      }),
      responseType: "json",
    }
  );

  // Guard every dereference: a missing request object or a missing JSON body
  // must produce an error result, not a TypeError.
  if (!xhr) {
    return { success: false, result: "No response from Mathpix" };
  }
  if (xhr.status !== 200) {
    return { success: false, result: `${xhr.status} Error` };
  }

  const { text, error } = xhr.response ?? {};
  if (text) {
    return { success: true, result: text };
  }
  return { success: false, result: error || "Mathpix returned no text" };
}
/**
 * Recognize formulas with the iFLYTEK (Xunfei) formula-discern REST API.
 *
 * The request is signed with HMAC-SHA256 over the host, date, request line
 * and a SHA-256 digest of the JSON body.
 *
 * 1. Doc: https://www.xfyun.cn/doc/words/formula-discern/API.html
 * 2. Error code: https://www.xfyun.cn/document/error-code
 *
 * @param {string} src - Image data URI; the part after the last comma is sent
 *   as the base64 image.
 * @returns {Promise<{success: boolean, result: string}>} Recognized text with
 *   the service's latex markers replaced by `$`, or an error description.
 */
async function xunfei(src) {
  const config = {
    hostUrl: "https://rest-api.xfyun.cn/v2/itr",
    host: "rest-api.xfyun.cn",
    appid: XUNFEI.APP_ID,
    apiSecret: XUNFEI.API_SECRET,
    apiKey: XUNFEI.API_KEY,
    uri: "/v2/itr",
  };
  const date = new Date().toUTCString();
  const postBody = {
    common: {
      app_id: config.appid,
    },
    business: {
      ent: "teach-photo-print",
      aue: "raw",
    },
    data: {
      image: src.split(",").pop(),
    },
  };

  // Serialize once so the digest covers exactly the bytes that are sent.
  // Hashing the object itself would hash the string "[object Object]".
  const body = JSON.stringify(postBody);
  const digest = `SHA-256=${arrayBufferToBase64(await sha256(body))}`;
  const signatureOrigin = `host: ${config.host}\ndate: ${date}\nPOST ${config.uri} HTTP/1.1\ndigest: ${digest}`;
  const signature = arrayBufferToBase64(
    await hmacSha256Digest(signatureOrigin, config.apiSecret)
  );
  const authorizationOrigin = `api_key="${config.apiKey}", algorithm="hmac-sha256", headers="host date request-line digest", signature="${signature}"`;

  const xhr = await Zotero.HTTP.request("POST", config.hostUrl, {
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json,version=1.0",
      Host: config.host,
      Date: date,
      Digest: digest,
      Authorization: authorizationOrigin,
    },
    body,
    responseType: "json",
  });

  if (xhr?.response?.code === 0) {
    const regions = xhr.response.data?.region ?? [];
    return {
      success: true,
      result: regions
        .filter((r) => r.type === "text")
        .map((r) => r.recog?.content ?? "")
        .join(" ")
        .replace(/ifly-latex-(begin)?(end)?/g, "$"),
    };
  }

  if (!xhr) {
    return { success: false, result: "No response from Xunfei" };
  }
  return {
    success: false,
    result:
      xhr.status === 200 && xhr.response
        ? `${xhr.response.code} ${xhr.response.message}`
        : `${xhr.status} Error`,
  };
}
/**
 * Upload the image to the SimpleTex `latex_ocr` endpoint and return the
 * recognized LaTeX wrapped in `$...$`.
 *
 * The request is authenticated with an MD5 signature over the app id, a
 * random string, the timestamp and the app secret.
 *
 * @param {string} src - Image data URI; the part after the last comma is
 *   decoded and uploaded as the file.
 * @returns {Promise<{success: boolean, result: string}>} LaTeX on success,
 *   otherwise "OCR failed".
 */
async function simpletex(src) {
  const image = base64ToArrayBuffer(src.split(",").pop());

  // Random string of length 16 drawn from 0-9, a-z, A-Z.
  const randomString = generateRandomString(16);
  // Timestamp in seconds.
  const timestamp = Math.floor(Date.now() / 1000);

  // The signed string must name the `timestamp` field explicitly; the header
  // of the same name carries the same value.
  const signatureOrigin = `app-id=${SIMPLETEX.APP_ID}&random-str=${randomString}&timestamp=${timestamp}&secret=${SIMPLETEX.APP_SECRET}`;
  const headers = {
    timestamp,
    "random-str": randomString,
    "app-id": SIMPLETEX.APP_ID,
    sign: Zotero.Utilities.Internal.md5(signatureOrigin),
  };

  // Send the image to the server as a multipart file upload.
  const formData = new FormData();
  formData.append("file", new Blob([image]), "annotation.png");
  const response = await fetch("https://server.simpletex.cn/api/latex_ocr", {
    method: "POST",
    body: formData,
    headers,
  });

  const result = await response.json();
  if (result.status) {
    return {
      result: `$${result.res.latex}$`,
      success: true,
    };
  }
  return {
    result: "OCR failed",
    success: false,
  };
}
/**
 * Decode a base64 string into its raw bytes.
 *
 * @param {string} base64 - Base64-encoded data.
 * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
 */
function base64ToArrayBuffer(base64) {
  const decoded = atob(base64);
  // Each character of the decoded binary string is one byte value.
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0)).buffer;
}
/**
 * Build a random string from the characters 0-9, a-z and A-Z.
 *
 * @param {number} length - Number of characters to generate.
 * @returns {string} The generated string.
 */
function generateRandomString(length) {
  const alphabet =
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const picked = [];
  for (let count = 0; count < length; count += 1) {
    const index = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet[index]);
  }
  return picked.join("");
}
/**
 * Encode the bytes of a buffer as a base64 string.
 *
 * @param {ArrayBuffer} buffer - Bytes to encode.
 * @returns {string} Base64 representation of the bytes.
 */
function arrayBufferToBase64(buffer) {
  // Turn every byte into one character of a binary string, then encode it.
  const binary = Array.from(new Uint8Array(buffer), (byte) =>
    String.fromCharCode(byte)
  ).join("");
  return btoa(binary);
}
/**
 * Compute the SHA-256 hash of a string's UTF-8 encoding.
 *
 * @param {string} str - Text to hash.
 * @returns {Promise<ArrayBuffer>} The 32-byte digest.
 */
async function sha256(str) {
  const bytes = new TextEncoder().encode(str);
  return crypto.subtle.digest("SHA-256", bytes);
}
/**
 * Sign a string with HMAC-SHA256.
 *
 * @param {string} stringToSign - Message to authenticate (UTF-8 encoded).
 * @param {string} secretKey - Secret used as the raw HMAC key (UTF-8 encoded).
 * @returns {Promise<ArrayBuffer>} The 32-byte signature.
 */
async function hmacSha256Digest(stringToSign, secretKey) {
  const enc = new TextEncoder();
  // Import the secret as a non-extractable key that may only be used to sign.
  const key = await crypto.subtle.importKey(
    "raw",
    enc.encode(secretKey),
    {
      name: "HMAC",
      hash: "SHA-256",
    },
    false,
    ["sign"]
  );
  return crypto.subtle.sign("HMAC", key, enc.encode(stringToSign));
}
Beta Was this translation helpful? Give feedback.
-
Description
OCR the math to latex from image annotations. Inherited from implementation of Better Notes plugin
Support
bing, mathpix (need setup), simpletex (need setup), and xunfei (need setup). For services that need setup, please modify the corresponding lines in the script.
Event
None
Operation
Script
Data
Anything else
Beta Was this translation helpful? Give feedback.
All reactions