123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- const resolvePaths = require('./utils/resolvePaths');
- const circularize = require('./utils/circularize');
- const createJob = require('./createJob');
- const { log } = require('./utils/log');
- const getId = require('./utils/getId');
- const OEM = require('./constants/OEM');
- const {
- defaultOptions,
- spawnWorker,
- terminateWorker,
- onMessage,
- loadImage,
- send,
- } = require('./worker/node');
- let workerCounter = 0;
- module.exports = async (langs = 'eng', oem = OEM.LSTM_ONLY, _options = {}, config = {}) => {
- const id = getId('Worker', workerCounter);
- const {
- logger,
- errorHandler,
- ...options
- } = resolvePaths({
- ...defaultOptions,
- ..._options,
- });
- const resolves = {};
- const rejects = {};
- // Current langs, oem, and config file.
- // Used if the user ever re-initializes the worker using `worker.reinitialize`.
- const currentLangs = typeof langs === 'string' ? langs.split('+') : langs;
- let currentOem = oem;
- let currentConfig = config;
- const lstmOnlyCore = [OEM.DEFAULT, OEM.LSTM_ONLY].includes(oem) && !options.legacyCore;
- let workerResReject;
- let workerResResolve;
- const workerRes = new Promise((resolve, reject) => {
- workerResResolve = resolve;
- workerResReject = reject;
- });
- const workerError = (event) => { workerResReject(event.message); };
- let worker = spawnWorker(options);
- worker.onerror = workerError;
- workerCounter += 1;
- const setResolve = (promiseId, res) => {
- resolves[promiseId] = res;
- };
- const setReject = (promiseId, rej) => {
- rejects[promiseId] = rej;
- };
- const startJob = ({ id: jobId, action, payload }) => (
- new Promise((resolve, reject) => {
- log(`[${id}]: Start ${jobId}, action=${action}`);
- // Using both `action` and `jobId` in case user provides non-unique `jobId`.
- const promiseId = `${action}-${jobId}`;
- setResolve(promiseId, resolve);
- setReject(promiseId, reject);
- send(worker, {
- workerId: id,
- jobId,
- action,
- payload,
- });
- })
- );
- const load = () => (
- console.warn('`load` is depreciated and should be removed from code (workers now come pre-loaded)')
- );
- const loadInternal = (jobId) => (
- startJob(createJob({
- id: jobId, action: 'load', payload: { options: { lstmOnly: lstmOnlyCore, corePath: options.corePath, logging: options.logging } },
- }))
- );
- const writeText = (path, text, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'FS',
- payload: { method: 'writeFile', args: [path, text] },
- }))
- );
- const readText = (path, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'FS',
- payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] },
- }))
- );
- const removeFile = (path, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'FS',
- payload: { method: 'unlink', args: [path] },
- }))
- );
- const FS = (method, args, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'FS',
- payload: { method, args },
- }))
- );
- const loadLanguage = () => (
- console.warn('`loadLanguage` is depreciated and should be removed from code (workers now come with language pre-loaded)')
- );
- const loadLanguageInternal = (_langs, jobId) => startJob(createJob({
- id: jobId,
- action: 'loadLanguage',
- payload: {
- langs: _langs,
- options: {
- langPath: options.langPath,
- dataPath: options.dataPath,
- cachePath: options.cachePath,
- cacheMethod: options.cacheMethod,
- gzip: options.gzip,
- lstmOnly: [OEM.DEFAULT, OEM.LSTM_ONLY].includes(currentOem)
- && !options.legacyLang,
- },
- },
- }));
- const initialize = () => (
- console.warn('`initialize` is depreciated and should be removed from code (workers now come pre-initialized)')
- );
- const initializeInternal = (_langs, _oem, _config, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'initialize',
- payload: { langs: _langs, oem: _oem, config: _config },
- }))
- );
- const reinitialize = (langs = 'eng', oem, config, jobId) => { // eslint-disable-line
- if (lstmOnlyCore && [OEM.TESSERACT_ONLY, OEM.TESSERACT_LSTM_COMBINED].includes(oem)) throw Error('Legacy model requested but code missing.');
- const _oem = oem || currentOem;
- currentOem = _oem;
- const _config = config || currentConfig;
- currentConfig = _config;
- // Only load langs that are not already loaded.
- // This logic fails if the user downloaded the LSTM-only English data for a language
- // and then uses `worker.reinitialize` to switch to the Legacy engine.
- // However, the correct data will still be downloaded after initialization fails
- // and this can be avoided entirely if the user loads the correct data ahead of time.
- const langsArr = typeof langs === 'string' ? langs.split('+') : langs;
- const _langs = langsArr.filter((x) => !currentLangs.includes(x));
- currentLangs.push(..._langs);
- if (_langs.length > 0) {
- return loadLanguageInternal(_langs, jobId)
- .then(() => initializeInternal(langs, _oem, _config, jobId));
- }
- return initializeInternal(langs, _oem, _config, jobId);
- };
- const setParameters = (params = {}, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'setParameters',
- payload: { params },
- }))
- );
- const recognize = async (image, opts = {}, output = {
- blocks: true, text: true, hocr: true, tsv: true,
- }, jobId) => (
- startJob(createJob({
- id: jobId,
- action: 'recognize',
- payload: { image: await loadImage(image), options: opts, output },
- }))
- );
- const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => {
- console.log('`getPDF` function is depreciated. `recognize` option `savePDF` should be used instead.');
- return startJob(createJob({
- id: jobId,
- action: 'getPDF',
- payload: { title, textonly },
- }));
- };
- const detect = async (image, jobId) => {
- if (lstmOnlyCore) throw Error('`worker.detect` requires Legacy model, which was not loaded.');
- return startJob(createJob({
- id: jobId,
- action: 'detect',
- payload: { image: await loadImage(image) },
- }));
- };
- const terminate = async () => {
- if (worker !== null) {
- /*
- await startJob(createJob({
- id: jobId,
- action: 'terminate',
- }));
- */
- terminateWorker(worker);
- worker = null;
- }
- return Promise.resolve();
- };
- onMessage(worker, ({
- workerId, jobId, status, action, data,
- }) => {
- const promiseId = `${action}-${jobId}`;
- if (status === 'resolve') {
- log(`[${workerId}]: Complete ${jobId}`);
- let d = data;
- if (action === 'recognize') {
- d = circularize(data);
- } else if (action === 'getPDF') {
- d = Array.from({ ...data, length: Object.keys(data).length });
- }
- resolves[promiseId]({ jobId, data: d });
- } else if (status === 'reject') {
- rejects[promiseId](data);
- if (action === 'load') workerResReject(data);
- if (errorHandler) {
- errorHandler(data);
- } else {
- throw Error(data);
- }
- } else if (status === 'progress') {
- logger({ ...data, userJobId: jobId });
- }
- });
- const resolveObj = {
- id,
- worker,
- setResolve,
- setReject,
- load,
- writeText,
- readText,
- removeFile,
- FS,
- loadLanguage,
- initialize,
- reinitialize,
- setParameters,
- recognize,
- getPDF,
- detect,
- terminate,
- };
- loadInternal()
- .then(() => loadLanguageInternal(langs))
- .then(() => initializeInternal(langs, oem, config))
- .then(() => workerResResolve(resolveObj))
- .catch(() => {});
- return workerRes;
- };
|