createWorker.js 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. const resolvePaths = require('./utils/resolvePaths');
  2. const circularize = require('./utils/circularize');
  3. const createJob = require('./createJob');
  4. const { log } = require('./utils/log');
  5. const getId = require('./utils/getId');
  6. const OEM = require('./constants/OEM');
  7. const {
  8. defaultOptions,
  9. spawnWorker,
  10. terminateWorker,
  11. onMessage,
  12. loadImage,
  13. send,
  14. } = require('./worker/node');
  15. let workerCounter = 0;
  16. module.exports = async (langs = 'eng', oem = OEM.LSTM_ONLY, _options = {}, config = {}) => {
  17. const id = getId('Worker', workerCounter);
  18. const {
  19. logger,
  20. errorHandler,
  21. ...options
  22. } = resolvePaths({
  23. ...defaultOptions,
  24. ..._options,
  25. });
  26. const resolves = {};
  27. const rejects = {};
  28. // Current langs, oem, and config file.
  29. // Used if the user ever re-initializes the worker using `worker.reinitialize`.
  30. const currentLangs = typeof langs === 'string' ? langs.split('+') : langs;
  31. let currentOem = oem;
  32. let currentConfig = config;
  33. const lstmOnlyCore = [OEM.DEFAULT, OEM.LSTM_ONLY].includes(oem) && !options.legacyCore;
  34. let workerResReject;
  35. let workerResResolve;
  36. const workerRes = new Promise((resolve, reject) => {
  37. workerResResolve = resolve;
  38. workerResReject = reject;
  39. });
  40. const workerError = (event) => { workerResReject(event.message); };
  41. let worker = spawnWorker(options);
  42. worker.onerror = workerError;
  43. workerCounter += 1;
  44. const setResolve = (promiseId, res) => {
  45. resolves[promiseId] = res;
  46. };
  47. const setReject = (promiseId, rej) => {
  48. rejects[promiseId] = rej;
  49. };
  50. const startJob = ({ id: jobId, action, payload }) => (
  51. new Promise((resolve, reject) => {
  52. log(`[${id}]: Start ${jobId}, action=${action}`);
  53. // Using both `action` and `jobId` in case user provides non-unique `jobId`.
  54. const promiseId = `${action}-${jobId}`;
  55. setResolve(promiseId, resolve);
  56. setReject(promiseId, reject);
  57. send(worker, {
  58. workerId: id,
  59. jobId,
  60. action,
  61. payload,
  62. });
  63. })
  64. );
  65. const load = () => (
  66. console.warn('`load` is depreciated and should be removed from code (workers now come pre-loaded)')
  67. );
  68. const loadInternal = (jobId) => (
  69. startJob(createJob({
  70. id: jobId, action: 'load', payload: { options: { lstmOnly: lstmOnlyCore, corePath: options.corePath, logging: options.logging } },
  71. }))
  72. );
  73. const writeText = (path, text, jobId) => (
  74. startJob(createJob({
  75. id: jobId,
  76. action: 'FS',
  77. payload: { method: 'writeFile', args: [path, text] },
  78. }))
  79. );
  80. const readText = (path, jobId) => (
  81. startJob(createJob({
  82. id: jobId,
  83. action: 'FS',
  84. payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] },
  85. }))
  86. );
  87. const removeFile = (path, jobId) => (
  88. startJob(createJob({
  89. id: jobId,
  90. action: 'FS',
  91. payload: { method: 'unlink', args: [path] },
  92. }))
  93. );
  94. const FS = (method, args, jobId) => (
  95. startJob(createJob({
  96. id: jobId,
  97. action: 'FS',
  98. payload: { method, args },
  99. }))
  100. );
  101. const loadLanguage = () => (
  102. console.warn('`loadLanguage` is depreciated and should be removed from code (workers now come with language pre-loaded)')
  103. );
  104. const loadLanguageInternal = (_langs, jobId) => startJob(createJob({
  105. id: jobId,
  106. action: 'loadLanguage',
  107. payload: {
  108. langs: _langs,
  109. options: {
  110. langPath: options.langPath,
  111. dataPath: options.dataPath,
  112. cachePath: options.cachePath,
  113. cacheMethod: options.cacheMethod,
  114. gzip: options.gzip,
  115. lstmOnly: [OEM.DEFAULT, OEM.LSTM_ONLY].includes(currentOem)
  116. && !options.legacyLang,
  117. },
  118. },
  119. }));
  120. const initialize = () => (
  121. console.warn('`initialize` is depreciated and should be removed from code (workers now come pre-initialized)')
  122. );
  123. const initializeInternal = (_langs, _oem, _config, jobId) => (
  124. startJob(createJob({
  125. id: jobId,
  126. action: 'initialize',
  127. payload: { langs: _langs, oem: _oem, config: _config },
  128. }))
  129. );
  130. const reinitialize = (langs = 'eng', oem, config, jobId) => { // eslint-disable-line
  131. if (lstmOnlyCore && [OEM.TESSERACT_ONLY, OEM.TESSERACT_LSTM_COMBINED].includes(oem)) throw Error('Legacy model requested but code missing.');
  132. const _oem = oem || currentOem;
  133. currentOem = _oem;
  134. const _config = config || currentConfig;
  135. currentConfig = _config;
  136. // Only load langs that are not already loaded.
  137. // This logic fails if the user downloaded the LSTM-only English data for a language
  138. // and then uses `worker.reinitialize` to switch to the Legacy engine.
  139. // However, the correct data will still be downloaded after initialization fails
  140. // and this can be avoided entirely if the user loads the correct data ahead of time.
  141. const langsArr = typeof langs === 'string' ? langs.split('+') : langs;
  142. const _langs = langsArr.filter((x) => !currentLangs.includes(x));
  143. currentLangs.push(..._langs);
  144. if (_langs.length > 0) {
  145. return loadLanguageInternal(_langs, jobId)
  146. .then(() => initializeInternal(langs, _oem, _config, jobId));
  147. }
  148. return initializeInternal(langs, _oem, _config, jobId);
  149. };
  150. const setParameters = (params = {}, jobId) => (
  151. startJob(createJob({
  152. id: jobId,
  153. action: 'setParameters',
  154. payload: { params },
  155. }))
  156. );
  157. const recognize = async (image, opts = {}, output = {
  158. blocks: true, text: true, hocr: true, tsv: true,
  159. }, jobId) => (
  160. startJob(createJob({
  161. id: jobId,
  162. action: 'recognize',
  163. payload: { image: await loadImage(image), options: opts, output },
  164. }))
  165. );
  166. const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => {
  167. console.log('`getPDF` function is depreciated. `recognize` option `savePDF` should be used instead.');
  168. return startJob(createJob({
  169. id: jobId,
  170. action: 'getPDF',
  171. payload: { title, textonly },
  172. }));
  173. };
  174. const detect = async (image, jobId) => {
  175. if (lstmOnlyCore) throw Error('`worker.detect` requires Legacy model, which was not loaded.');
  176. return startJob(createJob({
  177. id: jobId,
  178. action: 'detect',
  179. payload: { image: await loadImage(image) },
  180. }));
  181. };
  182. const terminate = async () => {
  183. if (worker !== null) {
  184. /*
  185. await startJob(createJob({
  186. id: jobId,
  187. action: 'terminate',
  188. }));
  189. */
  190. terminateWorker(worker);
  191. worker = null;
  192. }
  193. return Promise.resolve();
  194. };
  195. onMessage(worker, ({
  196. workerId, jobId, status, action, data,
  197. }) => {
  198. const promiseId = `${action}-${jobId}`;
  199. if (status === 'resolve') {
  200. log(`[${workerId}]: Complete ${jobId}`);
  201. let d = data;
  202. if (action === 'recognize') {
  203. d = circularize(data);
  204. } else if (action === 'getPDF') {
  205. d = Array.from({ ...data, length: Object.keys(data).length });
  206. }
  207. resolves[promiseId]({ jobId, data: d });
  208. } else if (status === 'reject') {
  209. rejects[promiseId](data);
  210. if (action === 'load') workerResReject(data);
  211. if (errorHandler) {
  212. errorHandler(data);
  213. } else {
  214. throw Error(data);
  215. }
  216. } else if (status === 'progress') {
  217. logger({ ...data, userJobId: jobId });
  218. }
  219. });
  220. const resolveObj = {
  221. id,
  222. worker,
  223. setResolve,
  224. setReject,
  225. load,
  226. writeText,
  227. readText,
  228. removeFile,
  229. FS,
  230. loadLanguage,
  231. initialize,
  232. reinitialize,
  233. setParameters,
  234. recognize,
  235. getPDF,
  236. detect,
  237. terminate,
  238. };
  239. loadInternal()
  240. .then(() => loadLanguageInternal(langs))
  241. .then(() => initializeInternal(langs, oem, config))
  242. .then(() => workerResResolve(resolveObj))
  243. .catch(() => {});
  244. return workerRes;
  245. };