download-pdf.html 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. <html>
  2. <head>
  3. <script src="/dist/tesseract.min.js"></script>
  4. </head>
  5. <body>
  <div>
    <!-- Image to run OCR on; the module script below listens for this input's `change` event. -->
    <input type="file" id="uploader" accept="image/*">
    <!-- Starts disabled; the script enables it once a PDF is available.
         `disabled` is a boolean attribute, so it is written without a value. -->
    <button type="button" id="download-pdf" disabled>Download PDF</button>
  </div>
  <!-- Read-only output area; the script writes the recognized text into it. -->
  <textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
  11. <script type="module">
  // `Tesseract` is the global exposed by /dist/tesseract.min.js (loaded in <head>).
  const createWorker = Tesseract.createWorker;

  // Locations of the worker script and the core build, plus a logger that
  // forwards every message from the worker to the console.
  const workerOptions = {
    corePath: '/node_modules/tesseract.js-core',
    workerPath: '/dist/worker.min.js',
    logger: (message) => {
      console.log(message);
    },
  };

  // English worker; the second argument (1) is presumably the OCR engine mode —
  // confirm against the tesseract.js `createWorker` documentation.
  const worker = await createWorker('eng', 1, workerOptions);

  // Controls the handlers below read and update.
  const uploader = document.getElementById('uploader');
  const dlBtn = document.getElementById('download-pdf');

  // PDF data from the most recent recognition; undefined until one completes.
  let pdf;
  /**
   * `change` handler for the file input: runs OCR on the first selected file,
   * writes the recognized text into the #board textarea and stores the
   * generated PDF data in the module-level `pdf` variable for `downloadPDF`.
   *
   * Relies on the module-level `worker`, `dlBtn` and `pdf` bindings.
   *
   * @param {Event} event - Change event; only `event.target.files` is read.
   * @returns {Promise<void>} Settles once the UI has been updated. Recognition
   *   failures are reported in the textarea and the console, not rethrown.
   */
  const recognize = async ({ target: { files } }) => {
    const file = files?.[0];
    // The file list can be empty (e.g. the selection was cleared); there is
    // nothing to recognize then, so leave the current result untouched.
    if (!file) {
      return;
    }

    const board = document.getElementById('board');
    // Keep the button disabled while the job runs so a PDF from a previous
    // image cannot be downloaded as if it belonged to the new one.
    dlBtn.disabled = true;
    try {
      const { data } = await worker.recognize(file, { pdfTitle: "Example PDF" }, { pdf: true });
      pdf = data.pdf;
      board.value = data.text;
      dlBtn.disabled = false;
    } catch (err) {
      // Drop any stale PDF and surface the failure instead of leaving an
      // unhandled promise rejection in the event listener.
      pdf = undefined;
      console.error('OCR failed', err);
      board.value = `Recognition failed: ${err?.message ?? err}`;
    }
  };
  /**
   * `click` handler for the download button: wraps the data held in the
   * module-level `pdf` variable in a Blob and offers it to the user as
   * "tesseract-ocr-result.pdf".
   *
   * Uses `navigator.msSaveBlob` when it exists; otherwise clicks a temporary
   * hidden `<a download>` element pointing at an object URL. Does nothing when
   * no PDF is available or when neither mechanism is supported.
   *
   * @returns {Promise<void>}
   */
  const downloadPDF = async () => {
    // No recognition has produced a PDF yet; avoid downloading an empty file.
    if (pdf == null) {
      return;
    }

    const filename = 'tesseract-ocr-result.pdf';
    const blob = new Blob([new Uint8Array(pdf)], { type: 'application/pdf' });

    if (navigator.msSaveBlob) {
      // IE 10+
      navigator.msSaveBlob(blob, filename);
      return;
    }

    const link = document.createElement('a');
    if (link.download === undefined) {
      // The `download` attribute is not supported; nothing more to try.
      return;
    }

    const url = URL.createObjectURL(blob);
    try {
      link.setAttribute('href', url);
      link.setAttribute('download', filename);
      link.style.visibility = 'hidden';
      document.body.appendChild(link);
      link.click();
    } finally {
      link.remove();
      // Release the object URL so the Blob is not kept alive for the lifetime
      // of the page. Done in a later task rather than inline to give the
      // browser a chance to start the download first.
      setTimeout(() => URL.revokeObjectURL(url), 0);
    }
  };
  // Wire the controls to their handlers: choosing a file starts recognition,
  // clicking the button downloads the resulting PDF.
  [
    [uploader, 'change', recognize],
    [dlBtn, 'click', downloadPDF],
  ].forEach(([element, eventName, handler]) => {
    element.addEventListener(eventName, handler);
  });
  50. </script>
  51. </body>
  52. </html>