ajihakim201 committed on
Commit
90eec1c
·
verified ·
1 Parent(s): e06475d

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +27 -34
server.js CHANGED
@@ -2,9 +2,9 @@ const express = require("express");
2
  const Tesseract = require("tesseract.js");
3
  const fs = require("fs");
4
  const path = require("path");
5
- const { PDF2Pic } = require("pdf2pic");
6
- const { PDFDocument } = require("pdf-lib"); // untuk hitung jumlah halaman
7
- const gm = require("gm").subClass({ imageMagick: true });
8
 
9
  const app = express();
10
  app.use(express.json({ limit: "50mb" }));
@@ -22,46 +22,39 @@ app.post("/ocr", async (req, res) => {
22
  const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
23
  fs.writeFileSync(pdfPath, Buffer.from(file, "base64"));
24
 
25
- // Hitung jumlah halaman PDF
26
- const pdfBuffer = fs.readFileSync(pdfPath);
27
- const pdfDoc = await PDFDocument.load(pdfBuffer);
28
- const totalPages = pdfDoc.getPageCount();
29
-
30
- const { fromPath } = require("pdf2pic");
31
-
32
- const { PDF2Pic } = require("pdf2pic");
33
-
34
- const pdf2pic = new PDF2Pic({
35
- density: 150,
36
- saveFilename: "page",
37
- savePath: uploadDir,
38
- format: "png",
39
- width: 1024,
40
- height: 1024
41
- });
42
-
43
- const result = await pdf2pic.convert(pdfPath, 1);
44
-
45
- // Loop semua halaman PDF
46
- for (let page = 1; page <= totalPages; page++) {
47
- const result = await pdf2pic.convert(pdfPath, page);
48
- const { data: { text } } = await Tesseract.recognize(result.path, "ind+eng");
49
- allText += `\n\n--- Halaman ${page} ---\n${text}`;
50
-
51
- // Hapus file gambar sementara
52
- if (fs.existsSync(result.path)) fs.unlinkSync(result.path);
53
  }
54
 
55
- // Hapus file PDF sementara
56
  fs.unlinkSync(pdfPath);
57
 
58
- res.json({ success: true, text: allText });
59
  } catch (error) {
 
60
  res.status(500).json({ success: false, error: error.message });
61
  }
62
  });
63
 
64
  const PORT = process.env.PORT || 7860;
65
- app.listen(PORT, "0.0.0.0", () => {
66
  console.log(`🚀 OCR server running on http://0.0.0.0:${PORT}`);
67
  });
 
2
  const Tesseract = require("tesseract.js");
3
  const fs = require("fs");
4
  const path = require("path");
5
+ const { fromPath } = require("pdf2pic");
6
+ // gunakan ImageMagick mode agar gm tidak cari binary gm sendiri
7
+ const gm = require('gm').subClass({ imageMagick: true });
8
 
9
  const app = express();
10
  app.use(express.json({ limit: "50mb" }));
 
22
  const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
23
  fs.writeFileSync(pdfPath, Buffer.from(file, "base64"));
24
 
25
+ // Konfigurasi pdf2pic
26
+ const pdf2pic = new PDF2Pic({
27
+ density: 150,
28
+ saveFilename: "page",
29
+ savePath: uploadDir,
30
+ format: "png",
31
+ width: 1024,
32
+ height: 1024
33
+ });
34
+
35
+ // Ambil jumlah halaman dari pdf-poppler (atau simple trick dengan pdf2pic)
36
+ // pdf2pic bisa langsung convert seluruh halaman
37
+ const convertAllPages = await pdf2pic.convertBulk(pdfPath, -1);
38
+
39
+ let finalText = "";
40
+ for (const page of convertAllPages) {
41
+ const { data: { text } } = await Tesseract.recognize(page.path, "ind+eng");
42
+ finalText += `\n--- Page ${page.page} ---\n${text}`;
43
+ // hapus PNG tiap halaman setelah OCR
44
+ fs.unlinkSync(page.path);
 
 
 
 
 
 
 
 
45
  }
46
 
47
+ // Hapus file PDF
48
  fs.unlinkSync(pdfPath);
49
 
50
+ res.json({ success: true, text: finalText.trim() });
51
  } catch (error) {
52
+ console.error("OCR Error:", error);
53
  res.status(500).json({ success: false, error: error.message });
54
  }
55
  });
56
 
57
  const PORT = process.env.PORT || 7860;
58
+ app.listen(PORT, '0.0.0.0', () => {
59
  console.log(`🚀 OCR server running on http://0.0.0.0:${PORT}`);
60
  });