/**
 * Minimal OCR HTTP service.
 *
 * POST /ocr accepts a JSON body `{ "file": "<base64-encoded PDF>" }`, renders
 * every page of the PDF to a PNG with pdf2pic, runs Tesseract (Indonesian +
 * English) on each page image and responds with the concatenated text.
 */
const express = require("express");
const Tesseract = require("tesseract.js");
const fs = require("fs");
const path = require("path");
const { fromPath } = require("pdf2pic");

// Use ImageMagick mode so that gm does not look for its own `gm` binary.
// NOTE(review): this subclass is not referenced anywhere else in this file;
// confirm whether it is still required before removing it.
const gm = require("gm").subClass({ imageMagick: true });

const app = express();
app.use(express.json({ limit: "50mb" }));

// Upload root lives under /tmp so that it is writable on HuggingFace.
const uploadDir = path.join("/tmp", "uploads");
// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync(uploadDir, { recursive: true });

// Languages passed to Tesseract for every page.
const OCR_LANGUAGES = "ind+eng";

/**
 * Builds the pdf2pic options for a single request.
 *
 * @param {string} savePath - Directory the rendered page images are written to.
 * @returns {object} Options object for pdf2pic's `fromPath`.
 */
function buildConvertOptions(savePath) {
  return {
    density: 150,
    saveFilename: "page",
    savePath,
    format: "png",
    width: 1024,
    height: 1024,
  };
}

/**
 * Removes a request's working directory and everything in it.
 * Failures are logged rather than thrown: cleanup must never turn a request
 * that already produced a response into an unhandled rejection.
 *
 * @param {string} dir - Directory to delete.
 * @returns {Promise<void>}
 */
async function removeWorkDir(dir) {
  try {
    await fs.promises.rm(dir, { recursive: true, force: true });
  } catch (cleanupError) {
    console.error("OCR cleanup error:", cleanupError);
  }
}

/**
 * POST /ocr
 *
 * Request body: `{ file: string }` where `file` is a base64-encoded PDF.
 * Responses:
 *   200 `{ success: true, text }`     - OCR text of all pages, each prefixed
 *                                       with a `--- Page N ---` header.
 *   400 `{ success: false, error }`   - `file` is missing or not a string.
 *   500 `{ success: false, error }`   - conversion or OCR failed.
 */
app.post("/ocr", async (req, res) => {
  // req.body may be undefined when no JSON body was parsed.
  const file = (req.body || {}).file;
  if (typeof file !== "string" || file === "") {
    return res
      .status(400)
      .json({ success: false, error: "Base64 file string not provided" });
  }

  let workDir;
  try {
    // Each request gets its own directory: every request uses the same
    // "page" save filename, so a shared directory would let concurrent
    // requests overwrite or delete each other's images.
    await fs.promises.mkdir(uploadDir, { recursive: true });
    workDir = await fs.promises.mkdtemp(path.join(uploadDir, "ocr-"));

    // Persist the base64 payload as a PDF.
    const pdfPath = path.join(workDir, "input.pdf");
    await fs.promises.writeFile(pdfPath, Buffer.from(file, "base64"));

    // Render all pages (-1) of the PDF to PNG files.
    const convert = fromPath(pdfPath, buildConvertOptions(workDir));
    const pages = await convert.bulk(-1);

    // Pages are recognised sequentially so the output keeps page order.
    const sections = [];
    for (const page of pages) {
      const {
        data: { text },
      } = await Tesseract.recognize(page.path, OCR_LANGUAGES);
      sections.push(`\n--- Page ${page.page} ---\n${text}`);
    }

    res.json({ success: true, text: sections.join("").trim() });
  } catch (error) {
    console.error("OCR Error:", error);
    res.status(500).json({ success: false, error: error.message });
  } finally {
    // Always delete the PDF and page images, including on failure.
    if (workDir) {
      await removeWorkDir(workDir);
    }
  }
});

const PORT = process.env.PORT || 7860;
app.listen(PORT, '0.0.0.0', () => {
  console.log(`🚀 OCR server running on http://0.0.0.0:${PORT}`);
});