Spaces:
Sleeping
Sleeping
| const express = require("express"); | |
| const Tesseract = require("tesseract.js"); | |
| const fs = require("fs"); | |
| const path = require("path"); | |
| const PDF2Pic = require("pdf2pic"); | |
| const { PDFDocument } = require("pdf-lib"); // untuk hitung jumlah halaman | |
| const gm = require("gm").subClass({ imageMagick: true }); | |
// Express application; JSON bodies up to 50mb are accepted because the PDF
// arrives base64-encoded inside the request body.
const app = express();
app.use(express.json({ limit: "50mb" }));

// Uploads folder lives under /tmp so that it is writable on HuggingFace.
// `recursive: true` makes mkdirSync succeed when the directory already exists,
// so no separate existence check (and no check-then-create race) is needed.
const uploadDir = path.join("/tmp", "uploads");
fs.mkdirSync(uploadDir, { recursive: true });
/**
 * POST /ocr — run OCR over every page of a base64-encoded PDF.
 *
 * Request body (JSON): { file: "<base64-encoded PDF>" }
 *
 * Responses:
 *   200 { success: true, text }    text of all pages concatenated, each page
 *                                  preceded by a "--- Halaman N ---" header
 *   400 { success: false, error }  `file` is missing or is not a string
 *   500 { success: false, error }  any failure while parsing, rendering or
 *                                  recognising the document
 *
 * Each request works inside its own scratch directory under `uploadDir`, so
 * concurrent requests cannot overwrite or delete each other's files, and the
 * directory is removed in `finally` whether the request succeeds or fails.
 */
app.post("/ocr", async (req, res) => {
  // Assigned once the scratch directory exists so `finally` knows what to remove.
  let workDir = null;

  try {
    // `req.body` can be undefined when no JSON body was parsed.
    const { file } = req.body || {};
    if (typeof file !== "string" || file.length === 0) {
      return res.status(400).json({ success: false, error: "Base64 file string not provided" });
    }

    // Decode once and reuse the buffer for both page counting and the on-disk copy.
    const pdfBuffer = Buffer.from(file, "base64");

    // Count the pages first so an unreadable PDF fails before anything is written.
    const pdfDoc = await PDFDocument.load(pdfBuffer);
    const totalPages = pdfDoc.getPageCount();

    // pdf2pic reads the PDF from a path, so persist it in a per-request directory.
    await fs.promises.mkdir(uploadDir, { recursive: true });
    workDir = await fs.promises.mkdtemp(path.join(uploadDir, "ocr-"));
    const pdfPath = path.join(workDir, "input.pdf");
    await fs.promises.writeFile(pdfPath, pdfBuffer);

    // pdf2pic configuration; images are rendered into the per-request directory.
    const pdf2pic = new PDF2Pic({
      density: 150,
      saveFilename: "page",
      savePath: workDir,
      format: "png",
      width: 1024,
      height: 1024,
    });

    // Pages are processed one at a time, in order.
    const pageTexts = [];
    for (let page = 1; page <= totalPages; page++) {
      const result = await pdf2pic.convert(pdfPath, page);
      const { data: { text } } = await Tesseract.recognize(result.path, "ind+eng");
      pageTexts.push(`\n\n--- Halaman ${page} ---\n${text}`);

      // Drop the page image right away so disk usage stays bounded on long PDFs.
      await fs.promises.rm(result.path, { force: true });
    }

    res.json({ success: true, text: pageTexts.join("") });
  } catch (error) {
    // Keep a server-side trace; the client only receives the message.
    console.error("OCR request failed:", error);
    res.status(500).json({ success: false, error: error.message });
  } finally {
    if (workDir) {
      try {
        await fs.promises.rm(workDir, { recursive: true, force: true });
      } catch (cleanupError) {
        // The response has already been sent; a cleanup failure is only logged.
        console.error("Failed to remove temporary OCR directory:", cleanupError);
      }
    }
  }
});
// Port to listen on: taken from the PORT environment variable when it is set,
// otherwise 7860.
const PORT = process.env.PORT || 7860;

/** Logs the address the server was asked to listen on. */
function announceStartup() {
  console.log(`π OCR server running on http://0.0.0.0:${PORT}`);
}

// Listen on 0.0.0.0 and report once the server is up.
app.listen(PORT, "0.0.0.0", announceStartup);