Spaces:
Sleeping
Sleeping
File size: 1,968 Bytes
da5945b e99d0ff 6ac3e3b 90eec1c 0ed7467 75331cf 3c1f26f 75331cf 3c1f26f da5945b 90eec1c 6ac3e3b 90eec1c 6ac3e3b da5945b 90eec1c da5945b 90eec1c da5945b 90eec1c da5945b 152d0b8 57a9713 90eec1c da5945b 0ed7467 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
const express = require("express");
const Tesseract = require("tesseract.js");
const fs = require("fs");
const path = require("path");
//const { fromPath } = require("pdf2pic");
//const PDF2Pic = require("pdf2pic"); // use the new API
// Use ImageMagick mode so that gm does not look for its own gm binary
const gm = require('gm').subClass({ imageMagick: true });
// Express application; JSON request bodies of up to 50 MB are accepted.
const app = express();
const jsonBodyParser = express.json({ limit: "50mb" });
app.use(jsonBodyParser);

// Create the uploads folder (under /tmp so that it is writable on HuggingFace).
const uploadDir = path.join("/tmp", "uploads");
if (!fs.existsSync(uploadDir)) {
  fs.mkdirSync(uploadDir, { recursive: true });
}
/**
 * POST /ocr — run OCR over a base64-encoded PDF and return the recognized text.
 *
 * Request body (JSON): `{ file: string }`, where `file` is the base64 content
 * of the PDF.
 *
 * Responses:
 *   200 `{ success: true, text }`   — text of every page, each preceded by a
 *                                     `--- Page N ---` header.
 *   400 `{ success: false, error }` — `file` is missing, empty, or not a string.
 *   500 `{ success: false, error }` — PDF conversion or recognition failed.
 *
 * Every request works inside its own temporary directory below `uploadDir`,
 * so concurrent requests cannot overwrite each other's PDF or page images,
 * and the directory is removed in `finally` so nothing is left behind when
 * conversion or OCR throws.
 */
app.post("/ocr", async (req, res) => {
  // req.body can be absent when no body parser handled the request.
  const { file } = req.body || {};
  if (typeof file !== "string" || file.length === 0) {
    return res.status(400).json({ success: false, error: "Base64 file string not provided" });
  }

  let workDir;
  try {
    // Re-create the parent directory if it has disappeared since startup.
    await fs.promises.mkdir(uploadDir, { recursive: true });
    workDir = await fs.promises.mkdtemp(path.join(uploadDir, "job-"));

    // Save base64 -> PDF (async write keeps the event loop free for large payloads).
    const pdfPath = path.join(workDir, "input.pdf");
    await fs.promises.writeFile(pdfPath, Buffer.from(file, "base64"));

    // pdf2pic configuration; page images are written into this request's directory.
    // NOTE(review): width and height are both fixed at 1024 — confirm this does
    // not distort non-square pages enough to hurt recognition.
    const { fromPath } = require("pdf2pic");
    const options = {
      density: 150,
      saveFilename: "page",
      savePath: workDir,
      format: "png",
      width: 1024,
      height: 1024
    };
    const storeAsImage = fromPath(pdfPath, options);

    // -1 asks pdf2pic to convert every page of the document.
    const results = await storeAsImage.bulk(-1);

    // Pages are recognized one at a time, in document order.
    let finalText = "";
    for (const page of results) {
      const { data: { text } } = await Tesseract.recognize(page.path, "ind+eng");
      finalText += `\n--- Page ${page.page} ---\n${text}`;
    }

    res.json({ success: true, text: finalText.trim() });
  } catch (error) {
    console.error("OCR Error:", error);
    res.status(500).json({ success: false, error: error.message });
  } finally {
    // Remove the PDF and all page images, on success and on failure alike.
    // A cleanup failure is logged only: the response has already been sent.
    if (workDir) {
      try {
        await fs.promises.rm(workDir, { recursive: true, force: true });
      } catch (cleanupError) {
        console.error("OCR cleanup error:", cleanupError);
      }
    }
  }
});
// Port to listen on: the PORT environment variable when it is set and
// non-empty, otherwise 7860.
const PORT = process.env.PORT ? process.env.PORT : 7860;

// Start the HTTP server on 0.0.0.0 and log the address once it is listening.
app.listen(PORT, "0.0.0.0", function onListening() {
  console.log(`π OCR server running on http://0.0.0.0:${PORT}`);
});
|