ocr-server / server.js
ajihakim201's picture
Update server.js
f6b3f0c verified
raw
history blame
1.75 kB
const express = require("express");
const Tesseract = require("tesseract.js");
const fs = require("fs");
const path = require("path");
const { fromPath } = require("pdf2pic");
// Use gm in ImageMagick mode so that gm does not look for its own gm binary.
const gm = require('gm').subClass({ imageMagick: true });
// Express application. JSON request bodies of up to 50 MB are accepted
// (the /ocr route receives its PDF as a base64 string inside the JSON body).
const app = express();
const jsonBodyParser = express.json({ limit: '50mb' });
app.use(jsonBodyParser);

// Upload folder lives under /tmp so that it is writable on HuggingFace.
// It is created once at startup if it does not exist yet.
const uploadDir = path.join('/tmp', 'uploads');
const uploadDirMissing = !fs.existsSync(uploadDir);
if (uploadDirMissing) {
  fs.mkdirSync(uploadDir, { recursive: true });
}
/**
 * POST /ocr — run OCR on the first page of a base64-encoded PDF.
 *
 * Request body (JSON): `{ file: string }` where `file` is the PDF content
 * encoded as base64.
 *
 * Responses:
 *   200 `{ success: true, text }`      — recognized text of page 1.
 *   400 `{ success: false, error }`    — `file` missing or not a string.
 *   500 `{ success: false, error }`    — conversion or OCR failed.
 *
 * Every request works inside its own temporary directory under `uploadDir`,
 * so concurrent requests cannot overwrite each other's PDF/PNG files, and the
 * directory is removed in `finally` so nothing is leaked when a step throws.
 */
app.post("/ocr", async (req, res) => {
  // `req.body` may be absent when the request was not parsed as JSON.
  const { file } = req.body || {};
  if (typeof file !== "string" || file.length === 0) {
    return res.status(400).json({ success: false, error: "Base64 file string not provided" });
  }

  let workDir;
  try {
    // Per-request scratch directory (unique name generated by mkdtemp).
    workDir = await fs.promises.mkdtemp(path.join(uploadDir, "ocr-"));

    // Decode the base64 payload and store it as a PDF file.
    const pdfPath = path.join(workDir, "input.pdf");
    await fs.promises.writeFile(pdfPath, Buffer.from(file, "base64"));

    // Convert the PDF to PNG (first page only).
    // NOTE(review): the ImageMagick-mode `gm` subclass created at the top of
    // the file is not handed to pdf2pic here; if ImageMagick is required,
    // pdf2pic's `setGMClass(true)` may be needed — confirm.
    const options = {
      density: 150,
      saveFilename: "page",
      savePath: workDir,
      format: "png",
      width: 1024,
      height: 1024,
    };
    const storeAsImage = fromPath(pdfPath, options);
    const pageToConvertAsImage = 1;
    const result = await storeAsImage(pageToConvertAsImage);

    // OCR with Tesseract using the Indonesian + English language data.
    const { data: { text } } = await Tesseract.recognize(result.path, "ind+eng");

    res.json({ success: true, text });
  } catch (error) {
    // Keep a server-side trace; the client only receives the message.
    console.error("OCR request failed:", error);
    res.status(500).json({ success: false, error: error.message });
  } finally {
    // Remove the temporary files whether or not the request succeeded.
    // A cleanup failure must not turn into an unhandled rejection.
    if (workDir) {
      try {
        await fs.promises.rm(workDir, { recursive: true, force: true });
      } catch (cleanupError) {
        console.error("Failed to remove temporary OCR directory:", cleanupError);
      }
    }
  }
});
// Port comes from the PORT environment variable, falling back to 5000 when
// it is unset or empty.
const PORT = process.env.PORT || 5000;

// Bind on all interfaces (0.0.0.0) and log the address once listening.
app.listen(PORT, "0.0.0.0", () => {
  console.log(`🚀 OCR server running on http://0.0.0.0:${PORT}`);
});