File size: 1,968 Bytes
da5945b
 
 
 
e99d0ff
6ac3e3b
90eec1c
 
0ed7467
 
75331cf
3c1f26f
 
75331cf
3c1f26f
 
da5945b
 
 
 
 
 
 
 
 
90eec1c
6ac3e3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90eec1c
6ac3e3b
 
 
 
 
da5945b
90eec1c
da5945b
 
90eec1c
da5945b
90eec1c
da5945b
 
152d0b8
 
57a9713
90eec1c
da5945b
0ed7467
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
const express = require("express");
const Tesseract = require("tesseract.js");
const fs = require("fs");
const path = require("path");
//const { fromPath } = require("pdf2pic");
//const PDF2Pic = require("pdf2pic"); // gunakan cara baru
// Use gm in ImageMagick mode so that it does not look for its own `gm` binary
const gm = require('gm').subClass({ imageMagick: true });

// Express application instance that the routes below are registered on.
const app = express();

// Parse JSON request bodies, accepting payloads of up to 50 MB.
const jsonBodyParser = express.json({ limit: "50mb" });
app.use(jsonBodyParser);

// Working folder for uploads (under /tmp so that it is writable on Hugging Face).
const uploadDir = path.join("/tmp", "uploads");
// `recursive: true` makes mkdirSync a no-op when the directory already exists,
// so no separate existsSync() check (and its check-then-create race) is needed.
fs.mkdirSync(uploadDir, { recursive: true });

/**
 * POST /ocr — run OCR over every page of a base64-encoded PDF.
 *
 * Request body (JSON): `{ file: string }`, where `file` is the PDF encoded as base64.
 *
 * Responses:
 *   200 `{ success: true, text }`   — recognized text of all pages, each page
 *                                      preceded by a `--- Page N ---` marker.
 *   400 `{ success: false, error }` — `file` is missing, empty or not a string.
 *   500 `{ success: false, error }` — PDF conversion or text recognition failed.
 *
 * Each request works inside its own temporary directory below `uploadDir`, so
 * concurrent requests cannot overwrite or delete each other's page images, and
 * that directory is removed in `finally` so failed requests do not leak files.
 */
app.post("/ocr", async (req, res) => {
  const { file } = req.body || {};
  if (typeof file !== "string" || file.length === 0) {
    return res.status(400).json({ success: false, error: "Base64 file string not provided" });
  }

  let workDir;
  try {
    // Private scratch directory for this request only.
    workDir = await fs.promises.mkdtemp(path.join(uploadDir, "ocr-"));

    // Save base64 -> PDF
    const pdfPath = path.join(workDir, "input.pdf");
    await fs.promises.writeFile(pdfPath, Buffer.from(file, "base64"));

    // pdf2pic configuration
    const { fromPath } = require("pdf2pic");
    const options = {
      density: 150,
      saveFilename: "page",
      savePath: workDir,
      format: "png",
      width: 1024,
      height: 1024,
    };
    const storeAsImage = fromPath(pdfPath, options);

    // -1 converts all pages of the PDF
    const results = await storeAsImage.bulk(-1);

    // Recognize the pages one at a time, in order ("ind+eng" = Indonesian + English).
    const sections = [];
    for (const page of results) {
      const { data: { text } } = await Tesseract.recognize(page.path, "ind+eng");
      sections.push(`\n--- Page ${page.page} ---\n${text}`);
    }

    res.json({ success: true, text: sections.join("").trim() });
  } catch (error) {
    console.error("OCR Error:", error);
    res.status(500).json({ success: false, error: error.message });
  } finally {
    // Remove the PDF and every rendered page image, on success and on failure.
    // Cleanup is best-effort: the response has already been sent, so a failure
    // here is only logged and must not surface as an unhandled rejection.
    if (workDir) {
      try {
        await fs.promises.rm(workDir, { recursive: true, force: true });
      } catch (cleanupError) {
        console.error("OCR cleanup failed:", cleanupError);
      }
    }
  }
});

// Port to listen on: the PORT environment variable when set, otherwise 7860.
const PORT = process.env.PORT || 7860;

// Bind to 0.0.0.0 so the server accepts connections on all network interfaces.
app.listen(PORT, '0.0.0.0', () => {
  // \u{1F680} is the rocket emoji; it is written as an escape so the log line
  // cannot be garbled when the source file is re-encoded.
  console.log(`\u{1F680} OCR server running on http://0.0.0.0:${PORT}`);
});