ajihakim201 committed on
Commit
3c1f26f
·
verified ·
1 Parent(s): 152d0b8

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +50 -32
server.js CHANGED
@@ -1,37 +1,55 @@
1
- const express = require("express");
2
- const multer = require("multer");
3
- const tesseract = require("node-tesseract-ocr");
4
- const fs = require("fs");
5
- const path = require("path");
6
 
7
  const app = express();
8
-
9
- // Pakai /tmp untuk folder upload (selalu bisa di-write di Linux container)
10
- const uploadDir = "/tmp/uploads";
11
- if (!fs.existsSync(uploadDir)) {
12
- fs.mkdirSync(uploadDir, { recursive: true });
13
- }
14
-
15
- const storage = multer.diskStorage({
16
- destination: (req, file, cb) => cb(null, uploadDir),
17
- filename: (req, file, cb) => cb(null, Date.now() + path.extname(file.originalname))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  });
19
 
20
- const upload = multer({ storage });
21
-
22
- app.post("/ocr", upload.single("file"), async (req, res) => {
23
- try {
24
- const config = { lang: "eng+ind" };
25
- const text = await tesseract.recognize(req.file.path, config);
26
-
27
- // Hapus file setelah OCR
28
- fs.unlinkSync(req.file.path);
29
-
30
- res.json({ success: true, text });
31
- } catch (err) {
32
- res.status(500).json({ success: false, error: err.message });
33
- }
34
  });
35
-
36
- const port = process.env.PORT || 7860;
37
- app.listen(port, () => console.log(`🚀 OCR server running on port ${port}`));
 
1
+ const express = require('express');
2
+ const Tesseract = require('tesseract.js');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { convert } = require('pdf-poppler');
6
 
7
  const app = express();
8
+ app.use(express.json({ limit: '50mb' }));
9
+
10
+ // Buat folder uploads (dalam /tmp agar bisa ditulis di HuggingFace)
11
+ const uploadDir = path.join('/tmp', 'uploads');
12
+ if (!fs.existsSync(uploadDir)) fs.mkdirSync(uploadDir, { recursive: true });
13
+
14
+ app.post('/ocr', async (req, res) => {
15
+ try {
16
+ const { file } = req.body;
17
+ if (!file) {
18
+ return res.status(400).json({ success: false, error: 'Base64 file string not provided' });
19
+ }
20
+
21
+ // Simpan base64 → PDF
22
+ const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
23
+ fs.writeFileSync(pdfPath, Buffer.from(file, 'base64'));
24
+
25
+ // Convert PDF → PNG (halaman pertama saja)
26
+ const outputBase = pdfPath.replace('.pdf', '');
27
+ await convert(pdfPath, {
28
+ format: 'png',
29
+ out_dir: uploadDir,
30
+ out_prefix: path.basename(outputBase),
31
+ page: 1,
32
+ scale: 1024,
33
+ });
34
+
35
+ const imagePath = path.join(uploadDir, `${path.basename(outputBase)}-1.png`);
36
+
37
+ // OCR dengan Tesseract
38
+ const { data: { text } } = await Tesseract.recognize(imagePath, 'ind+eng');
39
+
40
+ // Bersihkan file sementara
41
+ fs.unlinkSync(pdfPath);
42
+ fs.unlinkSync(imagePath);
43
+
44
+ res.json({ success: true, text });
45
+ } catch (error) {
46
+ console.error('OCR Error:', error);
47
+ res.status(500).json({ success: false, error: error.message });
48
+ }
49
  });
50
 
51
+ // ⚠️ HuggingFace hanya menerima listen ke process.env.PORT
52
+ const PORT = process.env.PORT || 7860;
53
+ app.listen(PORT, '0.0.0.0', () => {
54
+ console.log(`🚀 OCR server running on http://0.0.0.0:${PORT}`);
 
 
 
 
 
 
 
 
 
 
55
  });