ajihakim201 committed on
Commit
0ed7467
·
verified ·
1 Parent(s): d751c80

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +52 -52
server.js CHANGED
@@ -1,52 +1,52 @@
1
- const express = require('express');
2
- const Tesseract = require('tesseract.js');
3
- const fs = require('fs');
4
- const path = require('path');
5
- const { convert } = require('pdf-poppler');
6
-
7
- const app = express();
8
- app.use(express.json({ limit: '50mb' }));
9
-
10
- // Create the uploads folder if it does not exist yet
11
- const uploadDir = path.join(__dirname, 'uploads');
12
- if (!fs.existsSync(uploadDir)) fs.mkdirSync(uploadDir);
13
-
14
- app.post('/ocr', async (req, res) => {
15
- try {
16
- const { file } = req.body;
17
- if (!file) return res.status(400).json({ success: false, error: 'Base64 file string not provided' });
18
-
19
- // Save base64 → PDF (written into the uploads folder under a timestamped name)
20
- const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
21
- fs.writeFileSync(pdfPath, Buffer.from(file, 'base64'));
22
-
23
- // Convert PDF → PNG (first page)
24
- const outputBase = pdfPath.replace('.pdf', '');
25
- await convert(pdfPath, {
26
- format: 'png',
27
- out_dir: uploadDir,
28
- out_prefix: path.basename(outputBase),
29
- page: 1, // take the first page
30
- scale: 1024 // resolution
31
- });
32
-
33
- const imagePath = path.join(uploadDir, `${path.basename(outputBase)}-1.png`);
34
-
35
- // OCR with Tesseract (language string 'ind+eng')
36
- const { data: { text } } = await Tesseract.recognize(imagePath, 'ind+eng');
37
-
38
- // Clean up temporary files (only reached when conversion and OCR succeeded)
39
- fs.unlinkSync(pdfPath);
40
- fs.unlinkSync(imagePath);
41
-
42
- res.json({ success: true, text });
43
- } catch (error) {
44
- res.status(500).json({ success: false, error: error.message });
45
- }
46
- });
47
-
48
- // Hugging Face Spaces uses port 7860
49
- const PORT = process.env.PORT || 7860;
50
- app.listen(PORT, "0.0.0.0", () => {
51
- console.log(`πŸš€ OCR server running on http://0.0.0.0:${PORT}`);
52
- });
 
1
+ const express = require('express');
2
+ const tesseract = require("node-tesseract-ocr");
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { convert } = require('pdf-poppler');
6
+
7
+ const app = express();
8
+ app.use(express.json({ limit: '50mb' }));
9
+
10
+ // Buat folder uploads kalau belum ada
11
+ const uploadDir = path.join(__dirname, 'uploads');
12
+ if (!fs.existsSync(uploadDir)) fs.mkdirSync(uploadDir);
13
+
14
+ app.post('/ocr', async (req, res) => {
15
+ try {
16
+ const { file } = req.body;
17
+ if (!file) return res.status(400).json({ success: false, error: 'Base64 file string not provided' });
18
+
19
+ // Simpan base64 β†’ PDF
20
+ const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
21
+ fs.writeFileSync(pdfPath, Buffer.from(file, 'base64'));
22
+
23
+ // Convert PDF β†’ PNG (halaman pertama)
24
+ const outputBase = pdfPath.replace('.pdf', '');
25
+ await convert(pdfPath, {
26
+ format: 'png',
27
+ out_dir: uploadDir,
28
+ out_prefix: path.basename(outputBase),
29
+ page: 1, // ambil halaman pertama
30
+ scale: 1024 // resolusi
31
+ });
32
+
33
+ const imagePath = path.join(uploadDir, `${path.basename(outputBase)}-1.png`);
34
+
35
+ // OCR dengan Tesseract
36
+ const { data: { text } } = await Tesseract.recognize(imagePath, 'ind+eng');
37
+
38
+ // Bersihkan file sementara
39
+ fs.unlinkSync(pdfPath);
40
+ fs.unlinkSync(imagePath);
41
+
42
+ res.json({ success: true, text });
43
+ } catch (error) {
44
+ res.status(500).json({ success: false, error: error.message });
45
+ }
46
+ });
47
+
48
+ // Hugging Face Spaces pakai port 7860
49
+ const PORT = process.env.PORT || 7860;
50
+ app.listen(PORT, "0.0.0.0", () => {
51
+ console.log(`πŸš€ OCR server running on http://0.0.0.0:${PORT}`);
52
+ });