ajihakim201 committed on
Commit
da5945b
·
verified ·
1 Parent(s): 0d06043

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +41 -44
server.js CHANGED
@@ -1,8 +1,8 @@
1
- const express = require('express');
2
- const Tesseract = require('tesseract.js');
3
- const fs = require('fs');
4
- const path = require('path');
5
- const { convert } = require('pdf-poppler');
6
 
7
  const app = express();
8
  app.use(express.json({ limit: '50mb' }));
@@ -11,45 +11,42 @@ app.use(express.json({ limit: '50mb' }));
11
  const uploadDir = path.join('/tmp', 'uploads');
12
  if (!fs.existsSync(uploadDir)) fs.mkdirSync(uploadDir, { recursive: true });
13
 
14
- app.post('/ocr', async (req, res) => {
15
- try {
16
- const { file } = req.body;
17
- if (!file) {
18
- return res.status(400).json({ success: false, error: 'Base64 file string not provided' });
19
- }
20
-
21
- // Simpan base64 → PDF
22
- const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
23
- fs.writeFileSync(pdfPath, Buffer.from(file, 'base64'));
24
-
25
- // Convert PDF → PNG (halaman pertama saja)
26
- const outputBase = pdfPath.replace('.pdf', '');
27
- await convert(pdfPath, {
28
- format: 'png',
29
- out_dir: uploadDir,
30
- out_prefix: path.basename(outputBase),
31
- page: 1,
32
- scale: 1024,
33
- });
34
-
35
- const imagePath = path.join(uploadDir, `${path.basename(outputBase)}-1.png`);
36
-
37
- // OCR dengan Tesseract
38
- const { data: { text } } = await Tesseract.recognize(imagePath, 'ind+eng');
39
-
40
- // Bersihkan file sementara
41
- fs.unlinkSync(pdfPath);
42
- fs.unlinkSync(imagePath);
43
-
44
- res.json({ success: true, text });
45
- } catch (error) {
46
- console.error('OCR Error:', error);
47
- res.status(500).json({ success: false, error: error.message });
48
- }
49
  });
50
 
51
- // ⚠️ HuggingFace hanya menerima listen ke process.env.PORT
52
- const PORT = process.env.PORT || 7860;
53
- app.listen(PORT, '0.0.0.0', () => {
54
- console.log(`πŸš€ OCR server running on http://0.0.0.0:${PORT}`);
55
  });
 
1
+ const express = require("express");
2
+ const Tesseract = require("tesseract.js");
3
+ const fs = require("fs");
4
+ const path = require("path");
5
+ const { fromPath } = require("pdf2pic");
6
 
7
  const app = express();
8
  app.use(express.json({ limit: '50mb' }));
 
11
  const uploadDir = path.join('/tmp', 'uploads');
12
  if (!fs.existsSync(uploadDir)) fs.mkdirSync(uploadDir, { recursive: true });
13
 
14
+ app.post("/ocr", async (req, res) => {
15
+ try {
16
+ const { file } = req.body;
17
+ if (!file) return res.status(400).json({ success: false, error: "Base64 file string not provided" });
18
+
19
+ // Simpan base64 → PDF
20
+ const pdfPath = path.join(uploadDir, `file_${Date.now()}.pdf`);
21
+ fs.writeFileSync(pdfPath, Buffer.from(file, "base64"));
22
+
23
+ // Convert PDF → PNG (halaman pertama)
24
+ const options = {
25
+ density: 150,
26
+ saveFilename: "page",
27
+ savePath: uploadDir,
28
+ format: "png",
29
+ width: 1024,
30
+ height: 1024
31
+ };
32
+ const storeAsImage = fromPath(pdfPath, options);
33
+ const pageToConvertAsImage = 1;
34
+ const result = await storeAsImage(pageToConvertAsImage);
35
+
36
+ // OCR dengan Tesseract
37
+ const { data: { text } } = await Tesseract.recognize(result.path, "ind+eng");
38
+
39
+ // Hapus file sementara
40
+ fs.unlinkSync(pdfPath);
41
+ fs.unlinkSync(result.path);
42
+
43
+ res.json({ success: true, text });
44
+ } catch (error) {
45
+ res.status(500).json({ success: false, error: error.message });
46
+ }
 
 
47
  });
48
 
49
+ const PORT = process.env.PORT || 5000;
50
+ app.listen(PORT, "0.0.0.0", () => {
51
+ console.log(`πŸš€ OCR server running on http://0.0.0.0:${PORT}`);
 
52
  });