zaidulhassan79 committed on
Commit
4814915
·
verified ·
1 Parent(s): f735aaa

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +24 -0
  2. app.py +40 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.10-slim

# Install system dependencies required by PaddleOCR.
# --no-install-recommends keeps optional packages out of the layer, and the
# apt package lists are deleted in the same RUN so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxrender1 \
    libxext6 \
    libgomp1 \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the requirements first so the dependency layer stays cached when
# app.py changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Must match the --port passed to uvicorn below.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form
2
+ from paddleocr import PaddleOCR
3
+ import tempfile
4
+ import os
5
+
6
+ app = FastAPI(title="Paddle OCR API")
7
+
# Cache of constructed PaddleOCR engines keyed by the requested language
# code, so each engine is built at most once per process.
ocr_models = {}


def get_ocr(lang: str):
    """Return the PaddleOCR engine for ``lang``, building it on first use.

    Args:
        lang: Language code forwarded unchanged to ``PaddleOCR(lang=...)``.

    Returns:
        The ``PaddleOCR`` instance stored in ``ocr_models`` under ``lang``.
        Repeated calls with the same ``lang`` return the same object.
    """
    engine = ocr_models.get(lang)
    if engine is None:
        # First request for this language: construct the engine once and
        # keep it for every later call.
        engine = PaddleOCR(use_angle_cls=True, lang=lang)
        ocr_models[lang] = engine
    return engine
17
+
18
+
@app.post("/ocr")
async def ocr_api(
    file: UploadFile = File(...),
    lang: str = Form("en"),
):
    """Run OCR on an uploaded file and return the recognised text lines.

    The upload is written to a temporary file because the OCR engine is
    called with a filesystem path. The temporary file is always removed,
    including when model loading or recognition raises.

    Args:
        file: The uploaded file (multipart form field ``file``).
        lang: Language code used to select the OCR engine; defaults to "en".

    Returns:
        A dict with the requested ``language`` and ``lines``, the list of
        recognised text strings. ``lines`` is empty when nothing is detected.
    """
    # Read the upload before creating the temp file so a failed read cannot
    # leave an orphaned file behind (delete=False disables auto-cleanup).
    payload = await file.read()

    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name

    try:
        ocr = get_ocr(lang)
        result = ocr.ocr(tmp_path)
    finally:
        os.remove(tmp_path)

    # NOTE(review): PaddleOCR 2.x is understood to report a page with no
    # detected text as None (i.e. result == [None]); treat a missing or None
    # first page as "no lines" instead of iterating over None.
    page = result[0] if result else None

    # Each entry is indexed as line[1][0] for its text, as in the original
    # code (presumably [box, (text, score)] - confirm against PaddleOCR docs).
    text = [line[1][0] for line in (page or [])]

    return {
        "language": lang,
        "lines": text,
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ paddleocr==2.10.0
4
+ paddlepaddle
5
+ python-multipart