GexSay committed on
Commit
581d33f
·
verified ·
1 Parent(s): 9d8ca47

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from rapidocr_onnxruntime import RapidOCR
3
+ import cv2
4
+ import numpy as np
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
6
+ from pydantic import BaseModel
7
+ from typing import Optional
8
+ import io
9
+ from PIL import Image
10
+ import uvicorn
11
+
12
# One OCR engine instance, created at import time and reused by every request
# (process_ocr reads this module-level name).
engine = RapidOCR()

# Create the FastAPI app
fastapi_app = FastAPI(title="OCR API", description="API for OCR recognition using RapidOCR")
16
+
17
def process_ocr(image: np.ndarray, use_det: bool, use_cls: bool, use_rec: bool) -> list:
    """Run the shared OCR engine on an image and normalize its output.

    Args:
        image: Image as a NumPy array. Callers in this file pass 3-channel RGB
            data; 2-D grayscale and 4-channel RGBA arrays are also accepted.
        use_det: Forwarded to the engine as ``use_det``.
        use_cls: Forwarded to the engine as ``use_cls``.
        use_rec: Forwarded to the engine as ``use_rec``.

    Returns:
        A list of dicts with the keys ``"text"``, ``"confidence"`` and
        ``"bbox"``. The list is empty when the engine returns nothing.
    """
    # The engine is fed BGR data, so pick the conversion that matches the
    # channel layout instead of letting cvtColor fail on non-RGB input.
    if image.ndim == 2:
        img_bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.ndim == 3 and image.shape[2] == 4:
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    else:
        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    result, _ = engine(img_bgr, use_det=use_det, use_cls=use_cls, use_rec=use_rec)

    if not result:
        return []

    texts = []
    for item in result:
        if len(item) == 3:
            # Full pipeline entry: (box, text, score).
            box, text, score = item
            texts.append({
                "text": text,
                "confidence": float(score),
                "bbox": box.tolist() if hasattr(box, 'tolist') else box
            })
        elif len(item) == 2:
            first, second = item
            if isinstance(first, str):
                # NOTE(review): without detection RapidOCR appears to return
                # [text, score] pairs - confirm against the installed version.
                # Taking the second element as the text would report the score.
                texts.append({
                    "text": first,
                    "confidence": _to_confidence(second),
                    "bbox": None
                })
            else:
                # Unknown pair layout: keep the previous interpretation.
                texts.append({
                    "text": str(second),
                    "confidence": None,
                    "bbox": None
                })
        # Entries of any other length (e.g. bare boxes) carry no text and are skipped.

    return texts


def _to_confidence(value):
    """Return ``value`` as a float, or None when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None
43
+
44
def recognize_text_gradio(image, use_det, use_cls, use_rec):
    """Gradio callback: run OCR on the uploaded image and return printable text.

    Args:
        image: Image array supplied by the Gradio image input, or None when
            nothing was uploaded.
        use_det: Checkbox value forwarded to ``process_ocr``.
        use_cls: Checkbox value forwarded to ``process_ocr``.
        use_rec: Checkbox value forwarded to ``process_ocr``.

    Returns:
        One line per recognized entry, with the score appended when one is
        available, or a short message when there is no image or no text.
    """
    if image is None:
        return "No image uploaded"

    results = process_ocr(image, use_det, use_cls, use_rec)

    if not results:
        return "No text found"

    output_lines = []
    for item in results:
        confidence = item['confidence']
        # Test for None explicitly: a score of 0.0 is falsy but still a score.
        if confidence is not None:
            output_lines.append(f"{item['text']} (score: {confidence:.3f})")
        else:
            output_lines.append(str(item['text']))

    return "\n".join(output_lines)
62
+
63
+ # FastAPI Endpoints
64
@fastapi_app.post("/ocr")
async def ocr_endpoint(
    file: UploadFile = File(..., description="Image file to process"),
    use_det: bool = Form(True, description="Use detection"),
    use_cls: bool = Form(True, description="Use classification"),
    use_rec: bool = Form(True, description="Use recognition")
):
    """
    OCR endpoint that accepts image file upload

    Args:
        file: Uploaded file; its content type must start with ``image/``.
        use_det: Forwarded to ``process_ocr``.
        use_cls: Forwarded to ``process_ocr``.
        use_rec: Forwarded to ``process_ocr``.

    Returns:
        A dict with ``success``, ``texts`` (recognized strings only),
        ``details`` (the full per-entry dicts) and ``num_texts``.

    Raises:
        HTTPException: 400 when the upload is not an image or cannot be
            decoded; 500 when reading or OCR processing fails otherwise.
    """
    # Check file type. Guard against a missing content type (None) so the
    # request is rejected with a 400 instead of failing on .startswith().
    if not (file.content_type or "").startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read image file
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))

        # Convert to RGB numpy array
        if image.mode != 'RGB':
            image = image.convert('RGB')
        img_np = np.array(image)
    except OSError as e:
        # Pillow reports unreadable/truncated images as OSError subclasses;
        # that is bad client input, not a server fault.
        raise HTTPException(status_code=400, detail=f"Error processing image: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e

    try:
        # Process OCR
        results = process_ocr(img_np, use_det, use_cls, use_rec)
    except Exception as e:
        # Boundary handler: surface engine failures as an HTTP 500 and keep
        # the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e

    return {
        "success": True,
        "texts": [item["text"] for item in results],
        "details": results,
        "num_texts": len(results)
    }
100
+
101
@fastapi_app.get("/health")
async def health_check():
    """Liveness probe: always answers with a fixed status payload."""
    payload = dict(status="healthy", service="OCR API")
    return payload
105
+
106
class OCRURLRequest(BaseModel):
    """Request body for the URL-based OCR endpoint (``POST /ocr/url``)."""

    # Address of the image to download and run OCR on.
    url: str
    # Stage switches passed through to process_ocr; each defaults to True.
    use_det: Optional[bool] = True
    use_cls: Optional[bool] = True
    use_rec: Optional[bool] = True
111
+
112
@fastapi_app.post("/ocr/url")
async def ocr_from_url(request: OCRURLRequest):
    """
    OCR endpoint that accepts image URL

    Args:
        request: Body carrying the image ``url`` and the ``use_det`` /
            ``use_cls`` / ``use_rec`` switches forwarded to ``process_ocr``.

    Returns:
        A dict with ``success``, ``texts`` (recognized strings only),
        ``details`` (the full per-entry dicts) and ``num_texts``.

    Raises:
        HTTPException: 400 when the URL is not an absolute http(s) URL, the
            download fails, or the payload is not a decodable image; 500 when
            OCR processing fails.
    """
    from urllib.parse import urlparse

    import requests
    from fastapi.concurrency import run_in_threadpool

    # Reject anything that is not an absolute http(s) URL up front, so the
    # caller gets a clear 400 instead of a wrapped library error.
    parsed = urlparse(request.url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise HTTPException(status_code=400, detail="URL must be an absolute http(s) URL")

    # NOTE(security): this fetches a caller-supplied URL from the server's own
    # network (SSRF surface). Consider restricting targets to public hosts if
    # the service is exposed to untrusted clients.
    try:
        # Download image from URL. requests is blocking, so run it in the
        # thread pool to keep the event loop responsive during the download.
        response = await run_in_threadpool(requests.get, request.url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        raise HTTPException(status_code=400, detail=f"Error processing image from URL: {str(e)}") from e

    try:
        image = Image.open(io.BytesIO(response.content))

        # Convert to RGB numpy array
        if image.mode != 'RGB':
            image = image.convert('RGB')
        img_np = np.array(image)
    except OSError as e:
        # Pillow reports unreadable/truncated images as OSError subclasses.
        raise HTTPException(status_code=400, detail=f"Error processing image from URL: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image from URL: {str(e)}") from e

    try:
        # Process OCR
        results = process_ocr(img_np, request.use_det, request.use_cls, request.use_rec)
    except Exception as e:
        # Boundary handler: surface engine failures as an HTTP 500 and keep
        # the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=f"Error processing image from URL: {str(e)}") from e

    return {
        "success": True,
        "texts": [item["text"] for item in results],
        "details": results,
        "num_texts": len(results)
    }
143
+
144
# Create the Gradio interface: one image input plus three checkboxes, wired to
# recognize_text_gradio, with the result shown in a text box.
gradio_interface = gr.Interface(
    fn=recognize_text_gradio,
    inputs=[
        gr.Image(label="Upload Image", type="numpy"),
        gr.Checkbox(label="use_det", value=True),
        gr.Checkbox(label="use_cls", value=True),
        gr.Checkbox(label="use_rec", value=True),
    ],
    outputs=gr.Textbox(label="OCR Results", lines=10),
    title="OCR with RapidOCR",
    description="Upload an image to extract text using RapidOCR"
)

# Mount Gradio app to FastAPI
app = gr.mount_gradio_app(fastapi_app, gradio_interface, path="/")

# Start the combined app with uvicorn when this file is run as a script.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)