LogicGoInfotechSpaces committed on
Commit
3d6e38a
·
verified ·
1 Parent(s): ac4948e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -36
app.py CHANGED
@@ -18,34 +18,30 @@ BUCKET_NAME = "ocr_images"
18
 
19
  app = FastAPI(title="RapidOCR FastAPI", version="1.0")
20
 
21
- # -------------------------------------------------------------------
22
- # DB + GRIDFS INIT
23
- # -------------------------------------------------------------------
24
  _client = AsyncIOMotorClient(MONGO_URI)
25
  _db = _client[DB_NAME]
26
- _gridfs = AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
27
 
28
- # RapidOCR ONNX model (loaded once)
29
  ocr_engine = RapidOCR()
30
-
31
- # In-memory dictionary to store OCR results
32
  OCR_RESULTS = {}
33
 
34
- # -------------------------------------------------------------------
35
- # HEALTH CHECK
36
- # -------------------------------------------------------------------
37
  @app.get("/health")
38
  async def health():
39
  return {"status": "ok"}
40
 
 
41
  # -------------------------------------------------------------------
42
- # UPLOAD IMAGE → GRIDFS
43
  # -------------------------------------------------------------------
44
  @app.post("/upload")
45
  async def upload_image(file: UploadFile = File(...)):
46
  try:
47
  data = await file.read()
48
- oid = await _gridfs.upload_from_stream(
 
 
 
49
  file.filename,
50
  data,
51
  metadata={"contentType": file.content_type},
@@ -54,70 +50,60 @@ async def upload_image(file: UploadFile = File(...)):
54
  except Exception as e:
55
  raise HTTPException(status_code=500, detail=str(e))
56
 
 
57
  # -------------------------------------------------------------------
58
- # HELPER: LOAD IMAGE FROM GRIDFS
59
  # -------------------------------------------------------------------
60
  async def load_image_from_gridfs(image_id: str):
61
  try:
62
  oid = ObjectId(image_id)
63
- stream = await _gridfs.open_download_stream(oid)
 
 
64
  data = await stream.read()
65
  await stream.close()
66
  return data
67
- except:
68
  raise HTTPException(status_code=404, detail="Image not found")
69
 
 
70
  # -------------------------------------------------------------------
71
- # GENERATE OCR FROM IMAGE ID
72
  # -------------------------------------------------------------------
73
  @app.post("/generate/{image_id}")
74
  async def generate(image_id: str):
75
  raw_bytes = await load_image_from_gridfs(image_id)
76
 
77
- # Convert to image
78
  img_array = np.frombuffer(raw_bytes, np.uint8)
79
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
80
 
81
  if img is None:
82
  raise HTTPException(status_code=400, detail="Unable to decode image")
83
 
84
- # Run RapidOCR
85
  result, times = ocr_engine(img)
86
 
87
  if not result:
88
  raise HTTPException(status_code=500, detail="OCR returned empty result")
89
 
90
- # Extract text
91
- lines = []
92
- for box, text, score in result:
93
- lines.append(text)
94
-
95
- full_text = "\n".join(lines)
96
 
97
  OCR_RESULTS[image_id] = {
98
  "text": full_text,
99
  "details": result,
100
- "timing": {
101
- "det": times[0],
102
- "cls": times[1],
103
- "rec": times[2],
104
- },
105
  }
106
 
107
  return {"image_id": image_id, "text": full_text}
108
 
109
- # -------------------------------------------------------------------
110
- # VIEW STORED OCR RESULT
111
- # -------------------------------------------------------------------
112
  @app.get("/view/{image_id}")
113
  async def view_details(image_id: str):
114
  if image_id not in OCR_RESULTS:
115
- raise HTTPException(status_code=404, detail="No OCR result found. Use /generate first.")
116
 
117
  return OCR_RESULTS[image_id]
118
 
119
- # -------------------------------------------------------------------
120
- # MAIN
121
- # -------------------------------------------------------------------
122
  if __name__ == "__main__":
123
  uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
 
18
 
19
  app = FastAPI(title="RapidOCR FastAPI", version="1.0")
20
 
21
+ # DB INIT
 
 
22
  _client = AsyncIOMotorClient(MONGO_URI)
23
  _db = _client[DB_NAME]
 
24
 
 
25
  ocr_engine = RapidOCR()
 
 
26
  OCR_RESULTS = {}
27
 
28
+
 
 
29
  @app.get("/health")
30
  async def health():
31
  return {"status": "ok"}
32
 
33
+
34
  # -------------------------------------------------------------------
35
+ # UPLOAD
36
  # -------------------------------------------------------------------
37
  @app.post("/upload")
38
  async def upload_image(file: UploadFile = File(...)):
39
  try:
40
  data = await file.read()
41
+
42
+ fs = AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
43
+
44
+ oid = await fs.upload_from_stream(
45
  file.filename,
46
  data,
47
  metadata={"contentType": file.content_type},
 
50
  except Exception as e:
51
  raise HTTPException(status_code=500, detail=str(e))
52
 
53
+
54
  # -------------------------------------------------------------------
55
+ # LOAD IMAGE
56
  # -------------------------------------------------------------------
57
  async def load_image_from_gridfs(image_id: str):
58
  try:
59
  oid = ObjectId(image_id)
60
+ fs = AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
61
+
62
+ stream = await fs.open_download_stream(oid)
63
  data = await stream.read()
64
  await stream.close()
65
  return data
66
+ except Exception as e:
67
  raise HTTPException(status_code=404, detail="Image not found")
68
 
69
+
70
  # -------------------------------------------------------------------
71
+ # GENERATE OCR
72
  # -------------------------------------------------------------------
73
  @app.post("/generate/{image_id}")
74
  async def generate(image_id: str):
75
  raw_bytes = await load_image_from_gridfs(image_id)
76
 
 
77
  img_array = np.frombuffer(raw_bytes, np.uint8)
78
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
79
 
80
  if img is None:
81
  raise HTTPException(status_code=400, detail="Unable to decode image")
82
 
 
83
  result, times = ocr_engine(img)
84
 
85
  if not result:
86
  raise HTTPException(status_code=500, detail="OCR returned empty result")
87
 
88
+ extracted = [text for box, text, score in result]
89
+ full_text = "\n".join(extracted)
 
 
 
 
90
 
91
  OCR_RESULTS[image_id] = {
92
  "text": full_text,
93
  "details": result,
94
+ "timing": {"det": times[0], "cls": times[1], "rec": times[2]},
 
 
 
 
95
  }
96
 
97
  return {"image_id": image_id, "text": full_text}
98
 
99
+
 
 
100
  @app.get("/view/{image_id}")
101
  async def view_details(image_id: str):
102
  if image_id not in OCR_RESULTS:
103
+ raise HTTPException(status_code=404, detail="No OCR result found")
104
 
105
  return OCR_RESULTS[image_id]
106
 
107
+
 
 
108
  if __name__ == "__main__":
109
  uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)