LogicGoInfotechSpaces committed on
Commit
8c694d8
·
verified ·
1 Parent(s): 6d59d9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -16
app.py CHANGED
@@ -1,15 +1,17 @@
 
1
  import io
2
  import uvicorn
3
  import numpy as np
4
  import cv2
 
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from bson import ObjectId
7
  from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
8
  from rapidocr_onnxruntime import RapidOCR
9
 
10
- # ------------------------------
11
  # CONFIG
12
- # ------------------------------
13
  MONGO_URI = (
14
  "mongodb+srv://harilogicgo_db_user:jFhyDM4oA4dklUsp"
15
  "@api-logs.i7rqf9p.mongodb.net/ocr_fastapi"
@@ -17,33 +19,37 @@ MONGO_URI = (
17
  )
18
 
19
  DB_NAME = "ocr_fastapi"
20
- BUCKET_NAME = "ocr_images"
21
 
22
  app = FastAPI()
23
 
24
- # DB INIT
25
  _client = AsyncIOMotorClient(MONGO_URI)
26
  _db = _client[DB_NAME]
27
 
28
  def gridfs():
 
29
  return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
30
 
31
  ocr_engine = RapidOCR()
32
  OCR_RESULTS = {}
33
 
34
- # ------------------------------
35
  @app.get("/health")
36
  async def health():
37
  return {"status": "ok"}
38
 
39
- # ------------------------------
40
  @app.post("/upload")
41
  async def upload_image(file: UploadFile = File(...)):
 
 
 
42
  try:
43
  data = await file.read()
44
-
45
  fs = gridfs()
46
 
 
47
  oid = await fs.upload_from_stream(
48
  file.filename,
49
  data,
@@ -52,24 +58,39 @@ async def upload_image(file: UploadFile = File(...)):
52
 
53
  print("📌 Uploaded ID:", oid)
54
 
 
 
 
 
55
  return {"image_id": str(oid)}
56
 
57
  except Exception as e:
58
- raise HTTPException(status_code=500, detail=str(e))
59
 
60
- # ------------------------------
61
  async def load_image_from_gridfs(image_id: str):
 
 
 
62
  try:
63
- print("🔍 Fetching ID:", image_id)
64
 
65
  oid = ObjectId(image_id)
66
  fs = gridfs()
67
 
 
 
 
 
 
 
 
 
68
  stream = await fs.open_download_stream(oid)
69
  data = await stream.read()
70
  await stream.close()
71
 
72
- print("✅ Found image in GridFS")
73
 
74
  return data
75
 
@@ -77,26 +98,32 @@ async def load_image_from_gridfs(image_id: str):
77
  print("❌ GridFS error:", e)
78
  raise HTTPException(status_code=404, detail="Image not found")
79
 
80
-
81
- # ------------------------------
82
  @app.post("/generate/{image_id}")
83
  async def generate(image_id: str):
 
 
 
84
  raw_bytes = await load_image_from_gridfs(image_id)
85
 
 
86
  img_array = np.frombuffer(raw_bytes, np.uint8)
87
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
88
 
89
  if img is None:
90
  raise HTTPException(status_code=400, detail="Unable to decode image")
91
 
 
92
  result, times = ocr_engine(img)
93
 
94
  if not result:
95
  raise HTTPException(status_code=500, detail="OCR returned empty result")
96
 
 
97
  extracted = [text for box, text, score in result]
98
  full_text = "\n".join(extracted)
99
 
 
100
  OCR_RESULTS[image_id] = {
101
  "text": full_text,
102
  "details": result,
@@ -105,13 +132,17 @@ async def generate(image_id: str):
105
 
106
  return {"image_id": image_id, "text": full_text}
107
 
108
- # ------------------------------
109
  @app.get("/view/{image_id}")
110
  async def view_details(image_id: str):
 
 
 
111
  if image_id not in OCR_RESULTS:
112
- raise HTTPException(status_code=404, detail="No OCR result found")
113
  return OCR_RESULTS[image_id]
114
 
115
 
 
116
  if __name__ == "__main__":
117
- uvicorn.run("app:app", host="0.0.0.0", port=8000)
 
1
+ # app.py
2
  import io
3
  import uvicorn
4
  import numpy as np
5
  import cv2
6
+ import re
7
  from fastapi import FastAPI, UploadFile, File, HTTPException
8
  from bson import ObjectId
9
  from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
10
  from rapidocr_onnxruntime import RapidOCR
11
 
12
+ # --------------------------------------------------
13
  # CONFIG
14
+ # --------------------------------------------------
15
  MONGO_URI = (
16
  "mongodb+srv://harilogicgo_db_user:jFhyDM4oA4dklUsp"
17
  "@api-logs.i7rqf9p.mongodb.net/ocr_fastapi"
 
19
  )
20
 
21
  DB_NAME = "ocr_fastapi"
22
+ BUCKET_NAME = "fs" # <-- FIXED (use default GridFS bucket)
23
 
24
  app = FastAPI()
25
 
26
+ # MongoDB Init
27
  _client = AsyncIOMotorClient(MONGO_URI)
28
  _db = _client[DB_NAME]
29
 
30
  def gridfs():
31
+ # Always use default "fs" bucket (fs.files + fs.chunks)
32
  return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
33
 
34
  ocr_engine = RapidOCR()
35
  OCR_RESULTS = {}
36
 
37
+ # --------------------------------------------------
38
  @app.get("/health")
39
  async def health():
40
  return {"status": "ok"}
41
 
42
+ # --------------------------------------------------
43
  @app.post("/upload")
44
  async def upload_image(file: UploadFile = File(...)):
45
+ """
46
+ Uploads image to MongoDB GridFS
47
+ """
48
  try:
49
  data = await file.read()
 
50
  fs = gridfs()
51
 
52
+ # Upload to GridFS
53
  oid = await fs.upload_from_stream(
54
  file.filename,
55
  data,
 
58
 
59
  print("📌 Uploaded ID:", oid)
60
 
61
+ # Debug: Confirm stored in fs.files
62
+ stored = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
63
+ print("📦 Stored in DB:", stored)
64
+
65
  return {"image_id": str(oid)}
66
 
67
  except Exception as e:
68
+ raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
69
 
70
+ # --------------------------------------------------
71
  async def load_image_from_gridfs(image_id: str):
72
+ """
73
+ Loads the image bytes from GridFS bucket
74
+ """
75
  try:
76
+ print("🔍 Fetching from GridFS:", image_id)
77
 
78
  oid = ObjectId(image_id)
79
  fs = gridfs()
80
 
81
+ # Debug: check existence
82
+ file_exists = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
83
+ print("📂 Exists in fs.files:", file_exists)
84
+
85
+ if file_exists == 0:
86
+ raise HTTPException(status_code=404, detail="Image not found in GridFS")
87
+
88
+ # Read file
89
  stream = await fs.open_download_stream(oid)
90
  data = await stream.read()
91
  await stream.close()
92
 
93
+ print("✅ Loaded image bytes from GridFS")
94
 
95
  return data
96
 
 
98
  print("❌ GridFS error:", e)
99
  raise HTTPException(status_code=404, detail="Image not found")
100
 
101
+ # --------------------------------------------------
 
102
  @app.post("/generate/{image_id}")
103
  async def generate(image_id: str):
104
+ """
105
+ Reads image → performs OCR → returns extracted text
106
+ """
107
  raw_bytes = await load_image_from_gridfs(image_id)
108
 
109
+ # Decode image
110
  img_array = np.frombuffer(raw_bytes, np.uint8)
111
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
112
 
113
  if img is None:
114
  raise HTTPException(status_code=400, detail="Unable to decode image")
115
 
116
+ # Perform OCR
117
  result, times = ocr_engine(img)
118
 
119
  if not result:
120
  raise HTTPException(status_code=500, detail="OCR returned empty result")
121
 
122
+ # Extract plain text
123
  extracted = [text for box, text, score in result]
124
  full_text = "\n".join(extracted)
125
 
126
+ # Cache in memory for view endpoint
127
  OCR_RESULTS[image_id] = {
128
  "text": full_text,
129
  "details": result,
 
132
 
133
  return {"image_id": image_id, "text": full_text}
134
 
135
+ # --------------------------------------------------
136
  @app.get("/view/{image_id}")
137
  async def view_details(image_id: str):
138
+ """
139
+ Returns cached OCR result
140
+ """
141
  if image_id not in OCR_RESULTS:
142
+ raise HTTPException(status_code=404, detail="No OCR result found for this ID")
143
  return OCR_RESULTS[image_id]
144
 
145
 
146
+ # --------------------------------------------------
147
  if __name__ == "__main__":
148
+ uvicorn.run("app:app", host="0.0.0.0", port=7860)