LogicGoInfotechSpaces committed on
Commit
09328e0
·
verified ·
1 Parent(s): 223bd89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -36
app.py CHANGED
@@ -1,9 +1,7 @@
1
  # app.py
2
- import io
3
  import uvicorn
4
  import numpy as np
5
  import cv2
6
- import re
7
  from fastapi import FastAPI, UploadFile, File, HTTPException
8
  from bson import ObjectId
9
  from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
@@ -19,16 +17,15 @@ MONGO_URI = (
19
  )
20
 
21
  DB_NAME = "ocr_fastapi"
22
- BUCKET_NAME = "fs" # <-- FIXED (use default GridFS bucket)
23
 
24
  app = FastAPI()
25
 
26
- # MongoDB Init
27
  _client = AsyncIOMotorClient(MONGO_URI)
28
  _db = _client[DB_NAME]
29
 
30
  def gridfs():
31
- # Always use default "fs" bucket (fs.files + fs.chunks)
32
  return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
33
 
34
  ocr_engine = RapidOCR()
@@ -42,14 +39,11 @@ async def health():
42
  # --------------------------------------------------
43
  @app.post("/upload")
44
  async def upload_image(file: UploadFile = File(...)):
45
- """
46
- Uploads image to MongoDB GridFS
47
- """
48
  try:
49
  data = await file.read()
50
  fs = gridfs()
51
 
52
- # Upload to GridFS
53
  oid = await fs.upload_from_stream(
54
  file.filename,
55
  data,
@@ -58,9 +52,9 @@ async def upload_image(file: UploadFile = File(...)):
58
 
59
  print("📌 Uploaded ID:", oid)
60
 
61
- # Debug: Confirm stored in fs.files
62
  stored = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
63
- print("📦 Stored in DB:", stored)
64
 
65
  return {"image_id": str(oid)}
66
 
@@ -69,23 +63,20 @@ async def upload_image(file: UploadFile = File(...)):
69
 
70
  # --------------------------------------------------
71
  async def load_image_from_gridfs(image_id: str):
72
- """
73
- Loads the image bytes from GridFS bucket
74
- """
75
  try:
76
  print("🔍 Fetching from GridFS:", image_id)
77
 
78
  oid = ObjectId(image_id)
79
  fs = gridfs()
80
 
81
- # Debug: check existence
82
  file_exists = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
83
- print("📂 Exists in fs.files:", file_exists)
84
 
85
  if file_exists == 0:
86
- raise HTTPException(status_code=404, detail="Image not found in GridFS")
87
 
88
- # Read file
89
  stream = await fs.open_download_stream(oid)
90
  data = await stream.read()
91
  await stream.close()
@@ -101,29 +92,27 @@ async def load_image_from_gridfs(image_id: str):
101
  # --------------------------------------------------
102
  @app.post("/generate/{image_id}")
103
  async def generate(image_id: str):
104
- """
105
- Reads image → performs OCR → returns extracted text
106
- """
107
  raw_bytes = await load_image_from_gridfs(image_id)
108
 
109
- # Decode image
110
  img_array = np.frombuffer(raw_bytes, np.uint8)
111
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
112
 
113
  if img is None:
114
  raise HTTPException(status_code=400, detail="Unable to decode image")
115
 
116
- # Perform OCR
117
  result, times = ocr_engine(img)
118
 
119
  if not result:
120
  raise HTTPException(status_code=500, detail="OCR returned empty result")
121
 
122
- # Extract plain text
123
  extracted = [text for box, text, score in result]
124
  full_text = "\n".join(extracted)
125
 
126
- # Cache in memory for view endpoint
127
  OCR_RESULTS[image_id] = {
128
  "text": full_text,
129
  "details": result,
@@ -136,20 +125,11 @@ async def generate(image_id: str):
136
  @app.get("/view/{image_id}")
137
  async def view_details(image_id: str):
138
  """
139
- Returns cached OCR result
140
  """
141
  if image_id not in OCR_RESULTS:
142
- raise HTTPException(status_code=404, detail="No OCR result found for this ID")
143
  return OCR_RESULTS[image_id]
144
- @app.get("/debug/db")
145
- async def debug_db():
146
- return {"db_name": _db.name, "collections": await _db.list_collection_names()}
147
-
148
- @app.get("/debug/test-write")
149
- async def test_write():
150
- res = await _db["debug_test"].insert_one({"ping": "ok"})
151
- return {"inserted_id": str(res.inserted_id)}
152
-
153
 
154
  # --------------------------------------------------
155
  if __name__ == "__main__":
 
1
  # app.py
 
2
  import uvicorn
3
  import numpy as np
4
  import cv2
 
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from bson import ObjectId
7
  from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
 
17
  )
18
 
19
  DB_NAME = "ocr_fastapi"
20
+ BUCKET_NAME = "ocr_images" # ✔ USE THIS BUCKET
21
 
22
  app = FastAPI()
23
 
24
+ # Init MongoDB & GridFS
25
  _client = AsyncIOMotorClient(MONGO_URI)
26
  _db = _client[DB_NAME]
27
 
28
  def gridfs():
 
29
  return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
30
 
31
  ocr_engine = RapidOCR()
 
39
  # --------------------------------------------------
40
  @app.post("/upload")
41
  async def upload_image(file: UploadFile = File(...)):
 
 
 
42
  try:
43
  data = await file.read()
44
  fs = gridfs()
45
 
46
+ # Upload image bytes to GridFS
47
  oid = await fs.upload_from_stream(
48
  file.filename,
49
  data,
 
52
 
53
  print("📌 Uploaded ID:", oid)
54
 
55
+ # Debug: Confirm stored in ocr_images.files
56
  stored = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
57
+ print("📦 Stored in ocr_images.files:", stored)
58
 
59
  return {"image_id": str(oid)}
60
 
 
63
 
64
  # --------------------------------------------------
65
  async def load_image_from_gridfs(image_id: str):
 
 
 
66
  try:
67
  print("🔍 Fetching from GridFS:", image_id)
68
 
69
  oid = ObjectId(image_id)
70
  fs = gridfs()
71
 
72
+ # Check file existence
73
  file_exists = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
74
+ print("📂 Exists in ocr_images.files:", file_exists)
75
 
76
  if file_exists == 0:
77
+ raise HTTPException(status_code=404, detail="Image not found")
78
 
79
+ # Read file from GridFS
80
  stream = await fs.open_download_stream(oid)
81
  data = await stream.read()
82
  await stream.close()
 
92
  # --------------------------------------------------
93
  @app.post("/generate/{image_id}")
94
  async def generate(image_id: str):
95
+ # Load the image data
 
 
96
  raw_bytes = await load_image_from_gridfs(image_id)
97
 
98
+ # Decode into OpenCV format
99
  img_array = np.frombuffer(raw_bytes, np.uint8)
100
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
101
 
102
  if img is None:
103
  raise HTTPException(status_code=400, detail="Unable to decode image")
104
 
105
+ # Run OCR
106
  result, times = ocr_engine(img)
107
 
108
  if not result:
109
  raise HTTPException(status_code=500, detail="OCR returned empty result")
110
 
111
+ # Extract recognized text only
112
  extracted = [text for box, text, score in result]
113
  full_text = "\n".join(extracted)
114
 
115
+ # Cache OCR results for viewing
116
  OCR_RESULTS[image_id] = {
117
  "text": full_text,
118
  "details": result,
 
125
  @app.get("/view/{image_id}")
126
  async def view_details(image_id: str):
127
  """
128
+ Returns OCR results stored in memory.
129
  """
130
  if image_id not in OCR_RESULTS:
131
+ raise HTTPException(status_code=404, detail="No OCR result found")
132
  return OCR_RESULTS[image_id]
 
 
 
 
 
 
 
 
 
133
 
134
  # --------------------------------------------------
135
  if __name__ == "__main__":