LogicGoInfotechSpaces committed on
Commit
34d316c
·
verified ·
1 Parent(s): 09328e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -71
app.py CHANGED
@@ -2,32 +2,34 @@
2
  import uvicorn
3
  import numpy as np
4
  import cv2
 
 
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
- from bson import ObjectId
7
- from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
8
  from rapidocr_onnxruntime import RapidOCR
9
 
10
  # --------------------------------------------------
11
- # CONFIG
12
  # --------------------------------------------------
13
- MONGO_URI = (
14
- "mongodb+srv://harilogicgo_db_user:jFhyDM4oA4dklUsp"
15
- "@api-logs.i7rqf9p.mongodb.net/ocr_fastapi"
16
- "?retryWrites=true&w=majority&appName=API-LOGS"
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  )
18
 
19
- DB_NAME = "ocr_fastapi"
20
- BUCKET_NAME = "ocr_images" # ✔ USE THIS BUCKET
21
-
22
  app = FastAPI()
23
-
24
- # Init MongoDB & GridFS
25
- _client = AsyncIOMotorClient(MONGO_URI)
26
- _db = _client[DB_NAME]
27
-
28
- def gridfs():
29
- return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
30
-
31
  ocr_engine = RapidOCR()
32
  OCR_RESULTS = {}
33
 
@@ -40,79 +42,52 @@ async def health():
40
  @app.post("/upload")
41
  async def upload_image(file: UploadFile = File(...)):
42
  try:
43
- data = await file.read()
44
- fs = gridfs()
45
-
46
- # Upload image bytes to GridFS
47
- oid = await fs.upload_from_stream(
48
- file.filename,
49
- data,
50
- metadata={"contentType": file.content_type},
 
 
51
  )
52
 
53
- print("📌 Uploaded ID:", oid)
54
-
55
- # Debug: Confirm stored in ocr_images.files
56
- stored = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
57
- print("📦 Stored in ocr_images.files:", stored)
58
-
59
- return {"image_id": str(oid)}
60
 
61
  except Exception as e:
62
- raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
63
 
64
  # --------------------------------------------------
65
- async def load_image_from_gridfs(image_id: str):
 
66
  try:
67
- print("🔍 Fetching from GridFS:", image_id)
68
-
69
- oid = ObjectId(image_id)
70
- fs = gridfs()
71
-
72
- # Check file existence
73
- file_exists = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
74
- print("📂 Exists in ocr_images.files:", file_exists)
75
-
76
- if file_exists == 0:
77
- raise HTTPException(status_code=404, detail="Image not found")
78
-
79
- # Read file from GridFS
80
- stream = await fs.open_download_stream(oid)
81
- data = await stream.read()
82
- await stream.close()
83
-
84
- print("✅ Loaded image bytes from GridFS")
85
-
86
- return data
87
 
88
  except Exception as e:
89
- print(" GridFS error:", e)
90
- raise HTTPException(status_code=404, detail="Image not found")
91
-
92
- # --------------------------------------------------
93
- @app.post("/generate/{image_id}")
94
- async def generate(image_id: str):
95
- # Load the image data
96
- raw_bytes = await load_image_from_gridfs(image_id)
97
 
98
- # Decode into OpenCV format
99
  img_array = np.frombuffer(raw_bytes, np.uint8)
100
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
101
 
102
  if img is None:
103
  raise HTTPException(status_code=400, detail="Unable to decode image")
104
 
105
- # Run OCR
106
  result, times = ocr_engine(img)
107
 
108
  if not result:
109
  raise HTTPException(status_code=500, detail="OCR returned empty result")
110
 
111
- # Extract recognized text only
112
  extracted = [text for box, text, score in result]
113
  full_text = "\n".join(extracted)
114
 
115
- # Cache OCR results for viewing
116
  OCR_RESULTS[image_id] = {
117
  "text": full_text,
118
  "details": result,
@@ -122,11 +97,8 @@ async def generate(image_id: str):
122
  return {"image_id": image_id, "text": full_text}
123
 
124
  # --------------------------------------------------
125
- @app.get("/view/{image_id}")
126
  async def view_details(image_id: str):
127
- """
128
- Returns OCR results stored in memory.
129
- """
130
  if image_id not in OCR_RESULTS:
131
  raise HTTPException(status_code=404, detail="No OCR result found")
132
  return OCR_RESULTS[image_id]
 
2
  import uvicorn
3
  import numpy as np
4
  import cv2
5
+ import boto3
6
+ import os
7
  from fastapi import FastAPI, UploadFile, File, HTTPException
 
 
8
  from rapidocr_onnxruntime import RapidOCR
9
 
10
  # --------------------------------------------------
11
+ # CONFIG FROM ENV
12
  # --------------------------------------------------
13
+ DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
14
+ DO_SECRET_KEY = os.getenv("DO_SPACES_SECRET_KEY")
15
+ DO_REGION = os.getenv("DO_SPACES_REGION", "blr1")
16
+ DO_ENDPOINT = os.getenv("DO_SPACES_ENDPOINT")
17
+ DO_BUCKET = os.getenv("DO_SPACES_BUCKET", "milestone")
18
+ FOLDER = "OCR_Images"
19
+
20
+ if not (DO_KEY_ID and DO_SECRET_KEY and DO_ENDPOINT):
21
+ raise RuntimeError("Missing DigitalOcean Spaces credentials!")
22
+
23
+ # S3 client
24
+ s3 = boto3.client(
25
+ "s3",
26
+ region_name=DO_REGION,
27
+ endpoint_url=DO_ENDPOINT,
28
+ aws_access_key_id=DO_KEY_ID,
29
+ aws_secret_access_key=DO_SECRET_KEY,
30
  )
31
 
 
 
 
32
  app = FastAPI()
 
 
 
 
 
 
 
 
33
  ocr_engine = RapidOCR()
34
  OCR_RESULTS = {}
35
 
 
42
  @app.post("/upload")
43
  async def upload_image(file: UploadFile = File(...)):
44
  try:
45
+ file_bytes = await file.read()
46
+ image_key = f"{FOLDER}/{file.filename}"
47
+
48
+ # Upload to DigitalOcean Spaces
49
+ s3.put_object(
50
+ Bucket=DO_BUCKET,
51
+ Key=image_key,
52
+ Body=file_bytes,
53
+ ContentType=file.content_type,
54
+ ACL="private"
55
  )
56
 
57
+ return {
58
+ "image_id": image_key,
59
+ "message": "Uploaded successfully"
60
+ }
 
 
 
61
 
62
  except Exception as e:
63
+ raise HTTPException(status_code=500, detail=str(e))
64
 
65
  # --------------------------------------------------
66
+ @app.post("/generate/{image_id:path}")
67
+ async def generate(image_id: str):
68
  try:
69
+ # Download from Spaces
70
+ obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
71
+ raw_bytes = obj["Body"].read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  except Exception as e:
74
+ raise HTTPException(status_code=404, detail="Image not found in Spaces")
 
 
 
 
 
 
 
75
 
 
76
  img_array = np.frombuffer(raw_bytes, np.uint8)
77
  img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
78
 
79
  if img is None:
80
  raise HTTPException(status_code=400, detail="Unable to decode image")
81
 
82
+ # OCR
83
  result, times = ocr_engine(img)
84
 
85
  if not result:
86
  raise HTTPException(status_code=500, detail="OCR returned empty result")
87
 
 
88
  extracted = [text for box, text, score in result]
89
  full_text = "\n".join(extracted)
90
 
 
91
  OCR_RESULTS[image_id] = {
92
  "text": full_text,
93
  "details": result,
 
97
  return {"image_id": image_id, "text": full_text}
98
 
99
  # --------------------------------------------------
100
+ @app.get("/view/{image_id:path}")
101
  async def view_details(image_id: str):
 
 
 
102
  if image_id not in OCR_RESULTS:
103
  raise HTTPException(status_code=404, detail="No OCR result found")
104
  return OCR_RESULTS[image_id]