WalletSyncOCR

Sleeping

App Files Files Community

LogicGoInfotechSpaces committed on Nov 14, 2025

Commit

34d316c

verified ·

1 Parent(s): 09328e0

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -71

app.py CHANGED Viewed

@@ -2,32 +2,34 @@
 import uvicorn
 import numpy as np
 import cv2
 from fastapi import FastAPI, UploadFile, File, HTTPException
-from bson import ObjectId
-from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorGridFSBucket
 from rapidocr_onnxruntime import RapidOCR
 # --------------------------------------------------
-# CONFIG
 # --------------------------------------------------
-MONGO_URI = (
-    "mongodb+srv://harilogicgo_db_user:jFhyDM4oA4dklUsp"
-    "@api-logs.i7rqf9p.mongodb.net/ocr_fastapi"
-    "?retryWrites=true&w=majority&appName=API-LOGS"
 )
-DB_NAME = "ocr_fastapi"
-BUCKET_NAME = "ocr_images"   # ✔ USE THIS BUCKET
 app = FastAPI()
-# Init MongoDB & GridFS
-_client = AsyncIOMotorClient(MONGO_URI)
-_db = _client[DB_NAME]
-def gridfs():
-    return AsyncIOMotorGridFSBucket(_db, bucket_name=BUCKET_NAME)
 ocr_engine = RapidOCR()
 OCR_RESULTS = {}
@@ -40,79 +42,52 @@ async def health():
 @app.post("/upload")
 async def upload_image(file: UploadFile = File(...)):
     try:
-        data = await file.read()
-        fs = gridfs()
-        # Upload image bytes to GridFS
-        oid = await fs.upload_from_stream(
-            file.filename,
-            data,
-            metadata={"contentType": file.content_type},
         )
-        print("📌 Uploaded ID:", oid)
-        # Debug: Confirm stored in ocr_images.files
-        stored = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
-        print("📦 Stored in ocr_images.files:", stored)
-        return {"image_id": str(oid)}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
 # --------------------------------------------------
-async def load_image_from_gridfs(image_id: str):
     try:
-        print("🔍 Fetching from GridFS:", image_id)
-        oid = ObjectId(image_id)
-        fs = gridfs()
-        # Check file existence
-        file_exists = await _db[f"{BUCKET_NAME}.files"].count_documents({"_id": oid})
-        print("📂 Exists in ocr_images.files:", file_exists)
-        if file_exists == 0:
-            raise HTTPException(status_code=404, detail="Image not found")
-        # Read file from GridFS
-        stream = await fs.open_download_stream(oid)
-        data = await stream.read()
-        await stream.close()
-        print("✅ Loaded image bytes from GridFS")
-        return data
     except Exception as e:
-        print("❌ GridFS error:", e)
-        raise HTTPException(status_code=404, detail="Image not found")
-# --------------------------------------------------
-@app.post("/generate/{image_id}")
-async def generate(image_id: str):
-    # Load the image data
-    raw_bytes = await load_image_from_gridfs(image_id)
-    # Decode into OpenCV format
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
-    # Run OCR
     result, times = ocr_engine(img)
     if not result:
         raise HTTPException(status_code=500, detail="OCR returned empty result")
-    # Extract recognized text only
     extracted = [text for box, text, score in result]
     full_text = "\n".join(extracted)
-    # Cache OCR results for viewing
     OCR_RESULTS[image_id] = {
         "text": full_text,
         "details": result,
@@ -122,11 +97,8 @@ async def generate(image_id: str):
     return {"image_id": image_id, "text": full_text}
 # --------------------------------------------------
-@app.get("/view/{image_id}")
 async def view_details(image_id: str):
-    """
-    Returns OCR results stored in memory.
-    """
     if image_id not in OCR_RESULTS:
         raise HTTPException(status_code=404, detail="No OCR result found")
     return OCR_RESULTS[image_id]

 import uvicorn
 import numpy as np
 import cv2
+import boto3
+import os
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from rapidocr_onnxruntime import RapidOCR
 # --------------------------------------------------
+# CONFIG FROM ENV
 # --------------------------------------------------
+DO_KEY_ID = os.getenv("DO_SPACES_KEY_ID")
+DO_SECRET_KEY = os.getenv("DO_SPACES_SECRET_KEY")
+DO_REGION = os.getenv("DO_SPACES_REGION", "blr1")
+DO_ENDPOINT = os.getenv("DO_SPACES_ENDPOINT")
+DO_BUCKET = os.getenv("DO_SPACES_BUCKET", "milestone")
+FOLDER = "OCR_Images"
+if not (DO_KEY_ID and DO_SECRET_KEY and DO_ENDPOINT):
+    raise RuntimeError("Missing DigitalOcean Spaces credentials!")
+# S3 client
+s3 = boto3.client(
+    "s3",
+    region_name=DO_REGION,
+    endpoint_url=DO_ENDPOINT,
+    aws_access_key_id=DO_KEY_ID,
+    aws_secret_access_key=DO_SECRET_KEY,
 )
 app = FastAPI()
 ocr_engine = RapidOCR()
 OCR_RESULTS = {}
 @app.post("/upload")
 async def upload_image(file: UploadFile = File(...)):
     try:
+        file_bytes = await file.read()
+        image_key = f"{FOLDER}/{file.filename}"
+        # Upload to DigitalOcean Spaces
+        s3.put_object(
+            Bucket=DO_BUCKET,
+            Key=image_key,
+            Body=file_bytes,
+            ContentType=file.content_type,
+            ACL="private"
         )
+        return {
+            "image_id": image_key,
+            "message": "Uploaded successfully"
+        }
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 # --------------------------------------------------
+@app.post("/generate/{image_id:path}")
+async def generate(image_id: str):
     try:
+        # Download from Spaces
+        obj = s3.get_object(Bucket=DO_BUCKET, Key=image_id)
+        raw_bytes = obj["Body"].read()
     except Exception as e:
+        raise HTTPException(status_code=404, detail="Image not found in Spaces")
     img_array = np.frombuffer(raw_bytes, np.uint8)
     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
     if img is None:
         raise HTTPException(status_code=400, detail="Unable to decode image")
+    # OCR
     result, times = ocr_engine(img)
     if not result:
         raise HTTPException(status_code=500, detail="OCR returned empty result")
     extracted = [text for box, text, score in result]
     full_text = "\n".join(extracted)
     OCR_RESULTS[image_id] = {
         "text": full_text,
         "details": result,
     return {"image_id": image_id, "text": full_text}
 # --------------------------------------------------
+@app.get("/view/{image_id:path}")
 async def view_details(image_id: str):
     if image_id not in OCR_RESULTS:
         raise HTTPException(status_code=404, detail="No OCR result found")
     return OCR_RESULTS[image_id]