Spaces:

Hammad712
/

Urdu-OCR-APP

Sleeping

File size: 6,043 Bytes

23c697b
 
6dd2bd7
b164852
70dda00
1b46c86
 
 
23c697b
b164852
1b46c86
4496186
1b46c86
b164852
 
23c697b
6dd2bd7
23c697b
b164852
 
 
 
ef11056
 
2a15edf
ef11056
2a15edf
 
70dda00
 
 
ef11056
70dda00
 
 
 
ef11056
70dda00
 
 
2a15edf
ef11056
2a15edf
ef11056
 
70dda00
ef11056
b164852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70dda00
b164852
70dda00
b164852
 
1b46c86
 
 
b164852
1b46c86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa0911
 
b164852
cfa0911
b164852
23c697b
b164852
ef11056
b164852
6dd2bd7
 
 
b164852
bb61a7a
6dd2bd7
b164852
fb4c874
6dd2bd7
 
 
 
4496186
 
6dd2bd7
1b46c86
6dd2bd7
 
1b46c86
6dd2bd7
ef11056
23c697b
1b46c86
23c697b
 
 
 
 
 
 
 
1b46c86
 
 
 
 
 
 
 
 
 
 
23c697b
 
 
ef11056
23c697b
 
 
 
1b46c86
23c697b
 
 
1b46c86
 
ef11056
23c697b
4496186
4e795bb
1b46c86
4e795bb
b164852

import atexit
import io
import logging
import os
import tempfile
import time
from collections import defaultdict
from typing import Dict, List

import PIL.Image
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException, Request, Depends
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes, pdfinfo_from_bytes

# Google GenAI SDK (Vertex AI)
from google import genai
from google.genai.errors import ClientError

# --- LOGGING SETUP ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


# =====================================================================
# GCP CREDENTIALS WORKAROUND 
# =====================================================================
# Read the raw service-account JSON from the GCP_SERVICE_ACCOUNT_JSON environment variable.
gcp_raw_json = os.getenv("GCP_SERVICE_ACCOUNT_JSON")


def _remove_credentials_file(path: str) -> None:
    """Delete the temporary credentials file at *path*, ignoring a missing file."""
    try:
        os.remove(path)
    except OSError:
        # Best effort only: the file may already be gone, and there is nothing
        # useful to do about a failed delete while the interpreter is exiting.
        pass


if gcp_raw_json:
    try:
        # mkstemp creates the file readable and writable only by the current user.
        fd, temp_path = tempfile.mkstemp(suffix=".json")
        # Register cleanup before writing so the file holding the private key
        # is removed at interpreter exit even if the write below fails.
        atexit.register(_remove_credentials_file, temp_path)
        with os.fdopen(fd, 'w') as f:
            f.write(gcp_raw_json)

        # Google auth libraries read this variable to locate the key file.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_path
        logger.info("Successfully loaded GCP credentials from JSON string into temp file.")
    except Exception as e:
        logger.error(f"Failed to process GCP_SERVICE_ACCOUNT_JSON: {e}")
else:
    logger.warning("No GCP_SERVICE_ACCOUNT_JSON found in environment.")
# =====================================================================


# --- APP INITIALIZATION ---
# The ASGI application object; routes and handlers below attach to it.
app = FastAPI(title="Vertex AI PDF/Image OCR API")

# Project and region are taken from the environment; only the region has a default.
LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")

if not PROJECT_ID:
    logger.warning("GOOGLE_CLOUD_PROJECT is not set. Ensure it is set before making Vertex AI calls.")

# One module-level client is shared by every request. If construction fails,
# the name is bound to None so request handlers can detect the failure.
try:
    vertex_client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)
except Exception as e:
    logger.error(f"Failed to initialize Vertex AI client: {e}")
    vertex_client = None
else:
    logger.info("Vertex AI client initialized successfully.")

# --- RATE LIMITING LOGIC ---
# Maps a client key (peer IP) to the time.time() stamps of its accepted requests.
request_history: Dict[str, List[float]] = defaultdict(list)
RATE_LIMIT_COUNT = 5
RATE_LIMIT_WINDOW = 60  # Seconds


async def rate_limiter(request: Request):
    """Reject the request when its client exceeded the sliding-window limit.

    Keeps, per client, the timestamps of requests accepted during the last
    ``RATE_LIMIT_WINDOW`` seconds and allows at most ``RATE_LIMIT_COUNT`` of
    them. State lives in this process's memory only.

    Args:
        request: Incoming request; its peer address identifies the client.

    Raises:
        HTTPException: 429 when the client already has ``RATE_LIMIT_COUNT``
            accepted requests inside the current window.
    """
    # request.client is None when the server supplies no peer address; all
    # such requests share one bucket instead of failing with AttributeError.
    client_ip = request.client.host if request.client else "unknown"
    now = time.time()

    # Drop timestamps that have aged out of the window.
    recent = [t for t in request_history[client_ip] if now - t < RATE_LIMIT_WINDOW]
    request_history[client_ip] = recent

    if len(recent) >= RATE_LIMIT_COUNT:
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Max {RATE_LIMIT_COUNT} requests per {RATE_LIMIT_WINDOW}s."
        )

    # Only accepted requests are recorded; rejected ones do not extend the block.
    recent.append(now)

@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Answer with a JSON 500 whose ``detail`` field is ``str(exc)``.

    Registered on the app as the handler for ``Exception``.
    """
    # NOTE(review): the exception text goes to the client verbatim; confirm
    # that exposing internal error messages is acceptable for this service.
    error_body = {"detail": str(exc)}
    return JSONResponse(content=error_body, status_code=500)

# --- OCR LOGIC ---
def extract_text_from_image(img):
    """Extract the visible text of one image using the Vertex AI Gemini model.

    A 429 (quota/rate) response is retried with exponential backoff
    (1s, then 2s) for up to three attempts in total.

    Args:
        img: Image object passed unchanged as part of the request contents
            (callers in this file pass PIL images).

    Returns:
        The text of the model response, or an empty string when the response
        carries no text.

    Raises:
        HTTPException: 500 when the Vertex AI client failed to initialise or
            the API returned a non-429 client error; 503 when the API still
            answers 429 after the final attempt.
    """
    if not vertex_client:
        raise HTTPException(status_code=500, detail="Vertex AI client is not initialized properly. Check your credentials.")

    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = vertex_client.models.generate_content(
                model="gemini-2.5-flash",
                contents=[
                   "Extract all visible text from this image and preserve layout...",
                   img,
                ]
            )
        except ClientError as e:
            # The SDK exposes the HTTP status as `e.code`; `e.args[0]` is the
            # formatted message string, so reading the status from args never
            # matched 429 and the retry path was dead. Keep the args lookup
            # only as a fallback.
            error_code = getattr(e, "code", None)
            if not isinstance(error_code, int):
                error_code = e.args[0] if e.args and isinstance(e.args[0], int) else None

            if error_code != 429:
                raise HTTPException(status_code=500, detail=f"Error communicating with Vertex AI: {str(e)}") from e
            if attempt >= max_retries - 1:
                raise HTTPException(status_code=503, detail="API resource exhausted.") from e

            # Exponential backoff before the next attempt.
            time.sleep(2 ** attempt)
        else:
            # `text` may be None when the response has no text parts;
            # normalise so callers can always concatenate the result.
            return response.text or ""

@app.post("/upload", dependencies=[Depends(rate_limiter)])
async def upload_file(file: UploadFile = File(...)):
    """OCR an uploaded PDF (at most 5 pages) or a single image.

    Files whose name ends in ``.pdf`` are rendered page by page at 200 dpi and
    each page is sent to ``extract_text_from_image``; anything else is opened
    as an image with PIL.

    Args:
        file: The uploaded file.

    Returns:
        JSONResponse with ``{"extracted_text": <text>}``. PDF output contains
        one ``### Page N`` section per page.

    Raises:
        HTTPException: 400 for a missing filename, unreadable PDF metadata, a
            PDF with more than 5 pages, or an invalid image; 500 when the PDF
            cannot be converted to images. Errors raised by
            ``extract_text_from_image`` propagate unchanged.
    """
    # NOTE(review): the PDF rendering and OCR calls below are synchronous and
    # run inside this coroutine; confirm whether blocking here is acceptable.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    file_contents = await file.read()

    if file.filename.lower().endswith(".pdf"):
        # Only the metadata read is guarded, so the page-limit error below is
        # not caught and re-wrapped as a "Could not read PDF metadata" error.
        try:
            info = pdfinfo_from_bytes(file_contents)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Could not read PDF metadata: {str(e)}") from e

        page_count = info.get("Pages", 0)
        if page_count > 5:
            raise HTTPException(
                status_code=400,
                detail=f"PDF is too large ({page_count} pages). Maximum allowed is 5 pages."
            )

        try:
            images = convert_from_bytes(file_contents, dpi=200)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error converting PDF to images: {str(e)}") from e

        sections = []
        for idx, img in enumerate(images, start=1):
            # `or ""` guards against a missing text result for a page.
            page_text = extract_text_from_image(img) or ""
            sections.append(f"### Page {idx}\n\n{page_text}\n\n")
        output_text = "".join(sections)

    else:
        # Guard only the decoding step. Image.open is lazy, so load() forces
        # the pixel data to be read here and corrupt files are still reported
        # as a 400 rather than failing later inside the OCR call.
        try:
            img = PIL.Image.open(io.BytesIO(file_contents))
            img.load()
        except Exception as e:
            raise HTTPException(status_code=400, detail="Uploaded file is not a valid image format supported by PIL.") from e

        # Outside the try: OCR errors (500/503) must reach the client as-is
        # instead of being reported as an invalid image.
        output_text = (extract_text_from_image(img) or "") + "\n\n"

    return JSONResponse(content={"extracted_text": output_text})

@app.get("/")
async def root():
    """Return a fixed JSON message indicating the API is running."""
    status_payload = {"message": "Vertex AI OCR API is up and running."}
    return JSONResponse(content=status_payload)

# --- RUN THE APP ---
if __name__ == "__main__":
    # Direct execution: serve the `app` object of module `main` on port 8000,
    # on all interfaces, with uvicorn's auto-reload switched on.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("main:app", **server_options)