Spaces:
Sleeping
Sleeping
File size: 6,043 Bytes
import os
import io
import time
import logging
import tempfile
from collections import defaultdict
from typing import Dict, List
import PIL.Image
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException, Request, Depends
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes, pdfinfo_from_bytes
# Google GenAI SDK (Vertex AI)
from google import genai
from google.genai.errors import ClientError
# --- LOGGING SETUP ---
# Configure the root logger once at import time: INFO and above, with
# timestamp, logger name and level in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
# =====================================================================
# GCP CREDENTIALS WORKAROUND
# =====================================================================
# The service-account key arrives as raw JSON text in GCP_SERVICE_ACCOUNT_JSON.
# It is written to a temporary file and GOOGLE_APPLICATION_CREDENTIALS is
# pointed at that file so the Google SDK can pick it up.
def _remove_gcp_credentials_file(path, owner_pid):
    """Best-effort removal of the temporary key file at interpreter exit.

    Args:
        path: Location of the temporary credentials file.
        owner_pid: PID of the process that created the file.
    """
    # A forked child inherits this exit hook; only the creating process may
    # delete the file, otherwise a child exiting would break its siblings.
    if os.getpid() != owner_pid:
        return
    try:
        os.remove(path)
    except OSError:
        # Already gone or not removable; nothing useful to do during shutdown.
        pass


gcp_raw_json = os.getenv("GCP_SERVICE_ACCOUNT_JSON")
if gcp_raw_json:
    import atexit  # local import: only needed when a key file is created

    try:
        # mkstemp creates the file readable/writable by the current user only.
        fd, temp_path = tempfile.mkstemp(suffix=".json")
        # Register cleanup immediately so the key does not outlive the process,
        # even if writing it below fails.
        atexit.register(_remove_gcp_credentials_file, temp_path, os.getpid())
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(gcp_raw_json)
        # Point the Google SDK at the freshly written file.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp_path
        logger.info("Successfully loaded GCP credentials from JSON string into temp file.")
    except Exception as e:
        # Deliberately non-fatal: the app still starts, and Vertex AI client
        # initialisation reports the missing credentials later.
        logger.error(f"Failed to process GCP_SERVICE_ACCOUNT_JSON: {e}")
else:
    logger.warning("No GCP_SERVICE_ACCOUNT_JSON found in environment.")
# =====================================================================
# --- APP INITIALIZATION ---
app = FastAPI(title="Vertex AI PDF/Image OCR API")

# GCP project and region come from the environment; the region falls back to
# "us-central1" when GOOGLE_CLOUD_LOCATION is not set.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
LOCATION = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
if PROJECT_ID is None or PROJECT_ID == "":
    logger.warning("GOOGLE_CLOUD_PROJECT is not set. Ensure it is set before making Vertex AI calls.")

# One process-wide Vertex AI GenAI client. It is left as None when construction
# fails so request handlers can detect the missing client.
vertex_client = None
try:
    client_options = {
        "vertexai": True,
        "project": PROJECT_ID,
        "location": LOCATION,
    }
    vertex_client = genai.Client(**client_options)
    logger.info("Vertex AI client initialized successfully.")
except Exception as init_error:
    logger.error(f"Failed to initialize Vertex AI client: {init_error}")
    vertex_client = None
# --- RATE LIMITING LOGIC ---
# Per-client timestamps (time.time() values) of the requests accepted inside
# the current window, keyed by client IP.
request_history: Dict[str, List[float]] = defaultdict(list)
RATE_LIMIT_COUNT = 5
RATE_LIMIT_WINDOW = 60  # Seconds
# Once more than this many clients are tracked, idle ones are swept so the
# table cannot grow without bound.
_MAX_TRACKED_CLIENTS = 10_000


def _prune_idle_clients(now: float) -> None:
    """Forget every client whose newest recorded request is outside the window."""
    idle = [
        ip
        for ip, stamps in request_history.items()
        # Timestamps are appended in arrival order, so the last one is the newest.
        if not stamps or now - stamps[-1] >= RATE_LIMIT_WINDOW
    ]
    for ip in idle:
        del request_history[ip]


async def rate_limiter(request: Request):
    """FastAPI dependency enforcing a sliding-window limit per client IP.

    Allows at most RATE_LIMIT_COUNT requests per client within any
    RATE_LIMIT_WINDOW-second span.

    Args:
        request: Incoming request; the client address is taken from
            ``request.client.host``.

    Raises:
        HTTPException: 429 when the client already has RATE_LIMIT_COUNT
            requests recorded inside the window.
    """
    # request.client can be None when the server has no peer address for the
    # connection; bucket those requests together instead of crashing.
    client = request.client
    client_ip = client.host if client else "unknown"
    now = time.time()

    if len(request_history) > _MAX_TRACKED_CLIENTS:
        _prune_idle_clients(now)

    # Keep only the timestamps that are still inside the window.
    recent = [t for t in request_history.get(client_ip, ()) if now - t < RATE_LIMIT_WINDOW]
    request_history[client_ip] = recent
    if len(recent) >= RATE_LIMIT_COUNT:
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Max {RATE_LIMIT_COUNT} requests per {RATE_LIMIT_WINDOW}s."
        )
    recent.append(now)
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Turn an exception routed to this handler into a JSON 500 response.

    The response body is ``{"detail": str(exc)}``.
    """
    error_body = {"detail": str(exc)}
    return JSONResponse(content=error_body, status_code=500)
# --- OCR LOGIC ---
def extract_text_from_image(img):
    """Ask the Vertex AI model to transcribe the text visible in an image.

    The image is sent with a fixed extraction prompt to ``gemini-2.5-flash``.
    A 429 response is retried with exponential backoff (1s, then 2s) for up to
    three attempts in total.

    Args:
        img: Image object passed straight through as a ``contents`` item
            (callers in this file pass PIL images).

    Returns:
        The text returned by the model, or an empty string when the response
        carries no text.

    Raises:
        HTTPException: 500 if the Vertex AI client failed to initialise,
            503 if the API is still rate-limited after all retries,
            500 for any other ``ClientError``.
    """
    if not vertex_client:
        raise HTTPException(status_code=500, detail="Vertex AI client is not initialized properly. Check your credentials.")

    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = vertex_client.models.generate_content(
                model="gemini-2.5-flash",
                contents=[
                    "Extract all visible text from this image and preserve layout...",
                    img,
                ]
            )
            # response.text may be None (no text parts); normalise so callers
            # can always concatenate the result.
            return response.text or ""
        except ClientError as e:
            # The SDK exposes the HTTP status on ``e.code``. ``e.args[0]`` is
            # only consulted as a fallback, so the 429 branch is actually
            # reachable.
            error_code = getattr(e, "code", None)
            if not isinstance(error_code, int):
                error_code = e.args[0] if e.args and isinstance(e.args[0], int) else None

            if error_code != 429:
                raise HTTPException(status_code=500, detail=f"Error communicating with Vertex AI: {str(e)}") from e
            if attempt >= max_retries - 1:
                raise HTTPException(status_code=503, detail="API resource exhausted.") from e
            # Exponential backoff before the next attempt.
            time.sleep(2 ** attempt)
@app.post("/upload", dependencies=[Depends(rate_limiter)])
async def upload_file(file: UploadFile = File(...)):
    """Run OCR on an uploaded PDF (max 5 pages) or single image.

    A filename ending in ``.pdf`` (case-insensitive) is rendered page by page
    at 200 dpi and each page is transcribed under a ``### Page N`` heading.
    Any other upload is opened as an image with PIL.

    Args:
        file: The uploaded file.

    Returns:
        JSONResponse with ``{"extracted_text": <text>}``.

    Raises:
        HTTPException: 400 for a missing filename, unreadable PDF metadata,
            a PDF with more than 5 pages, or an undecodable image; 500 when
            PDF-to-image conversion fails. Errors raised by
            ``extract_text_from_image`` propagate unchanged.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    file_contents = await file.read()

    # NOTE(review): extract_text_from_image is synchronous and may sleep while
    # retrying, so it blocks the event loop for the duration of each call.
    # Consider moving it to a worker thread.
    if file.filename.lower().endswith(".pdf"):
        # Only the metadata read is guarded, so the page-limit error below is
        # not re-wrapped as a "could not read metadata" failure.
        try:
            info = pdfinfo_from_bytes(file_contents)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Could not read PDF metadata: {str(e)}") from e

        page_count = info.get("Pages", 0)
        if page_count > 5:
            raise HTTPException(
                status_code=400,
                detail=f"PDF is too large ({page_count} pages). Maximum allowed is 5 pages."
            )

        try:
            images = convert_from_bytes(file_contents, dpi=200)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error converting PDF to images: {str(e)}") from e

        # "or ''" guards against the model returning no text for a page.
        output_text = "".join(
            f"### Page {idx}\n\n{extract_text_from_image(img) or ''}\n\n"
            for idx, img in enumerate(images, start=1)
        )
    else:
        # Only decoding is guarded: OCR errors (500/503) must not be reported
        # as "invalid image".
        try:
            img = PIL.Image.open(io.BytesIO(file_contents))
            # Image.open is lazy; load() forces a full decode so corrupt or
            # truncated files are rejected here with a 400.
            img.load()
        except Exception as e:
            raise HTTPException(status_code=400, detail="Uploaded file is not a valid image format supported by PIL.") from e
        output_text = (extract_text_from_image(img) or "") + "\n\n"

    return JSONResponse(content={"extracted_text": output_text})
@app.get("/")
async def root():
    """Return a fixed JSON status message for the root path."""
    status_payload = {"message": "Vertex AI OCR API is up and running."}
    return JSONResponse(content=status_payload)
# --- RUN THE APP ---
if __name__ == "__main__":
    # Serve on all interfaces, port 8000, with auto-reload enabled.
    # NOTE(review): the "main:app" import string presumes this file is
    # importable as ``main`` — confirm the module name.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)