Spaces:

OmarAbualrob
/

ocr-api

Paused

App Files Community

OmarAbualrob committed on Jul 29, 2025

Commit

bf69385

verified ·

1 Parent(s): fcf63e0

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -47

app.py CHANGED Viewed

@@ -1,79 +1,55 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
-from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
 from PIL import Image
-import torch
 import io
 import logging
-# --- 1. Basic Setup ---
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-app = FastAPI(title="Florence-2 OCR API", description="An API to extract text from images using the Florence-2-large model on a GPU.")
-# --- 2. Global Variables and Device Configuration ---
-device = "cuda" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.bfloat16
 model = None
 processor = None
-# --- 3. Model Loading Logic (at startup) ---
 @app.on_event("startup")
 async def startup_event():
     global model, processor
-    if device == "cpu":
-        logger.warning("CUDA not available, model will not be loaded. This API requires a GPU.")
-        return
     try:
         logger.info(f"Using device: {device}")
-        logger.info("Starting model loading process with 4-bit quantization...")
         model_id = "microsoft/Florence-2-large"
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch_dtype
-        )
-        # Load the model WITHOUT the invalid revision ID
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             trust_remote_code=True,
-            quantization_config=quantization_config,
-            # revision="e134b72"  <-- REMOVED THIS LINE
         )
-        # Load the processor WITHOUT the invalid revision ID
-        processor = AutoProcessor.from_pretrained(
-            model_id,
-            trust_remote_code=True
-            # revision="e134b72"  <-- REMOVED THIS LINE
-        )
-        logger.info("Model and processor loaded successfully.")
     except Exception as e:
         logger.error(f"FATAL: An error occurred during model loading: {e}", exc_info=True)
-# --- 4. Define the OCR Task Function ---
 def run_ocr(image: Image.Image) -> str:
     if model is None or processor is None:
         raise RuntimeError("Model is not available. Check startup logs for loading errors.")
     if image.mode != "RGB":
         image = image.convert("RGB")
     prompt = "<OCR>"
     inputs = processor(text=prompt, images=image, return_tensors="pt")
-    input_ids = inputs["input_ids"].to(device)
-    pixel_values = inputs["pixel_values"].to(device, dtype=torch_dtype)
     generated_ids = model.generate(
-        input_ids=input_ids,
-        pixel_values=pixel_values,
         max_new_tokens=4096,
         do_sample=False,
         num_beams=3
@@ -81,30 +57,24 @@ def run_ocr(image: Image.Image) -> str:
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
     parsed_text = processor.post_process_generation(generated_text, task="<OCR>", image_size=(image.width, image.height))
     return parsed_text.get("<OCR>", "Error: Could not parse OCR output.")
-# --- 5. Create API Endpoints ---
 @app.post("/ocr", summary="Extract Text from Image")
 async def perform_ocr(file: UploadFile = File(..., description="Image file to perform OCR on.")):
     if model is None:
         raise HTTPException(status_code=503, detail="Model is not loaded or unavailable. Please check the server logs.")
     if not file.content_type.startswith("image/"):
-        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an image (e.g., PNG, JPG).")
     try:
         contents = await file.read()
         image = Image.open(io.BytesIO(contents))
         logger.info(f"Running OCR on uploaded file: {file.filename}")
         extracted_text = run_ocr(image)
         logger.info("OCR completed successfully.")
-        return JSONResponse(
-            content={"filename": file.filename, "text": extracted_text}
-        )
     except Exception as e:
         logger.error(f"An error occurred during OCR processing for {file.filename}: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=f"An internal server error occurred: {str(e)}")

 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import io
 import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+app = FastAPI(title="Florence-2 OCR API (CPU)", description="An API to extract text from images using the Florence-2-large model on CPU.")
+# --- Global Variables and Device Configuration ---
+device = "cpu"  # Force CPU
 model = None
 processor = None
+# --- Model Loading Logic (at startup) ---
 @app.on_event("startup")
 async def startup_event():
     global model, processor
     try:
         logger.info(f"Using device: {device}")
+        logger.info("Starting model loading process for CPU...")
         model_id = "microsoft/Florence-2-large"
+        # Load the model in full precision for CPU
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             trust_remote_code=True,
         )
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+        logger.info("Model and processor loaded successfully on CPU.")
     except Exception as e:
         logger.error(f"FATAL: An error occurred during model loading: {e}", exc_info=True)
+# --- Define the OCR Task Function (CPU version) ---
 def run_ocr(image: Image.Image) -> str:
     if model is None or processor is None:
         raise RuntimeError("Model is not available. Check startup logs for loading errors.")
     if image.mode != "RGB":
         image = image.convert("RGB")
     prompt = "<OCR>"
     inputs = processor(text=prompt, images=image, return_tensors="pt")
+    # Generate on CPU (no .to(device) or dtype changes needed)
     generated_ids = model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
         max_new_tokens=4096,
         do_sample=False,
         num_beams=3
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
     parsed_text = processor.post_process_generation(generated_text, task="<OCR>", image_size=(image.width, image.height))
     return parsed_text.get("<OCR>", "Error: Could not parse OCR output.")
+# --- API Endpoints ---
+# (The /ocr endpoint below is the same as before, apart from a shorter 400 message and a one-line JSONResponse.)
 @app.post("/ocr", summary="Extract Text from Image")
 async def perform_ocr(file: UploadFile = File(..., description="Image file to perform OCR on.")):
     if model is None:
         raise HTTPException(status_code=503, detail="Model is not loaded or unavailable. Please check the server logs.")
     if not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an image.")
     try:
         contents = await file.read()
         image = Image.open(io.BytesIO(contents))
         logger.info(f"Running OCR on uploaded file: {file.filename}")
         extracted_text = run_ocr(image)
         logger.info("OCR completed successfully.")
+        return JSONResponse(content={"filename": file.filename, "text": extracted_text})
     except Exception as e:
         logger.error(f"An error occurred during OCR processing for {file.filename}: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=f"An internal server error occurred: {str(e)}")