Spaces:

OmarAbualrob
/

ocr-api

Paused

OmarAbualrob committed on Jul 7, 2025

Commit

a039eef

verified ·

1 Parent(s): 60ec4f6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ app = FastAPI(title="Mixed-Content OCR API", description="An API to extract text
 try:
     logger.info("Loading model and processor...")
     # Use the large model for better accuracy
-    model_id = "microsoft/Florence-2-base"
     # NOTE: We need to trust remote code for Florence-2
     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
@@ -55,6 +55,7 @@ def run_ocr(image: Image.Image) -> str:
         pixel_values=inputs["pixel_values"],
         max_new_tokens=4096, # Increased token limit for long documents
         do_sample=False, # Use greedy decoding for deterministic output
     )
     # Decode the generated IDs to a string

 try:
     logger.info("Loading model and processor...")
     # Use the large model for better accuracy
+    model_id = "microsoft/Florence-2-large"
     # NOTE: We need to trust remote code for Florence-2
     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
         pixel_values=inputs["pixel_values"],
         max_new_tokens=4096, # Increased token limit for long documents
         do_sample=False, # Use greedy decoding for deterministic output
+        num_beams=3
     )
     # Decode the generated IDs to a string