Arghya Ghosh committed on
Commit
ec9c6ea
·
verified ·
1 Parent(s): 3e6422b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -33
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import io
2
  import torch
3
  from PIL import Image
4
- from threading import Thread
5
  from fastapi import FastAPI, File, UploadFile, Form
6
  from fastapi.responses import JSONResponse
7
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
@@ -9,39 +8,31 @@ from transformers.generation.streamers import TextIteratorStreamer
9
 
10
  app = FastAPI()
11
 
12
- # Setup device
13
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
14
 
15
- # Load model and processor
16
  MODEL_ID = "nanonets/Nanonets-OCR-s"
17
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
18
- model = (
19
- Qwen2_5_VLForConditionalGeneration.from_pretrained(
20
- MODEL_ID,
21
- trust_remote_code=True,
22
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
23
- )
24
- .to(device)
25
- .eval()
26
  )
 
27
 
28
 
29
- def generate_response(image, prompt_text, **kwargs):
30
- images = [image]
31
-
32
  messages = [
33
  {
34
  "role": "user",
35
- "content": [{"type": "image"} for _ in images]
36
- + [{"type": "text", "text": prompt_text}],
37
  }
38
  ]
39
  prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
40
- inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
41
 
42
- streamer = TextIteratorStreamer(
43
- processor, skip_prompt=True, skip_special_tokens=True
44
- )
45
  generation_kwargs = {
46
  **inputs,
47
  "streamer": streamer,
@@ -52,8 +43,8 @@ def generate_response(image, prompt_text, **kwargs):
52
  "repetition_penalty": kwargs.get("repetition_penalty", 1.2),
53
  }
54
 
55
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
56
- thread.start()
57
 
58
  output = ""
59
  for chunk in streamer:
@@ -61,6 +52,11 @@ def generate_response(image, prompt_text, **kwargs):
61
  return output.strip()
62
 
63
 
 
 
 
 
 
64
  @app.post("/ocr/image")
65
  async def ocr_image(
66
  prompt: str = Form(...),
@@ -74,13 +70,16 @@ async def ocr_image(
74
  image_bytes = await image.read()
75
  pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
76
 
77
- result = generate_response(
78
- image=pil_image,
79
- prompt_text=prompt,
80
- max_new_tokens=max_new_tokens,
81
- temperature=temperature,
82
- top_p=top_p,
83
- top_k=top_k,
84
- repetition_penalty=repetition_penalty,
85
- )
86
- return JSONResponse(content={"result": result})
 
 
 
 
1
import io

import torch
from PIL import Image
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
# Required by generate_response(); this import is not visible in the rendered
# final file even though the streamer is still used — restored explicitly.
from transformers.generation.streamers import TextIteratorStreamer

app = FastAPI()

# Device config: prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load processor and model once at module import so all requests share them.
MODEL_ID = "nanonets/Nanonets-OCR-s"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    # fp16 halves GPU memory; CPU inference stays in fp32 for kernel support.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
model = model.to(device).eval()
23
 
24
 
25
def generate_response(image: Image.Image, prompt_text: str, **kwargs) -> str:
    """Run the OCR model on *image* guided by *prompt_text* and return the text.

    Keyword args (all optional — the /ocr/image endpoint always supplies them):
        max_new_tokens, temperature, top_p, top_k, repetition_penalty.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    # Local import: the module-level Thread import was removed in this commit.
    from threading import Thread

    messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": prompt_text}],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(device)

    # NOTE(review): TextIteratorStreamer is documented to take a tokenizer;
    # passing the processor relies on it forwarding decode() — confirm.
    streamer = TextIteratorStreamer(
        processor, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = {
        **inputs,
        "streamer": streamer,
        # NOTE(review): the fallback defaults below are assumptions — the diff
        # view hid the original lines. The only caller passes explicit values
        # for all five, so these fallbacks are never hit in practice.
        "max_new_tokens": kwargs.get("max_new_tokens", 1024),
        "temperature": kwargs.get("temperature", 0.7),
        "top_p": kwargs.get("top_p", 0.9),
        "top_k": kwargs.get("top_k", 50),
        "repetition_penalty": kwargs.get("repetition_penalty", 1.2),
    }

    # Run generation on a worker thread so the streamer is drained while tokens
    # are produced — the documented TextIteratorStreamer pattern. A direct
    # model.generate() call only works by relying on the streamer's internal
    # queue being unbounded.
    worker = Thread(target=model.generate, kwargs=generation_kwargs)
    worker.start()

    output = ""
    for chunk in streamer:
        output += chunk
    worker.join()  # make sure generation has fully finished before returning
    return output.strip()
53
 
54
 
55
@app.get("/")
def health():
    """Liveness probe: report that the service is up and which model it serves."""
    payload = {"status": "running", "model": MODEL_ID}
    return payload
58
+
59
+
60
@app.post("/ocr/image")
async def ocr_image(
    prompt: str = Form(...),
    # NOTE(review): the defaults of the five parameters below were hidden in
    # the diff view this file was reconstructed from — confirm against the
    # deployed app before relying on them.
    max_new_tokens: int = Form(1024),
    temperature: float = Form(0.7),
    top_p: float = Form(0.9),
    top_k: int = Form(50),
    repetition_penalty: float = Form(1.2),
    image: UploadFile = File(...),
):
    """OCR endpoint: run the model over one uploaded image.

    Form fields: a required prompt, optional sampling parameters, and the
    image file. Returns {"result": <text>} on success, or HTTP 500 with
    {"error": <message>} on failure.
    """
    image_bytes = await image.read()
    pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    try:
        result = generate_response(
            image=pil_image,
            prompt_text=prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
        )
        return JSONResponse(content={"result": result})
    except Exception as e:
        # Top-level boundary: report the failure as JSON rather than a bare 500.
        # NOTE(review): str(e) may leak internal details to clients — consider
        # logging the traceback server-side and returning a generic message.
        return JSONResponse(status_code=500, content={"error": str(e)})