Spaces:

akashraut
/

docAI

Sleeping

App Files Community

akashraut committed on Feb 9

Commit

d4bebd2

verified ·

1 Parent(s): 1614ed7

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -16

app.py CHANGED Viewed

@@ -1,25 +1,25 @@
 import gradio as gr
 import torch
 import json
-import uuid
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForVision2Seq
 MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = AutoProcessor.from_pretrained(
     MODEL_ID,
     trust_remote_code=True
 )
-model = AutoModelForVision2Seq.from_pretrained(
     MODEL_ID,
-    device_map="auto",
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-    trust_remote_code=True
 )
 model.eval()
@@ -30,9 +30,9 @@ You are a universal document understanding AI.
 Return ONLY valid JSON.
 Extract:
-- document type
 - key-value fields
-- tables with rows & columns
 Be document-agnostic.
 Do not hallucinate.
@@ -45,23 +45,34 @@ Do not hallucinate.
     ).to(model.device)
     with torch.no_grad():
-        output = model.generate(**inputs, max_new_tokens=2048)
-    text = processor.decode(output[0], skip_special_tokens=True)
     try:
         start = text.find("{")
         end = text.rfind("}") + 1
         return json.loads(text[start:end])
-    except:
-        return {"error": "Failed to parse output"}
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 DocAI – Universal Document Intelligence")
-    img = gr.Image(type="pil")
-    out = gr.JSON()
-    gr.Button("Extract").click(extract_document, img, out)
 demo.launch()

 import gradio as gr
 import torch
 import json
 from PIL import Image
+from transformers import AutoProcessor, AutoModel
 MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Processor
 processor = AutoProcessor.from_pretrained(
     MODEL_ID,
     trust_remote_code=True
 )
+# Model (REMOTE CODE LOAD — critical)
+model = AutoModel.from_pretrained(
     MODEL_ID,
+    trust_remote_code=True,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+    device_map="auto"
 )
 model.eval()
 Return ONLY valid JSON.
 Extract:
+- document_type
 - key-value fields
+- tables with rows and columns
 Be document-agnostic.
 Do not hallucinate.
     ).to(model.device)
     with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=2048,
+            temperature=0.0
+        )
+    text = processor.decode(outputs[0], skip_special_tokens=True)
     try:
         start = text.find("{")
         end = text.rfind("}") + 1
         return json.loads(text[start:end])
+    except Exception:
+        return {
+            "error": "Model output could not be parsed",
+            "raw_output": text
+        }
 with gr.Blocks() as demo:
+    gr.Markdown("# 📄 DocAI — Universal Document Intelligence")
+    image = gr.Image(type="pil", label="Upload document")
+    output = gr.JSON(label="Extracted JSON")
+    gr.Button("Extract").click(
+        extract_document,
+        inputs=image,
+        outputs=output
+    )
 demo.launch()