akashraut committed on
Commit
d4bebd2
·
verified ·
1 Parent(s): 1614ed7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -16
app.py CHANGED
@@ -1,25 +1,25 @@
1
  import gradio as gr
2
  import torch
3
  import json
4
- import uuid
5
  from PIL import Image
6
-
7
- from transformers import AutoProcessor, AutoModelForVision2Seq
8
 
9
  MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
10
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
 
 
13
  processor = AutoProcessor.from_pretrained(
14
  MODEL_ID,
15
  trust_remote_code=True
16
  )
17
 
18
- model = AutoModelForVision2Seq.from_pretrained(
 
19
  MODEL_ID,
20
- device_map="auto",
21
  torch_dtype=torch.float16 if device == "cuda" else torch.float32,
22
- trust_remote_code=True
23
  )
24
 
25
  model.eval()
@@ -30,9 +30,9 @@ You are a universal document understanding AI.
30
  Return ONLY valid JSON.
31
 
32
  Extract:
33
- - document type
34
  - key-value fields
35
- - tables with rows & columns
36
 
37
  Be document-agnostic.
38
  Do not hallucinate.
@@ -45,23 +45,34 @@ Do not hallucinate.
45
  ).to(model.device)
46
 
47
  with torch.no_grad():
48
- output = model.generate(**inputs, max_new_tokens=2048)
 
 
 
 
49
 
50
- text = processor.decode(output[0], skip_special_tokens=True)
51
 
52
  try:
53
  start = text.find("{")
54
  end = text.rfind("}") + 1
55
  return json.loads(text[start:end])
56
- except:
57
- return {"error": "Failed to parse output"}
 
 
 
58
 
59
  with gr.Blocks() as demo:
60
- gr.Markdown("# 📄 DocAI – Universal Document Intelligence")
61
 
62
- img = gr.Image(type="pil")
63
- out = gr.JSON()
64
 
65
- gr.Button("Extract").click(extract_document, img, out)
 
 
 
 
66
 
67
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import json
 
4
  from PIL import Image
5
+ from transformers import AutoProcessor, AutoModel
 
6
 
7
  MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
+ # Processor
12
  processor = AutoProcessor.from_pretrained(
13
  MODEL_ID,
14
  trust_remote_code=True
15
  )
16
 
17
+ # Model (REMOTE CODE LOAD — critical)
18
+ model = AutoModel.from_pretrained(
19
  MODEL_ID,
20
+ trust_remote_code=True,
21
  torch_dtype=torch.float16 if device == "cuda" else torch.float32,
22
+ device_map="auto"
23
  )
24
 
25
  model.eval()
 
30
  Return ONLY valid JSON.
31
 
32
  Extract:
33
+ - document_type
34
  - key-value fields
35
+ - tables with rows and columns
36
 
37
  Be document-agnostic.
38
  Do not hallucinate.
 
45
  ).to(model.device)
46
 
47
  with torch.no_grad():
48
+ outputs = model.generate(
49
+ **inputs,
50
+ max_new_tokens=2048,
51
+ temperature=0.0
52
+ )
53
 
54
+ text = processor.decode(outputs[0], skip_special_tokens=True)
55
 
56
  try:
57
  start = text.find("{")
58
  end = text.rfind("}") + 1
59
  return json.loads(text[start:end])
60
+ except Exception:
61
+ return {
62
+ "error": "Model output could not be parsed",
63
+ "raw_output": text
64
+ }
65
 
66
  with gr.Blocks() as demo:
67
+ gr.Markdown("# 📄 DocAI — Universal Document Intelligence")
68
 
69
+ image = gr.Image(type="pil", label="Upload document")
70
+ output = gr.JSON(label="Extracted JSON")
71
 
72
+ gr.Button("Extract").click(
73
+ extract_document,
74
+ inputs=image,
75
+ outputs=output
76
+ )
77
 
78
  demo.launch()