KarthiEz committed on
Commit
6a80dac
·
verified ·
1 Parent(s): 795df5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  from PIL import Image
@@ -18,12 +20,14 @@ pipe = pipeline("image-text-to-text", model="Qwen/Qwen2-VL-2B-Instruct")
18
  def _only_model_text(out) -> str:
19
  # Case A: chat transcript list → grab the last assistant turn
20
  if isinstance(out, list):
 
21
  for item in reversed(out):
22
  if isinstance(item, dict) and item.get("role") == "assistant":
23
  content = item.get("content")
24
  if isinstance(content, str):
25
  return content
26
  if isinstance(content, list):
 
27
  texts = []
28
  for seg in content:
29
  if isinstance(seg, dict) and seg.get("type") == "text":
@@ -32,7 +36,7 @@ def _only_model_text(out) -> str:
32
  texts.append(t)
33
  if texts:
34
  return "\n".join(texts)
35
- # Handle [{'generated_text': '...'}]
36
  for item in out:
37
  if isinstance(item, dict) and "generated_text" in item:
38
  return item["generated_text"]
@@ -41,13 +45,14 @@ def _only_model_text(out) -> str:
41
  if isinstance(out, dict) and "generated_text" in out:
42
  return out["generated_text"]
43
 
44
- # Fallback
45
  try:
46
  import json as _json
47
  return _json.dumps(out, ensure_ascii=False)
48
  except Exception:
49
  return str(out)
50
 
 
51
  def infer(file_obj, prompt):
52
  if file_obj is None:
53
  return "Please upload an image or PDF."
@@ -77,15 +82,21 @@ def infer(file_obj, prompt):
77
  ]
78
  }]
79
 
80
- # run model ONCE
81
  out = pipe(text=messages, max_new_tokens=256)
82
 
83
  # return ONLY the assistant text
 
84
  return _only_model_text(out)
85
 
 
86
  # ---------- Gradio UI ----------
87
  with gr.Blocks(
88
  title="Qwen2-VL-2B — File + Prompt",
 
 
 
 
89
  ) as demo:
90
  gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
91
  with gr.Row():
@@ -93,10 +104,10 @@ with gr.Blocks(
93
  prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
94
  run_btn = gr.Button("Run")
95
 
96
- # Markdown auto-expands with content
97
  resp_out = gr.Markdown(elem_id="resp_out")
98
 
99
  run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
100
 
101
  if __name__ == "__main__":
102
- demo.launch()
 
1
+ is this code correct??
2
+
3
  import gradio as gr
4
  from transformers import pipeline
5
  from PIL import Image
 
20
  def _only_model_text(out) -> str:
21
  # Case A: chat transcript list → grab the last assistant turn
22
  if isinstance(out, list):
23
+ # Prefer any assistant turn from the end
24
  for item in reversed(out):
25
  if isinstance(item, dict) and item.get("role") == "assistant":
26
  content = item.get("content")
27
  if isinstance(content, str):
28
  return content
29
  if isinstance(content, list):
30
+ # collect only text segments, ignore images
31
  texts = []
32
  for seg in content:
33
  if isinstance(seg, dict) and seg.get("type") == "text":
 
36
  texts.append(t)
37
  if texts:
38
  return "\n".join(texts)
39
+ # Also handle [{'generated_text': '...'}] pattern if present
40
  for item in out:
41
  if isinstance(item, dict) and "generated_text" in item:
42
  return item["generated_text"]
 
45
  if isinstance(out, dict) and "generated_text" in out:
46
  return out["generated_text"]
47
 
48
+ # Fallback: stringify safely
49
  try:
50
  import json as _json
51
  return _json.dumps(out, ensure_ascii=False)
52
  except Exception:
53
  return str(out)
54
 
55
+
56
  def infer(file_obj, prompt):
57
  if file_obj is None:
58
  return "Please upload an image or PDF."
 
82
  ]
83
  }]
84
 
85
+ # run model
86
  out = pipe(text=messages, max_new_tokens=256)
87
 
88
  # return ONLY the assistant text
89
+ out = pipe(text=messages, max_new_tokens=256)
90
  return _only_model_text(out)
91
 
92
+
93
  # ---------- Gradio UI ----------
94
  with gr.Blocks(
95
  title="Qwen2-VL-2B — File + Prompt",
96
+ css="""
97
+ /* make the output box grow nicely */
98
+ #resp_out textarea {min-height: 220px;}
99
+ """
100
  ) as demo:
101
  gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
102
  with gr.Row():
 
104
  prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
105
  run_btn = gr.Button("Run")
106
 
107
+ # output textbox that expands (via CSS above)
108
  resp_out = gr.Markdown(elem_id="resp_out")
109
 
110
  run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
111
 
112
  if __name__ == "__main__":
113
+ demo.launch()