Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
from PIL import Image
|
|
@@ -18,12 +20,14 @@ pipe = pipeline("image-text-to-text", model="Qwen/Qwen2-VL-2B-Instruct")
|
|
| 18 |
def _only_model_text(out) -> str:
|
| 19 |
# Case A: chat transcript list → grab the last assistant turn
|
| 20 |
if isinstance(out, list):
|
|
|
|
| 21 |
for item in reversed(out):
|
| 22 |
if isinstance(item, dict) and item.get("role") == "assistant":
|
| 23 |
content = item.get("content")
|
| 24 |
if isinstance(content, str):
|
| 25 |
return content
|
| 26 |
if isinstance(content, list):
|
|
|
|
| 27 |
texts = []
|
| 28 |
for seg in content:
|
| 29 |
if isinstance(seg, dict) and seg.get("type") == "text":
|
|
@@ -32,7 +36,7 @@ def _only_model_text(out) -> str:
|
|
| 32 |
texts.append(t)
|
| 33 |
if texts:
|
| 34 |
return "\n".join(texts)
|
| 35 |
-
#
|
| 36 |
for item in out:
|
| 37 |
if isinstance(item, dict) and "generated_text" in item:
|
| 38 |
return item["generated_text"]
|
|
@@ -41,13 +45,14 @@ def _only_model_text(out) -> str:
|
|
| 41 |
if isinstance(out, dict) and "generated_text" in out:
|
| 42 |
return out["generated_text"]
|
| 43 |
|
| 44 |
-
# Fallback
|
| 45 |
try:
|
| 46 |
import json as _json
|
| 47 |
return _json.dumps(out, ensure_ascii=False)
|
| 48 |
except Exception:
|
| 49 |
return str(out)
|
| 50 |
|
|
|
|
| 51 |
def infer(file_obj, prompt):
|
| 52 |
if file_obj is None:
|
| 53 |
return "Please upload an image or PDF."
|
|
@@ -77,15 +82,21 @@ def infer(file_obj, prompt):
|
|
| 77 |
]
|
| 78 |
}]
|
| 79 |
|
| 80 |
-
# run model
|
| 81 |
out = pipe(text=messages, max_new_tokens=256)
|
| 82 |
|
| 83 |
# return ONLY the assistant text
|
|
|
|
| 84 |
return _only_model_text(out)
|
| 85 |
|
|
|
|
| 86 |
# ---------- Gradio UI ----------
|
| 87 |
with gr.Blocks(
|
| 88 |
title="Qwen2-VL-2B — File + Prompt",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
) as demo:
|
| 90 |
gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
|
| 91 |
with gr.Row():
|
|
@@ -93,10 +104,10 @@ with gr.Blocks(
|
|
| 93 |
prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
|
| 94 |
run_btn = gr.Button("Run")
|
| 95 |
|
| 96 |
-
#
|
| 97 |
resp_out = gr.Markdown(elem_id="resp_out")
|
| 98 |
|
| 99 |
run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
|
| 100 |
|
| 101 |
if __name__ == "__main__":
|
| 102 |
-
demo.launch()
|
|
|
|
| 1 |
+
# Is this code correct?
|
| 2 |
+
|
| 3 |
import gradio as gr
|
| 4 |
from transformers import pipeline
|
| 5 |
from PIL import Image
|
|
|
|
| 20 |
def _only_model_text(out) -> str:
|
| 21 |
# Case A: chat transcript list → grab the last assistant turn
|
| 22 |
if isinstance(out, list):
|
| 23 |
+
# Scan from the end so the most recent assistant turn is preferred
|
| 24 |
for item in reversed(out):
|
| 25 |
if isinstance(item, dict) and item.get("role") == "assistant":
|
| 26 |
content = item.get("content")
|
| 27 |
if isinstance(content, str):
|
| 28 |
return content
|
| 29 |
if isinstance(content, list):
|
| 30 |
+
# collect only text segments, ignore images
|
| 31 |
texts = []
|
| 32 |
for seg in content:
|
| 33 |
if isinstance(seg, dict) and seg.get("type") == "text":
|
|
|
|
| 36 |
texts.append(t)
|
| 37 |
if texts:
|
| 38 |
return "\n".join(texts)
|
| 39 |
+
# Also handle [{'generated_text': '...'}] pattern if present
|
| 40 |
for item in out:
|
| 41 |
if isinstance(item, dict) and "generated_text" in item:
|
| 42 |
return item["generated_text"]
|
|
|
|
| 45 |
if isinstance(out, dict) and "generated_text" in out:
|
| 46 |
return out["generated_text"]
|
| 47 |
|
| 48 |
+
# Fallback: stringify safely
|
| 49 |
try:
|
| 50 |
import json as _json
|
| 51 |
return _json.dumps(out, ensure_ascii=False)
|
| 52 |
except Exception:
|
| 53 |
return str(out)
|
| 54 |
|
| 55 |
+
|
| 56 |
def infer(file_obj, prompt):
|
| 57 |
if file_obj is None:
|
| 58 |
return "Please upload an image or PDF."
|
|
|
|
| 82 |
]
|
| 83 |
}]
|
| 84 |
|
| 85 |
+
# run model
|
| 86 |
out = pipe(text=messages, max_new_tokens=256)
|
| 87 |
|
| 88 |
# return ONLY the assistant text
|
| 89 |
+
out = pipe(text=messages, max_new_tokens=256)
|
| 90 |
return _only_model_text(out)
|
| 91 |
|
| 92 |
+
|
| 93 |
# ---------- Gradio UI ----------
|
| 94 |
with gr.Blocks(
|
| 95 |
title="Qwen2-VL-2B — File + Prompt",
|
| 96 |
+
css="""
|
| 97 |
+
/* make the output box grow nicely */
|
| 98 |
+
#resp_out textarea {min-height: 220px;}
|
| 99 |
+
"""
|
| 100 |
) as demo:
|
| 101 |
gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
|
| 102 |
with gr.Row():
|
|
|
|
| 104 |
prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
|
| 105 |
run_btn = gr.Button("Run")
|
| 106 |
|
| 107 |
+
# output area: a Markdown component (not a textbox), addressable in CSS via elem_id "resp_out"
|
| 108 |
resp_out = gr.Markdown(elem_id="resp_out")
|
| 109 |
|
| 110 |
run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
|
| 111 |
|
| 112 |
if __name__ == "__main__":
|
| 113 |
+
demo.launch()
|