Spaces:

KarthiEz
/

DocQwen2VL

Sleeping

App Files Files Community

KarthiEz committed on Oct 14

Commit

6a80dac

verified ·

1 Parent(s): 795df5f

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -5

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
@@ -18,12 +20,14 @@ pipe = pipeline("image-text-to-text", model="Qwen/Qwen2-VL-2B-Instruct")
 def _only_model_text(out) -> str:
     # Case A: chat transcript list → grab the last assistant turn
     if isinstance(out, list):
         for item in reversed(out):
             if isinstance(item, dict) and item.get("role") == "assistant":
                 content = item.get("content")
                 if isinstance(content, str):
                     return content
                 if isinstance(content, list):
                     texts = []
                     for seg in content:
                         if isinstance(seg, dict) and seg.get("type") == "text":
@@ -32,7 +36,7 @@ def _only_model_text(out) -> str:
                                 texts.append(t)
                     if texts:
                         return "\n".join(texts)
-        # Handle [{'generated_text': '...'}]
         for item in out:
             if isinstance(item, dict) and "generated_text" in item:
                 return item["generated_text"]
@@ -41,13 +45,14 @@ def _only_model_text(out) -> str:
     if isinstance(out, dict) and "generated_text" in out:
         return out["generated_text"]
-    # Fallback
     try:
         import json as _json
         return _json.dumps(out, ensure_ascii=False)
     except Exception:
         return str(out)
 def infer(file_obj, prompt):
     if file_obj is None:
         return "Please upload an image or PDF."
@@ -77,15 +82,21 @@ def infer(file_obj, prompt):
         ]
     }]
-    # run model ONCE
     out = pipe(text=messages, max_new_tokens=256)
     # return ONLY the assistant text
     return _only_model_text(out)
 # ---------- Gradio UI ----------
 with gr.Blocks(
     title="Qwen2-VL-2B — File + Prompt",
 ) as demo:
     gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
     with gr.Row():
@@ -93,10 +104,10 @@ with gr.Blocks(
     prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
     run_btn = gr.Button("Run")
-    # Markdown auto-expands with content
     resp_out = gr.Markdown(elem_id="resp_out")
     run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
 if __name__ == "__main__":
-    demo.launch()

+# Is this code correct?
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
 def _only_model_text(out) -> str:
     # Case A: chat transcript list → grab the last assistant turn
     if isinstance(out, list):
+        # Prefer any assistant turn from the end
         for item in reversed(out):
             if isinstance(item, dict) and item.get("role") == "assistant":
                 content = item.get("content")
                 if isinstance(content, str):
                     return content
                 if isinstance(content, list):
+                    # collect only text segments, ignore images
                     texts = []
                     for seg in content:
                         if isinstance(seg, dict) and seg.get("type") == "text":
                                 texts.append(t)
                     if texts:
                         return "\n".join(texts)
+        # Also handle [{'generated_text': '...'}] pattern if present
         for item in out:
             if isinstance(item, dict) and "generated_text" in item:
                 return item["generated_text"]
     if isinstance(out, dict) and "generated_text" in out:
         return out["generated_text"]
+    # Fallback: stringify safely
     try:
         import json as _json
         return _json.dumps(out, ensure_ascii=False)
     except Exception:
         return str(out)
 def infer(file_obj, prompt):
     if file_obj is None:
         return "Please upload an image or PDF."
         ]
     }]
+    # run model
     out = pipe(text=messages, max_new_tokens=256)
     # return ONLY the assistant text
+    out = pipe(text=messages, max_new_tokens=256)
     return _only_model_text(out)
 # ---------- Gradio UI ----------
 with gr.Blocks(
     title="Qwen2-VL-2B — File + Prompt",
+    css="""
+    /* make the output box grow nicely */
+    #resp_out textarea {min-height: 220px;}
+    """
 ) as demo:
     gr.Markdown("### Qwen2-VL-2B — Upload an image (or PDF first page) and ask a question.")
     with gr.Row():
     prompt_in = gr.Textbox(label="Prompt", placeholder="Ask anything…", lines=3)
     run_btn = gr.Button("Run")
+    # output textbox that expands (via CSS above)
     resp_out = gr.Markdown(elem_id="resp_out")
     run_btn.click(fn=infer, inputs=[file_in, prompt_in], outputs=[resp_out])
 if __name__ == "__main__":
+    demo.launch()