Spaces:

prithivMLmods
/

Multimodal-VLM-Thinking

Running on Zero

App Files Files Community

prithivMLmods committed on Jun 23

Commit

d2b9d98

verified ·

1 Parent(s): c64538b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -4

app.py CHANGED Viewed

@@ -208,6 +208,8 @@ video_examples = [
     ["Explain the ad in detail.", "videos/1.mp4"]
 ]
 css = """
 .submit-btn {
     background-color: #2980b9 !important;
@@ -216,7 +218,15 @@ css = """
 .submit-btn:hover {
     background-color: #3498db !important;
 }
 """
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
@@ -246,19 +256,26 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
         with gr.Column():
-            output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
-            markdown_output = gr.Markdown(label="Result.Md")
             model_choice = gr.Radio(
                 choices=["DREX-062225-exp", "VIREX-062225-exp", "olmOCR-7B-0225"],
                 label="Select Model",
                 value="DREX-062225-exp"
             )
             gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Doc-VLMs/discussions)")
             gr.Markdown("> [DREX-062225-exp](https://huggingface.co/prithivMLmods/DREX-062225-exp): the drex-062225-exp (document retrieval and extraction expert) model is a specialized fine-tuned version of docscopeocr-7b-050425-exp, optimized for document retrieval, content extraction, and analysis recognition. built on top of the qwen2.5-vl architecture.")
             gr.Markdown("> [VIREX-062225-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
-            gr.Markdown("> [olmOCR-7B-0225](https://huggingface.co/allenai/olmOCR-7B-0225-preview): the olmocr-7b-0225-preview model is based on qwen2-vl-7b, optimized for document-level optical character recognition (ocr), long-context vision-language understanding, and accurate image-to-text conversion with mathematical latex formatting. designed with a focus on high-fidelity visual-textual comprehension.")
     image_submit.click(
         fn=generate_image,

     ["Explain the ad in detail.", "videos/1.mp4"]
 ]
+# --- CODE MODIFICATION START ---
+# Added CSS to style the output area as a "Canvas"
 css = """
 .submit-btn {
     background-color: #2980b9 !important;
 .submit-btn:hover {
     background-color: #3498db !important;
 }
+.canvas-output {
+    border: 2px solid #e0e0e0;
+    border-radius: 10px;
+    padding: 20px;
+    background-color: #f9f9f9;
+}
 """
+# --- CODE MODIFICATION END ---
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
         with gr.Column():
+            # --- CODE MODIFICATION START ---
+            # Wrapped the outputs in a styled Column to act as the Canvas
+            with gr.Column(elem_classes="canvas-output"):
+                gr.Markdown("## 📋 Result Canvas")
+                output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2)
+                markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
+            # --- CODE MODIFICATION END ---
             model_choice = gr.Radio(
                 choices=["DREX-062225-exp", "VIREX-062225-exp", "olmOCR-7B-0225"],
                 label="Select Model",
                 value="DREX-062225-exp"
             )
             gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Doc-VLMs/discussions)")
             gr.Markdown("> [DREX-062225-exp](https://huggingface.co/prithivMLmods/DREX-062225-exp): the drex-062225-exp (document retrieval and extraction expert) model is a specialized fine-tuned version of docscopeocr-7b-050425-exp, optimized for document retrieval, content extraction, and analysis recognition. built on top of the qwen2.5-vl architecture.")
             gr.Markdown("> [VIREX-062225-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
+            gr.Markdown("> [olmOCR-7B-0225](https://huggingface.co/allenai/olmOCR-7B-0225-preview): the olmocr-7b-0225-preview model is based on qwen2-vl-7b, optimized for document-level optical character recognition (ocr), long-context vision-language understanding, and accurate image-to-text conversion with mathematical latex formatting. designed with a focus on high-fidelity visual-textual comprehension.")
     image_submit.click(
         fn=generate_image,