Spaces:

jena-shreyas
/

Video-Inference-Demo

Sleeping

App Files Community

jena-shreyas committed 12 days ago

Commit

b70cd0c

1 Parent(s): bcf2256

Add UI features

Browse files

Files changed (2) hide show

README.md +0 -1
app.py +125 -18

README.md CHANGED Viewed

@@ -9,7 +9,6 @@ app_file: app.py
 python_version: "3.10"
 pinned: false
 license: apache-2.0
-preinstall: "bash pre-install.sh"
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 python_version: "3.10"
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -52,7 +52,16 @@ def get_video_path(video_id: str):
 # ----------------------
 # Inference function
 # ----------------------
-def video_qa(video_id: str, prompt: str) -> str:
     if not video_id:
         return "❌ Please select a video ID."
@@ -61,16 +70,32 @@ def video_qa(video_id: str, prompt: str) -> str:
     video_path = get_video_path(video_id)
     if video_path is None:
-        return f"❌ Video not found: {video_id}.webm"
     try:
-        response = model.chat(
-            prompt=prompt,
-            video_path=video_path,
-            fps=FPS,
-            max_new_tokens=MAX_NEW_TOKENS,
-            temperature=TEMPERATURE,
-        )
         return response
     except Exception as e:
@@ -79,37 +104,110 @@ def video_qa(video_id: str, prompt: str) -> str:
 # ----------------------
 # Gradio UI
 # ----------------------
-with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2") as demo:
     gr.Markdown("## 🎥 Video Question Answering (LLaVa-Video-7B-Qwen2)")
     with gr.Row():
         # LEFT COLUMN
         with gr.Column(scale=1):
             video_id = gr.Dropdown(
                 choices=VIDEO_IDS,
                 label="Video ID",
                 filterable=True,
-                interactive=True
             )
             video_player = gr.Video(
                 label="Selected Video",
-                autoplay=True,
-                height=240
             )
         # RIGHT COLUMN
         with gr.Column(scale=2):
             prompt = gr.Textbox(
                 label="Prompt",
-                placeholder="Ask a question about the selected video",
-                lines=4
             )
             answer = gr.Textbox(
                 label="Model Answer",
-                lines=8
             )
-            run = gr.Button("Run Inference 🚀")
     # Update video player when dropdown changes
     video_id.change(
@@ -121,7 +219,16 @@ with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2") as demo:
     # Run inference
     run.click(
         fn=video_qa,
-        inputs=[video_id, prompt],
         outputs=answer
     )

 # ----------------------
 # Inference function
 # ----------------------
+def video_qa(
+    video_id: str,
+    prompt: str,
+    fps: float,
+    max_tokens: int,
+    temperature: float,
+    top_k: int,
+    top_p: float,
+    video_mode: str,
+) -> str:
     if not video_id:
         return "❌ Please select a video ID."
     video_path = get_video_path(video_id)
     if video_path is None:
+        return f"❌ Video not found: {video_id}.mp4"
     try:
+        # Prepare generation config
+        generation_config = {
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_k": top_k,
+            "top_p": top_p,
+        }
+        # Arguments shared by both model.chat attempts (video_mode is passed separately below)
+        kwargs = {
+            "prompt": prompt,
+            "video_path": video_path,
+            "fps": fps,
+            "generation_config": generation_config,
+        }
+        # Try to add video_mode (for Qwen models)
+        try:
+            response = model.chat(**kwargs, video_mode=video_mode)
+        except TypeError:
+            # If video_mode is not supported, fall back to without it
+            response = model.chat(**kwargs)
         return response
     except Exception as e:
 # ----------------------
 # Gradio UI
 # ----------------------
+with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2", theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🎥 Video Question Answering (LLaVa-Video-7B-Qwen2)")
     with gr.Row():
         # LEFT COLUMN
         with gr.Column(scale=1):
+            gr.Markdown("### 📁 Video Selection")
             video_id = gr.Dropdown(
                 choices=VIDEO_IDS,
                 label="Video ID",
                 filterable=True,
+                interactive=True,
+                value=VIDEO_IDS[0] if VIDEO_IDS else None
             )
             video_player = gr.Video(
                 label="Selected Video",
+                autoplay=False,
+                height=300
+            )
+            gr.Markdown("### ⚙️ Model Parameters")
+            fps_slider = gr.Slider(
+                minimum=0.5,
+                maximum=5.0,
+                step=0.5,
+                value=FPS,
+                label="🎞️ Frames Per Second (FPS)",
+                info="Sample rate for video frames"
             )
+            video_mode_radio = gr.Radio(
+                choices=["video", "frames"],
+                value="video",
+                label="📹 Video Mode",
+                info="'video' for FPS-based, 'frames' for fixed count"
+            )
+            with gr.Accordion("🔧 Advanced Settings", open=False):
+                max_tokens_slider = gr.Slider(
+                    minimum=128,
+                    maximum=2048,
+                    step=128,
+                    value=MAX_NEW_TOKENS,
+                    label="Max New Tokens",
+                    info="Maximum length of generated response"
+                )
+                temperature_slider = gr.Slider(
+                    minimum=0.01,
+                    maximum=2.0,
+                    step=0.01,
+                    value=TEMPERATURE,
+                    label="🌡️ Temperature",
+                    info="Higher = more creative, lower = more focused"
+                )
+                top_k_slider = gr.Slider(
+                    minimum=1,
+                    maximum=100,
+                    step=1,
+                    value=50,
+                    label="🔝 Top-K",
+                    info="Sample from top K tokens"
+                )
+                top_p_slider = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.95,
+                    label="🎯 Top-P (Nucleus)",
+                    info="Cumulative probability threshold"
+                )
         # RIGHT COLUMN
         with gr.Column(scale=2):
+            gr.Markdown("### 💬 Question & Answer")
             prompt = gr.Textbox(
                 label="Prompt",
+                placeholder="Ask a question about the selected video...",
+                lines=4,
+                value="Describe what is happening in this video."
             )
             answer = gr.Textbox(
                 label="Model Answer",
+                lines=20,
+                interactive=False
             )
+            run = gr.Button("🚀 Run Inference", variant="primary", size="lg")
+    gr.Markdown("""
+    ---
+    **ℹ️ Tips:**
+    - Adjust FPS to control video sampling rate (higher = more frames, slower inference)
+    - Use video_mode='frames' for fixed frame count (useful for very long videos)
+    - Temperature: Lower (0.01-0.5) for factual, higher (0.7-1.5) for creative responses
+    - Top-K and Top-P control output diversity
+    """)
     # Update video player when dropdown changes
     video_id.change(
     # Run inference
     run.click(
         fn=video_qa,
+        inputs=[
+            video_id,
+            prompt,
+            fps_slider,
+            max_tokens_slider,
+            temperature_slider,
+            top_k_slider,
+            top_p_slider,
+            video_mode_radio,
+        ],
         outputs=answer
     )