Spaces:
Sleeping
Sleeping
Commit
·
8427fe9
1
Parent(s):
e6b02aa
Add support for all models
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 🤗
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: blue
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Video Inference Demo
|
| 3 |
emoji: 🤗
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: blue
|
app.py
CHANGED
|
@@ -14,7 +14,6 @@ from models.base import BaseVideoModel
|
|
| 14 |
# ----------------------
|
| 15 |
# CONFIG
|
| 16 |
# ----------------------
|
| 17 |
-
MODEL_PATH = "Isotr0py/LLaVA-Video-7B-Qwen2-hf"
|
| 18 |
DEVICE_MAP = "cuda:0"
|
| 19 |
|
| 20 |
VIDEO_DIR = str(Path(__file__).parent / "videos")
|
|
@@ -27,11 +26,15 @@ TEMPERATURE = 0.01
|
|
| 27 |
# Model loading with quantization support
|
| 28 |
# ----------------------
|
| 29 |
model: BaseVideoModel = None
|
|
|
|
| 30 |
current_quantization = "16-bit"
|
| 31 |
|
| 32 |
-
def load_model_with_quantization(
|
|
|
|
|
|
|
|
|
|
| 33 |
"""Load or reload the model with specified quantization"""
|
| 34 |
-
global model, current_quantization
|
| 35 |
|
| 36 |
# Free GPU memory if model already exists
|
| 37 |
if model is not None:
|
|
@@ -44,25 +47,30 @@ def load_model_with_quantization(quantization_mode: str):
|
|
| 44 |
load_8bit = False
|
| 45 |
load_4bit = False
|
| 46 |
|
| 47 |
-
if
|
| 48 |
load_8bit = True
|
| 49 |
-
elif
|
| 50 |
load_4bit = True
|
| 51 |
# else: 16-bit (normal) - both flags remain False
|
| 52 |
|
| 53 |
-
print(f"Loading
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
model = load_model(
|
| 55 |
-
|
| 56 |
device_map=DEVICE_MAP,
|
| 57 |
load_8bit=load_8bit,
|
| 58 |
load_4bit=load_4bit,
|
| 59 |
)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
|
| 64 |
# Load model initially with 16-bit (normal)
|
| 65 |
-
load_model_with_quantization(
|
| 66 |
|
| 67 |
# ----------------------
|
| 68 |
# Collect video IDs
|
|
@@ -139,8 +147,8 @@ def video_qa(
|
|
| 139 |
# ----------------------
|
| 140 |
# Gradio UI
|
| 141 |
# ----------------------
|
| 142 |
-
with gr.Blocks(title="Video
|
| 143 |
-
gr.Markdown("## 🎥 Video
|
| 144 |
|
| 145 |
with gr.Row():
|
| 146 |
# LEFT COLUMN
|
|
@@ -160,6 +168,21 @@ with gr.Blocks(title="Video QA — LLaVa-Video-7B-Qwen2", theme=gr.themes.Soft()
|
|
| 160 |
autoplay=False,
|
| 161 |
height=300
|
| 162 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
gr.Markdown("### ⚙️ Model Parameters")
|
| 165 |
|
|
@@ -173,7 +196,7 @@ with gr.Blocks(title="Video QA — LLaVa-Video-7B-Qwen2", theme=gr.themes.Soft()
|
|
| 173 |
reload_button = gr.Button("🔄 Reload Model", variant="secondary")
|
| 174 |
reload_status = gr.Textbox(
|
| 175 |
label="Model Status",
|
| 176 |
-
value=f"
|
| 177 |
interactive=False,
|
| 178 |
lines=1
|
| 179 |
)
|
|
@@ -279,7 +302,10 @@ with gr.Blocks(title="Video QA — LLaVa-Video-7B-Qwen2", theme=gr.themes.Soft()
|
|
| 279 |
# Reload model with new quantization
|
| 280 |
reload_button.click(
|
| 281 |
fn=load_model_with_quantization,
|
| 282 |
-
inputs=
|
|
|
|
|
|
|
|
|
|
| 283 |
outputs=reload_status
|
| 284 |
)
|
| 285 |
|
|
|
|
| 14 |
# ----------------------
|
| 15 |
# CONFIG
|
| 16 |
# ----------------------
|
|
|
|
| 17 |
DEVICE_MAP = "cuda:0"
|
| 18 |
|
| 19 |
VIDEO_DIR = str(Path(__file__).parent / "videos")
|
|
|
|
| 26 |
# Model loading with quantization support
|
| 27 |
# ----------------------
|
| 28 |
model: BaseVideoModel = None
|
| 29 |
+
current_model_name = "Qwen3-VL-4B-Instruct"
|
| 30 |
current_quantization = "16-bit"
|
| 31 |
|
| 32 |
+
def load_model_with_quantization(
|
| 33 |
+
model_name: str,
|
| 34 |
+
quantization: str
|
| 35 |
+
):
|
| 36 |
"""Load or reload the model with specified quantization"""
|
| 37 |
+
global model, current_model_name, current_quantization
|
| 38 |
|
| 39 |
# Free GPU memory if model already exists
|
| 40 |
if model is not None:
|
|
|
|
| 47 |
load_8bit = False
|
| 48 |
load_4bit = False
|
| 49 |
|
| 50 |
+
if quantization == "8-bit":
|
| 51 |
load_8bit = True
|
| 52 |
+
elif quantization == "4-bit":
|
| 53 |
load_4bit = True
|
| 54 |
# else: 16-bit (normal) - both flags remain False
|
| 55 |
|
| 56 |
+
print(f"Loading {model_name} with {quantization} quantization...")
|
| 57 |
+
model_path = model_name
|
| 58 |
+
# Load the HF version of LLaVA-Video-7B instead of the default version, for transformers v5 compatibility
|
| 59 |
+
if model_name == "LLaVA-Video-7B-Qwen2":
|
| 60 |
+
model_path = "Isotr0py/LLaVA-Video-7B-Qwen2-hf"
|
| 61 |
model = load_model(
|
| 62 |
+
model_path,
|
| 63 |
device_map=DEVICE_MAP,
|
| 64 |
load_8bit=load_8bit,
|
| 65 |
load_4bit=load_4bit,
|
| 66 |
)
|
| 67 |
+
current_model_name = model_name
|
| 68 |
+
current_quantization = quantization
|
| 69 |
+
print(f"{model_name} loaded with {quantization} quantization.")
|
| 70 |
+
return f"✅ {model_name} loaded successfully with {quantization} quantization"
|
| 71 |
|
| 72 |
# Load model initially with 16-bit (normal)
|
| 73 |
+
load_model_with_quantization(current_model_name, current_quantization)
|
| 74 |
|
| 75 |
# ----------------------
|
| 76 |
# Collect video IDs
|
|
|
|
| 147 |
# ----------------------
|
| 148 |
# Gradio UI
|
| 149 |
# ----------------------
|
| 150 |
+
with gr.Blocks(title="Video Inference Demo", theme=gr.themes.Soft()) as demo:
|
| 151 |
+
gr.Markdown("## 🎥 Video Inference")
|
| 152 |
|
| 153 |
with gr.Row():
|
| 154 |
# LEFT COLUMN
|
|
|
|
| 168 |
autoplay=False,
|
| 169 |
height=300
|
| 170 |
)
|
| 171 |
+
|
| 172 |
+
gr.Markdown("### 🤖 Model Name")
|
| 173 |
+
|
| 174 |
+
model_name_radio = gr.Radio(
|
| 175 |
+
choices=[
|
| 176 |
+
"Qwen3-VL-4B-Instruct",
|
| 177 |
+
"Qwen3-VL-8B-Instruct",
|
| 178 |
+
"Qwen3-VL-2B-Thinking",
|
| 179 |
+
"Qwen3-VL-4B-Thinking",
|
| 180 |
+
"LLaVA-Video-7B-Qwen2"
|
| 181 |
+
],
|
| 182 |
+
value="Qwen3-VL-4B-Instruct",
|
| 183 |
+
label="🤖 Model Name",
|
| 184 |
+
info="Select the model to use for inference"
|
| 185 |
+
)
|
| 186 |
|
| 187 |
gr.Markdown("### ⚙️ Model Parameters")
|
| 188 |
|
|
|
|
| 196 |
reload_button = gr.Button("🔄 Reload Model", variant="secondary")
|
| 197 |
reload_status = gr.Textbox(
|
| 198 |
label="Model Status",
|
| 199 |
+
value=f"{current_model_name} loaded with {current_quantization} quantization",
|
| 200 |
interactive=False,
|
| 201 |
lines=1
|
| 202 |
)
|
|
|
|
| 302 |
# Reload model with new quantization
|
| 303 |
reload_button.click(
|
| 304 |
fn=load_model_with_quantization,
|
| 305 |
+
inputs=[
|
| 306 |
+
model_name_radio,
|
| 307 |
+
quantization_radio,
|
| 308 |
+
],
|
| 309 |
outputs=reload_status
|
| 310 |
)
|
| 311 |
|