Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,58 +1,39 @@
-from transformers import AutoProcessor
+from transformers import AutoProcessor, AutoModelForCausalLM
 import torch
-
+import gradio as gr

-#
-#
-#
-from transformers.models.llava.modeling_llava import LlavaForCausalLM
+# Ensure you use the latest version of transformers!
+# For example, in your requirements.txt, you might include:
+# transformers>=4.31.0

 # Load the processor and model while trusting remote code.
 processor = AutoProcessor.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )
-model = LlavaForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     "lmms-lab/LLaVA-Video-7B-Qwen2",
     trust_remote_code=True
 )

-# Set device to GPU if available.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)

 def analyze_video(video_path):
-    """
-    This function accepts the path to a video file,
-    then uses the LLaVA-Video model to analyze it for the moment
-    when the crowd is most engaged.
-    """
     prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."
-
-    # Process the text and video input.
-    # (Make sure that the processor handles video inputs as expected.)
+    # The processor is expected to handle both text and video input.
     inputs = processor(text=prompt, video=video_path, return_tensors="pt")
-
-    # Move tensors to the device.
-    inputs = {key: value.to(device) for key, value in inputs.items()}
-
-    # Generate a response.
+    inputs = {k: v.to(device) for k, v in inputs.items()}
     outputs = model.generate(**inputs, max_new_tokens=100)
-
-    # Decode the generated tokens to a string.
     answer = processor.decode(outputs[0], skip_special_tokens=True)
     return answer

-# Create the Gradio interface.
 iface = gr.Interface(
     fn=analyze_video,
     inputs=gr.Video(label="Upload Concert/Event Video", type="filepath"),
     outputs=gr.Textbox(label="Engagement Analysis"),
     title="Crowd Engagement Analyzer",
-    description=(
-        "Upload a video of a concert or event and the model will analyze "
-        "the video to identify the moment when the crowd is most engaged."
-    )
+    description="Upload a video of a concert or event and the model will analyze the moment when the crowd is most engaged."
 )

 if __name__ == "__main__":
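
The new comments point at dependency pinning, but the commit does not touch requirements.txt itself. For reference, a minimal requirements.txt matching the imports above might look like the sketch below; only the transformers pin comes from the diff, the rest are assumptions:

# requirements.txt (illustrative sketch; pins other than transformers are assumed)
torch
transformers>=4.31.0
gradio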
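
Note that the Space status still reads "Runtime error" after this change, and two things in the new code are worth double-checking. First, the removed import was already broken (transformers.models.llava.modeling_llava provides LlavaForConditionalGeneration, not LlavaForCausalLM), and lmms-lab/LLaVA-Video-7B-Qwen2 was published against the LLaVA-NeXT codebase, so loading it through AutoProcessor/AutoModelForCausalLM with trust_remote_code=True may still fail; the model card's own loading recipe is the authoritative reference. Second, processor(text=prompt, video=video_path, ...) hands the processor a raw file path, while video processors generally expect decoded frames. A minimal frame-sampling sketch with OpenCV follows; the helper name and frame count are assumptions for illustration, not this model's documented API:

import cv2
import numpy as np

def sample_frames(video_path, num_frames=16):
    # Illustrative helper: video-language models typically consume a fixed
    # number of decoded frames rather than a file path.
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    indices = np.linspace(0, max(total - 1, 0), num_frames).astype(int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ok, frame = cap.read()
        if ok:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # BGR -> RGB
    cap.release()
    if not frames:
        raise ValueError(f"Could not decode any frames from {video_path}")
    return np.stack(frames)  # shape: (num_frames, H, W, 3)

The resulting array could then be passed to whichever processor the model card prescribes; the exact keyword (video=, videos=, images=) varies between processors, so confirm it against the model's documentation before wiring this in.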