# videoanalyzer / app.py
# Source: Hugging Face Space by cweigendev (commit 366bb41, "Update app.py", 3.16 kB)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face model repo id loaded at startup — change this single constant
# to point the whole app at a different model.
model_name = "DAMO-NLP-SG/VideoRefer-VideoLLaMA3-7B"
# Load the tokenizer and model once, at Space startup.
def load_model():
    """Load the tokenizer and causal-LM weights for `model_name`.

    Returns:
        tuple: ``(tokenizer, model)`` on success, or ``(None, None)`` if
        loading fails. Failures are logged rather than raised so the
        Gradio UI can still start and show a friendly error message.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,  # halve memory footprint on GPU
            device_map="auto",          # spread layers across available devices
            trust_remote_code=True,     # model repo ships custom modeling code
        )
        return tokenizer, model
    except Exception as e:
        # Log the cause instead of silently swallowing it — otherwise a
        # failed load is undiagnosable from the Space logs. Callers treat
        # (None, None) as the "model unavailable" sentinel.
        print(f"Failed to load model '{model_name}': {e}")
        return None, None
# Eagerly load the model when the Space process starts, so the first
# user request does not pay the (long) download/initialization cost.
print("Loading model: %s" % model_name)
tokenizer, model = load_model()
def process_video_question(video_file, question):
    """Answer a free-form question about an uploaded video.

    Args:
        video_file: Path-like value from the Gradio ``Video`` component,
            or ``None`` when nothing was uploaded.
        question: User's question text; may be ``None`` or empty.

    Returns:
        str: The model's answer, or a human-readable error message.
    """
    if model is None:
        return "Sorry, the model failed to load. Please try again later."
    if video_file is None:
        return "Please upload a video file first."
    # Gradio can pass None (not just "") for an untouched textbox; guard
    # before calling .strip() to avoid an AttributeError.
    if not question or not question.strip():
        return "Please enter a question about the video."
    try:
        # Placeholder: the actual VideoLLaMA3 inference pipeline
        # (frame sampling, multimodal encoding, generation) goes here.
        response = f"I received your video and question: '{question}'. Video processing with {model_name} would happen here."
        return response
    except Exception as e:
        return f"Error processing video: {str(e)}"
# Assemble the Gradio UI: inputs in the left column, model output on the right.
with gr.Blocks(title="VideoLLaMA3 Demo") as demo:
    gr.Markdown("# 🎥 VideoLLaMA3 Interactive Demo")
    gr.Markdown(f"**Model:** `{model_name}`")
    gr.Markdown("Upload a video and ask questions about its content!")

    with gr.Row():
        with gr.Column(scale=1):
            video_in = gr.Video(label="📹 Upload Video", height=300)
            question_box = gr.Textbox(
                label="❓ Ask a question about the video",
                placeholder="What is happening in this video?",
                lines=2,
            )
            analyze_btn = gr.Button("🚀 Analyze Video", variant="primary")
        with gr.Column(scale=1):
            answer_box = gr.Textbox(
                label="🤖 AI Response",
                lines=10,
                placeholder="The AI response will appear here...",
            )

    # Sample prompts users can copy into the question box.
    gr.Markdown("### 💡 Example Questions:")
    gr.Markdown("""
    - "What objects can you see in this video?"
    - "Describe the main action happening"
    - "What is the setting or location?"
    - "How many people are in the video?"
    """)

    # Wire the button: (video, question) -> answer text.
    analyze_btn.click(
        fn=process_video_question,
        inputs=[video_in, question_box],
        outputs=answer_box,
    )
# Entry point: start the Gradio server only when run as a script,
# not when this module is imported elsewhere.
if __name__ == "__main__":
    demo.launch()