Spaces:
Paused
Paused
File size: 3,164 Bytes
92be57f 366bb41 4726b4a 366bb41 4726b4a 366bb41 92be57f 366bb41 4726b4a 366bb41 4726b4a 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 90dbf48 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f 366bb41 92be57f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face Hub identifier of the model served by this Space.
model_name = "DAMO-NLP-SG/VideoRefer-VideoLLaMA3-7B"
# Load the model function
def load_model():
    """Load the tokenizer and model named by the module-level ``model_name``.

    Returns:
        tuple: ``(tokenizer, model)`` on success, ``(None, None)`` if loading
        fails for any reason (network, missing weights, OOM, ...).
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,  # May be needed for some models
        )
        return tokenizer, model
    except Exception as e:
        # Surface the failure in the Space logs instead of swallowing it
        # silently — the original discarded `e`, making failures undebuggable.
        print(f"Failed to load model '{model_name}': {e}")
        return None, None
# Initialize model (this happens when the Space starts)
print(f"Loading model: {model_name}")
# Either (tokenizer, model) on success or (None, None) on failure;
# process_video_question checks `model is None` before use.
tokenizer, model = load_model()
def process_video_question(video_file, question):
    """Process video and answer questions about it.

    Args:
        video_file: Path of the uploaded video, or None if nothing was uploaded.
        question: The user's question text; may be None or empty.

    Returns:
        str: The message to display in the response textbox.
    """
    if model is None:
        return "Sorry, the model failed to load. Please try again later."
    if video_file is None:
        return "Please upload a video file first."
    # Guard against None as well as whitespace-only input: Gradio passes None
    # for an untouched Textbox, and None.strip() would raise AttributeError.
    if not question or not question.strip():
        return "Please enter a question about the video."
    try:
        # Your video processing logic would go here
        # This is a placeholder - you'll need to implement the actual VideoLLaMA3 pipeline
        response = f"I received your video and question: '{question}'. Video processing with {model_name} would happen here."
        return response
    except Exception as e:
        return f"Error processing video: {str(e)}"
# Build the Gradio interface: inputs on the left, model response on the right.
with gr.Blocks(title="VideoLLaMA3 Demo") as demo:
    # Header: title, active model name, and a short usage hint.
    gr.Markdown("# 🎥 VideoLLaMA3 Interactive Demo")
    gr.Markdown(f"**Model:** `{model_name}`")
    gr.Markdown("Upload a video and ask questions about its content!")

    with gr.Row():
        # Input column: video upload, question box, and the submit button.
        with gr.Column(scale=1):
            video_in = gr.Video(label="📹 Upload Video", height=300)
            question_box = gr.Textbox(
                label="❓ Ask a question about the video",
                placeholder="What is happening in this video?",
                lines=2,
            )
            analyze_button = gr.Button("🚀 Analyze Video", variant="primary")
        # Output column: the model's answer.
        with gr.Column(scale=1):
            answer_box = gr.Textbox(
                label="🤖 AI Response",
                lines=10,
                placeholder="The AI response will appear here...",
            )

    # Static examples shown below the main row.
    gr.Markdown("### 💡 Example Questions:")
    gr.Markdown("""
- "What objects can you see in this video?"
- "Describe the main action happening"
- "What is the setting or location?"
- "How many people are in the video?"
""")

    # Route button clicks through the question-answering handler.
    analyze_button.click(
        fn=process_video_question,
        inputs=[video_in, question_box],
        outputs=answer_box,
    )
# Launch the app (Gradio serves the `demo` Blocks when run as a script).
if __name__ == "__main__":
    demo.launch()