# Hugging Face Space: VideoLLaMA3 interactive demo
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# HERE IS WHERE THE MODEL NAME GOES
model_name = "DAMO-NLP-SG/VideoRefer-VideoLLaMA3-7B"
# Load the model function
def load_model():
    """Load the tokenizer and causal-LM weights for ``model_name``.

    Returns:
        tuple: ``(tokenizer, model)`` on success, or ``(None, None)`` when
        loading fails for any reason (missing weights, OOM, network, ...).
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,  # half precision to fit GPU memory
            device_map="auto",          # let accelerate place layers on devices
            trust_remote_code=True,     # may be needed for some models
        )
        return tokenizer, model
    except Exception as e:
        # Surface the failure in the Space logs instead of swallowing it
        # silently -- callers only see (None, None) and would otherwise have
        # no way to diagnose why the model is unavailable.
        print(f"Model loading failed: {e}")
        return None, None
# Initialize model at import time (this happens once, when the Space starts).
print(f"Loading model: {model_name}")  # log which checkpoint is being loaded
# Module-level globals read by process_video_question below; either may be
# None if load_model() failed.
tokenizer, model = load_model()
def process_video_question(video_file, question):
    """Validate the inputs and answer a question about the uploaded video.

    Args:
        video_file: Path to the uploaded video, or ``None`` if nothing was
            uploaded.
        question: Free-text question from the user; may be ``None`` or blank.

    Returns:
        str: A user-facing response (currently a placeholder until the real
        VideoLLaMA3 inference pipeline is implemented).
    """
    if model is None:
        return "Sorry, the model failed to load. Please try again later."
    if video_file is None:
        return "Please upload a video file first."
    # Gradio can deliver None (not just "") for an untouched textbox, and
    # None.strip() would raise AttributeError -- guard for both cases.
    if not question or not question.strip():
        return "Please enter a question about the video."
    try:
        # Your video processing logic would go here.
        # This is a placeholder - you'll need to implement the actual
        # VideoLLaMA3 pipeline.
        response = f"I received your video and question: '{question}'. Video processing with {model_name} would happen here."
        return response
    except Exception as e:
        return f"Error processing video: {str(e)}"
# Create the Gradio interface.  NOTE: component nesting and statement order
# inside the context managers determine the rendered layout, so the order of
# these statements is significant.
with gr.Blocks(title="VideoLLaMA3 Demo") as demo:
    gr.Markdown("# 🎥 VideoLLaMA3 Interactive Demo")
    gr.Markdown(f"**Model:** `{model_name}`")  # display the loaded checkpoint
    gr.Markdown("Upload a video and ask questions about its content!")
    with gr.Row():
        with gr.Column(scale=1):  # left column: user inputs
            video_input = gr.Video(
                label="📹 Upload Video",
                height=300
            )
            question_input = gr.Textbox(
                label="❓ Ask a question about the video",
                placeholder="What is happening in this video?",
                lines=2
            )
            submit_btn = gr.Button("🚀 Analyze Video", variant="primary")
        with gr.Column(scale=1):  # right column: model output
            output_text = gr.Textbox(
                label="🤖 AI Response",
                lines=10,
                placeholder="The AI response will appear here..."
            )
    # Examples section
    gr.Markdown("### 💡 Example Questions:")
    gr.Markdown("""
    - "What objects can you see in this video?"
    - "Describe the main action happening"
    - "What is the setting or location?"
    - "How many people are in the video?"
    """)
    # Connect the button to the handler: two inputs map to the handler's
    # (video_file, question) parameters; the return string fills output_text.
    submit_btn.click(
        fn=process_video_question,
        inputs=[video_input, question_input],
        outputs=output_text
    )
# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()