Spaces:

AffordableAI
/

Mutimodal_Video_Chat_RAG

Sleeping

App Files Files Community

capradeepgujaran commited on Oct 22, 2024

Commit

f5fbd23

verified ·

1 Parent(s): 63b2f85

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import gradio as gr
+from video_rag_tool import VideoRAGTool
+import tempfile
+import os
+from PIL import Image
+import cv2
+import numpy as np
+import torch
+class VideoRAGApp:
+    def __init__(self):
+        self.rag_tool = VideoRAGTool()
+        self.current_video_path = None
+        self.processed = False
+    def process_video(self, video_file):
+        """Process uploaded video and return status message"""
+        if video_file is None:
+            return "Please upload a video first."
+        # Save uploaded video to temporary file
+        temp_dir = tempfile.mkdtemp()
+        temp_path = os.path.join(temp_dir, "uploaded_video.mp4")
+        with open(temp_path, "wb") as f:
+            f.write(video_file)
+        self.current_video_path = temp_path
+        try:
+            self.rag_tool.process_video(temp_path)
+            self.processed = True
+            return "Video processed successfully! You can now ask questions about the video."
+        except Exception as e:
+            return f"Error processing video: {str(e)}"
+    def query_video(self, query_text):
+        """Query the video and return relevant frames with descriptions"""
+        if not self.processed:
+            return "Please process a video first."
+        try:
+            results = self.rag_tool.query_video(query_text, k=4)
+            # Extract frames for display
+            frames = []
+            captions = []
+            cap = cv2.VideoCapture(self.current_video_path)
+            for result in results:
+                frame_number = result['frame_number']
+                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
+                ret, frame = cap.read()
+                if ret:
+                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                    frames.append(Image.fromarray(frame_rgb))
+                    caption = f"Timestamp: {result['timestamp']:.2f}s\n"
+                    caption += f"Relevance: {result['relevance_score']:.2f}"
+                    captions.append(caption)
+            cap.release()
+            return frames, captions
+        except Exception as e:
+            return f"Error querying video: {str(e)}"
+    def create_interface(self):
+        """Create and return Gradio interface"""
+        with gr.Blocks(title="Video Chat RAG") as interface:
+            gr.Markdown("# Video Chat RAG")
+            gr.Markdown("Upload a video and ask questions about its content!")
+            with gr.Row():
+                video_input = gr.File(
+                    label="Upload Video",
+                    file_types=["video"],
+                )
+                process_button = gr.Button("Process Video")
+            status_output = gr.Textbox(
+                label="Status",
+                interactive=False
+            )
+            with gr.Row():
+                query_input = gr.Textbox(
+                    label="Ask about the video",
+                    placeholder="What's happening in the video?"
+                )
+                query_button = gr.Button("Search")
+            with gr.Row():
+                gallery = gr.Gallery(
+                    label="Retrieved Frames",
+                    show_label=True,
+                    elem_id="gallery",
+                    columns=[2],
+                    rows=[2],
+                    height="auto"
+                )
+            captions = gr.Textbox(
+                label="Frame Details",
+                interactive=False
+            )
+            # Set up event handlers
+            process_button.click(
+                fn=self.process_video,
+                inputs=[video_input],
+                outputs=[status_output]
+            )
+            query_button.click(
+                fn=self.query_video,
+                inputs=[query_input],
+                outputs=[gallery, captions]
+            )
+        return interface
+# For Hugging Face Spaces deployment
+app = VideoRAGApp()
+interface = app.create_interface()
+# Launch the app (for local testing)
+if __name__ == "__main__":
+    interface.launch()