VRAG1

Sleeping

App Files Community

stevafernandes committed on Sep 12, 2025

Commit

3533176

verified ·

1 Parent(s): 6b90321

Update app.py

Browse files

Files changed (1) hide show

app.py +230 -75

app.py CHANGED Viewed

@@ -3,23 +3,69 @@ import google.generativeai as genai
 import os
 import tempfile
 import time
-import mimetypes
 from pathlib import Path
-# --- Get API key from environment variable or user input ---
 def get_api_key():
-    """Return the Gemini API key from the environment, else from a password input.
-
-    Returns an empty string when neither source supplies a key. No built-in
-    fallback key is used, so no credential is embedded in the source.
-    """
     GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
     if not GOOGLE_API_KEY:
-        GOOGLE_API_KEY = st.text_input("Enter your Gemini API key", type="password")
-    return GOOGLE_API_KEY
-# VideoProcessor class
 class VideoProcessor:
     def __init__(self, api_key):
         genai.configure(api_key=api_key)
-        self.model = genai.GenerativeModel("gemini-2.0-flash")
-    def upload_video(self, video_path, display_name="uploaded_video"):
         return genai.upload_file(path=video_path, display_name=display_name)
     def wait_for_processing(self, video_file):
@@ -30,87 +76,196 @@ class VideoProcessor:
             raise RuntimeError("Video processing failed")
         return video_file
-    def chat_with_video(self, video_file, prompt):
         response = self.model.generate_content([video_file, prompt])
         return response.text
 # Initialize session state
-for key in ["video_processor", "video_file", "video_name", "messages"]:
-    if key not in st.session_state:
-        st.session_state[key] = None if key != "messages" else []
 # Main app function
 def main():
-    st.set_page_config(page_title="Video Retrieval-Augmented Generation", page_icon="🎬", layout="wide")
-    st.header("Video Retrieval-Augmented Generation - Gemini 2.0")
     st.markdown("---")
-    # Step 1: API Key input
-    st.subheader("Step 1: Enter your Gemini API key")
     api_key = get_api_key()
     if not api_key:
-        st.error("Please enter your API key to proceed.")
         st.stop()
-    # Step 2: Upload Video
-    st.subheader("Step 2: Upload your video file")
-    uploaded_file = st.file_uploader("Upload a video", type=['mp4', 'mov', 'avi', 'mkv', 'webm'])
-    if uploaded_file:
-        if mimetypes.guess_type(uploaded_file.name)[0].startswith("video"):
-            file_size = len(uploaded_file.getvalue()) / (1024**2)
-            st.info(f"Size: {file_size:.2f} MB")
-            if st.session_state.video_name != uploaded_file.name:
-                st.session_state.video_processor = VideoProcessor(api_key)
-                with tempfile.NamedTemporaryFile(delete=False, suffix=Path(uploaded_file.name).suffix) as tmp:
-                    tmp.write(uploaded_file.getvalue())
                     tmp_path = tmp.name
-                with st.spinner("Uploading and processing..."):
-                    video_file = st.session_state.video_processor.upload_video(tmp_path, uploaded_file.name)
-                    processed_file = st.session_state.video_processor.wait_for_processing(video_file)
-                    st.session_state.video_file = processed_file
-                    st.session_state.video_name = uploaded_file.name
-                    st.session_state.messages.clear()
-                    st.success("✅ Video processed")
-                os.unlink(tmp_path)
-            st.video(uploaded_file.getvalue())
         else:
-            st.error("Not a valid video file")
-    if st.button("Reset Chat"):
-        st.session_state.messages.clear()
-    if st.button("Reset All"):
-        st.session_state.clear()
-    # Step 3: Chat about Video
-    st.subheader("Step 3: Chat with your video")
-    if st.session_state.video_file:
-        for msg in st.session_state.messages:
-            with st.chat_message(msg["role"]):
-                st.markdown(msg["content"])
-        user_question = st.chat_input("Ask a question about the video...")
-        if user_question:
-            st.session_state.messages.append({"role": "user", "content": user_question})
-            with st.chat_message("user"):
-                st.markdown(user_question)
-            with st.chat_message("assistant"):
-                placeholder = st.empty()
-                with st.spinner("Generating response..."):
-                    response = st.session_state.video_processor.chat_with_video(st.session_state.video_file, user_question)
-                placeholder.markdown(response)
-                st.session_state.messages.append({"role": "assistant", "content": response})
-    else:
-        st.info("Please upload a video in step 2 to start chatting.")
 if __name__ == "__main__":
-    main()

 import os
 import tempfile
 import time
+import cv2
+import numpy as np
+from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, WebRtcMode
+import av
+import threading
 from pathlib import Path
+# --- Get API key from Hugging Face secret or environment ---
 def get_api_key():
+    """Return the Gemini API key, or an empty string if none is available.
+
+    Sources are tried in order: the GOOGLE_API_KEY environment variable,
+    Streamlit secrets, and finally a password field in the sidebar.
+    """
+    # First try to get from Hugging Face Spaces secrets
     GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
+    # Fallback to Streamlit secrets if available
+    if not GOOGLE_API_KEY and hasattr(st, "secrets"):
+        try:
+            GOOGLE_API_KEY = st.secrets.get("GOOGLE_API_KEY", "")
+        except FileNotFoundError:
+            # Reading st.secrets raises FileNotFoundError (or a subclass) when
+            # no secrets file exists; treat that as "no key here" so the
+            # manual input below stays reachable instead of crashing the app.
+            GOOGLE_API_KEY = ""
+    # Last resort: manual input (remove in production)
     if not GOOGLE_API_KEY:
+        GOOGLE_API_KEY = st.sidebar.text_input(
+            "Enter Gemini API key (for testing only)",
+            type="password",
+            help="In production, this should be set as a Hugging Face Space secret"
+        )
+    return GOOGLE_API_KEY
+# Video Recorder class to handle WebRTC recording
+class VideoRecorder(VideoProcessorBase):
+    """Frame processor that passes video through and buffers it while recording.
+
+    A lock guards `frames` and `recording` because recv() and the
+    start/stop/is_recording calls may run on different threads
+    (presumably streamlit-webrtc's worker vs. the Streamlit script - confirm).
+    """
+    def __init__(self):
+        self.frames = []  # BGR image arrays captured since start_recording()
+        self.recording = False
+        self.lock = threading.Lock()
+    def recv(self, frame):
+        """Return the frame rebuilt as bgr24, storing a copy if recording."""
+        img = frame.to_ndarray(format="bgr24")
+        with self.lock:
+            if self.recording:
+                # Copy so the buffered frame cannot alias the array sent back out
+                self.frames.append(img.copy())
+        return av.VideoFrame.from_ndarray(img, format="bgr24")
+    def start_recording(self):
+        """Begin a new recording, discarding any previously buffered frames."""
+        with self.lock:
+            self.recording = True
+            self.frames = []
+    def stop_recording(self):
+        """Stop recording and return the list of captured frames."""
+        with self.lock:
+            self.recording = False
+            # Hand the buffer to the caller instead of copying it, so the
+            # processor does not keep every frame alive after recording ends.
+            captured, self.frames = self.frames, []
+            return captured
+    def is_recording(self):
+        """Return True while frames are being captured."""
+        with self.lock:
+            return self.recording
+# VideoProcessor class for Gemini API
 class VideoProcessor:
     def __init__(self, api_key):
         genai.configure(api_key=api_key)
+        self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
+    def upload_video(self, video_path, display_name="recorded_video"):
         return genai.upload_file(path=video_path, display_name=display_name)
     def wait_for_processing(self, video_file):
             raise RuntimeError("Video processing failed")
         return video_file
+    def generate_summary(self, video_file):
+        """Send *video_file* with a fixed summary prompt to the model; return the reply text."""
+        summary_prompt = """Analyze this video and provide a comprehensive summary that includes:
+        1. **Main Content**: What is happening in the video?
+        2. **Key Points**: What are the most important moments or information?
+        3. **Visual Elements**: Describe the scene, people, objects, or activities shown
+        4. **Audio/Speech**: If there's speech, summarize what was said
+        5. **Duration and Structure**: How is the video organized?
+        6. **Purpose**: What appears to be the purpose or message of this video?
+        Please format the summary in a clear, structured way."""
+        return self.model.generate_content([video_file, summary_prompt]).text
+def save_frames_as_video(frames, output_path, fps=30):
+    """Save recorded frames as a video file.
+
+    Args:
+        frames: sequence of image arrays; the first frame's shape sets the
+            video size (presumably HxWx3 BGR as produced by VideoRecorder -
+            confirm for other callers).
+        output_path: destination file path.
+        fps: frame rate passed to the video writer.
+
+    Returns:
+        True if the frames were written; False if there were no frames or the
+        writer could not be opened.
+    """
+    if not frames:
+        return False
+    height, width, _ = frames[0].shape
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    # An unusable codec or path leaves the writer unopened rather than raising;
+    # report that to the caller instead of claiming success.
+    if not out.isOpened():
+        out.release()
+        return False
+    try:
+        for frame in frames:
+            out.write(frame)
+    finally:
+        # Release even if a write fails so the file handle is not leaked
+        out.release()
+    return True
 # Initialize session state
+# Default value for each session-state key the app reads; only missing keys are set
+_SESSION_DEFAULTS = {
+    "video_processor": None,
+    "video_file": None,
+    "summary": None,
+    "recording_complete": False,
+    "recorded_frames": None,
+}
+for _key, _default in _SESSION_DEFAULTS.items():
+    if _key not in st.session_state:
+        st.session_state[_key] = _default
 # Main app function
 def main():
+    """Render the page: record from the camera, analyze the clip, show the summary.
+
+    Stops the script early (st.stop) when no API key is available.
+    """
+    st.set_page_config(
+        page_title="Video Recording & Analysis",
+        page_icon="🎥",
+        layout="wide"
+    )
+    st.title("🎥 Video Recording & Automatic Analysis")
+    st.markdown("Record a video directly from your camera and get an AI-generated summary")
     st.markdown("---")
+    # Get API key
     api_key = get_api_key()
     if not api_key:
+        st.error("⚠️ Please set your GOOGLE_API_KEY in Hugging Face Spaces secrets or environment variables")
         st.stop()
+    # Initialize video processor
+    if st.session_state.video_processor is None:
+        st.session_state.video_processor = VideoProcessor(api_key)
+    # Create two columns for layout
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.subheader("📹 Video Recording")
+        # WebRTC streamer for video recording
+        ctx = webrtc_streamer(
+            key="video-recorder",
+            mode=WebRtcMode.SENDRECV,
+            video_processor_factory=VideoRecorder,
+            media_stream_constraints={"video": True, "audio": False},
+            async_processing=True,
+        )
+        # Recording controls
+        col_start, col_stop = st.columns(2)
+        with col_start:
+            if st.button("🔴 Start Recording", use_container_width=True):
+                if ctx.video_processor:
+                    ctx.video_processor.start_recording()
+                    st.session_state.recording_complete = False
+                    st.session_state.summary = None
+                    st.success("Recording started...")
+        with col_stop:
+            if st.button("⏹️ Stop Recording", use_container_width=True):
+                if ctx.video_processor and ctx.video_processor.is_recording():
+                    frames = ctx.video_processor.stop_recording()
+                    if frames:
+                        st.session_state.recorded_frames = frames
+                        st.session_state.recording_complete = True
+                        st.success(f"Recording stopped! Captured {len(frames)} frames")
+                    else:
+                        st.warning("No frames were recorded")
+        # Display recording status
+        if ctx.video_processor and ctx.video_processor.is_recording():
+            st.info("🔴 Recording in progress...")
+        # Process the recorded video
+        if st.session_state.recording_complete and st.session_state.recorded_frames:
+            with st.spinner("Processing video..."):
+                # Reserve a temp path; the handle is closed before the video is written to it
+                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
                     tmp_path = tmp.name
+                try:
+                    # Save frames as video
+                    if save_frames_as_video(st.session_state.recorded_frames, tmp_path):
+                        # Upload to Gemini
+                        video_file = st.session_state.video_processor.upload_video(
+                            tmp_path,
+                            "recorded_video"
+                        )
+                        # Wait for processing
+                        processed_file = st.session_state.video_processor.wait_for_processing(
+                            video_file
+                        )
+                        # Generate summary
+                        st.session_state.summary = st.session_state.video_processor.generate_summary(
+                            processed_file
+                        )
+                        st.success("✅ Video analyzed successfully!")
+                    else:
+                        st.error("Failed to save video")
+                except Exception as e:
+                    st.error(f"Error processing video: {str(e)}")
+                finally:
+                    # Always remove the temp file and clear the flag, including
+                    # when saving fails; otherwise the file leaks and every
+                    # rerun retries the same recording.
+                    if os.path.exists(tmp_path):
+                        os.unlink(tmp_path)
+                    st.session_state.recording_complete = False
+    with col2:
+        st.subheader("📝 Video Summary")
+        if st.session_state.summary:
+            st.markdown(st.session_state.summary)
+            # Option to download summary
+            st.download_button(
+                label="📥 Download Summary",
+                data=st.session_state.summary,
+                file_name="video_summary.txt",
+                mime="text/plain"
+            )
         else:
+            st.info("Record a video and it will be automatically analyzed. The summary will appear here.")
+    # Sidebar with instructions
+    with st.sidebar:
+        st.markdown("### 📖 How to Use")
+        st.markdown("""
+        1. **Allow camera access** when prompted
+        2. Click **Start Recording** to begin
+        3. Perform your action or speak
+        4. Click **Stop Recording** to end
+        5. Wait for automatic analysis
+        6. View your video summary
+        """)
+        st.markdown("### ⚙️ Settings")
+        if st.button("🔄 Reset Application"):
+            # Rerun right after clearing so the module-level defaults are restored
+            st.session_state.clear()
+            st.rerun()
+        st.markdown("### 📌 Notes")
+        st.markdown("""
+        - Video is processed using Gemini 2.0
+        - Recording is temporary and not stored
+        - API key should be set in HF Spaces secrets
+        """)
 if __name__ == "__main__":
+    # Check that streamlit-webrtc is importable and show install instructions
+    # if it is not; nothing is installed automatically.
+    # NOTE(review): the file's top-level `from streamlit_webrtc import ...`
+    # runs before this guard, so a missing package would presumably fail
+    # there first - confirm whether this check can ever trigger.
+    try:
+        import streamlit_webrtc
+    except ImportError:
+        st.error("Please install streamlit-webrtc: `pip install streamlit-webrtc`")
+        st.stop()
+    main()