Spaces:

Hug0endob
/

Video-Analysis

Build error

App Files Community

CB committed on Sep 8, 2025

Commit

ee05179

verified ·

1 Parent(s): 6d36e3a

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +50 -65

streamlit_app.py CHANGED Viewed

@@ -11,15 +11,6 @@ from dotenv import load_dotenv
 load_dotenv()
-# Optional imports for Phi/Gemini and Google genai
-try:
-    from phi.agent import Agent
-    from phi.model.google import Gemini
-    from phi.tools.duckduckgo import DuckDuckGo
-    PHI_AVAILABLE = True
-except Exception:
-    PHI_AVAILABLE = False
 try:
     import google.generativeai as genai
     from google.generativeai import upload_file, get_file
@@ -27,18 +18,15 @@ try:
 except Exception:
     GENAI_AVAILABLE = False
-# Page config
 st.set_page_config(page_title="Generate the story of videos:", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-# Session state defaults
 st.session_state.setdefault("videos", "")
-st.session_state.setdefault("downloaded", [])
 st.session_state.setdefault("loop_video", True)
-# Sidebar UI (all controls live here)
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL (or local .mp4 path)", key="url", placeholder="Enter Video URL or path")
@@ -56,25 +44,6 @@ model_id = settings_exp.text_input("Gemini Model", "gemini-2.0-flash-lite")
 analysis_prompt = settings_exp.text_area("Enter analysis", "watch entire video and describe")
 settings_exp.text_input("Video Password", key="video-password", placeholder="Enter Video Password (if needed)")
-# Safety settings (kept but optional)
-safety_settings = [
-    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
-]
-# Initialize phi agent (cached)
-@st.cache_resource
-def initialize_agent(model_id: str):
-    if not PHI_AVAILABLE:
-        return None
-    return Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
-multimodal_Agent = None
-if PHI_AVAILABLE:
-    multimodal_Agent = initialize_agent(model_id)
 def sanitize_filename(path_str: str):
     name = Path(path_str).name
     name = name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -84,47 +53,33 @@ def convert_video_to_mp4(video_path: str) -> str:
     target_path = str(Path(video_path).with_suffix(".mp4"))
     if os.path.exists(target_path):
         return target_path
     try:
-        ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
-        try:
-            os.remove(video_path)
-        except FileNotFoundError:
-            pass
-    except Exception as e:
-        st.error(f"FFmpeg conversion failed: {e}")
-        raise
     return target_path
 def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
     if not url:
         raise ValueError("No URL provided")
-    # If it's a local file path and exists, just return it (convert if needed)
     if os.path.exists(url) and os.path.isfile(url):
         return convert_video_to_mp4(url)
     outtmpl = os.path.join(save_dir, "%(id)s.%(ext)s")
     ydl_opts = {"outtmpl": outtmpl, "format": "best"}
     if video_password:
         ydl_opts["videopassword"] = video_password
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-    except Exception as e:
-        raise RuntimeError(f"yt-dlp download failed: {e}")
     video_id = url.rstrip("/").split("/")[-1] or url.rstrip("/").split("/")[-2]
     matches = glob(os.path.join(save_dir, f"{video_id}.*"))
     if not matches:
         matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
     if not matches:
         raise FileNotFoundError("Downloaded video not found")
     return convert_video_to_mp4(matches[0])
-# Sidebar actions
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         video_password = st.session_state.get("video-password", "")
@@ -133,7 +88,7 @@ if st.sidebar.button("Load Video", use_container_width=True):
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
-# Preview & options in sidebar
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", True))
@@ -166,26 +121,56 @@ if st.session_state["videos"]:
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
-# Main area action button
 if st.button("Generate the story", type="primary"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     elif not GENAI_AVAILABLE or not API_KEY:
         st.error("Google generative API not configured. Provide API key in Settings.")
-    elif not PHI_AVAILABLE or multimodal_Agent is None:
-        st.error("Phi/Gemini agent not available in this environment.")
     else:
         try:
             with st.spinner("Uploading video to Google for processing..."):
-                processed_video = upload_file(st.session_state["videos"])
-                while getattr(processed_video, "state", None) and processed_video.state.name == "PROCESSING":
                     time.sleep(2)
-                    processed_video = get_file(processed_video.name)
-            with st.spinner("Running Gemini analysis..."):
-                response = multimodal_Agent.run(analysis_prompt, videos=[processed_video], safety_settings=safety_settings)
             st.subheader("Analysis Result")
-            st.markdown(response.content if hasattr(response, "content") else str(response))
         except Exception as e:
             st.error(f"An error occurred: {e}")

 load_dotenv()
 try:
     import google.generativeai as genai
     from google.generativeai import upload_file, get_file
 except Exception:
     GENAI_AVAILABLE = False
 st.set_page_config(page_title="Generate the story of videos:", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", True)
+# Sidebar controls
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL (or local .mp4 path)", key="url", placeholder="Enter Video URL or path")
 analysis_prompt = settings_exp.text_area("Enter analysis", "watch entire video and describe")
 settings_exp.text_input("Video Password", key="video-password", placeholder="Enter Video Password (if needed)")
 def sanitize_filename(path_str: str):
     name = Path(path_str).name
     name = name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
     target_path = str(Path(video_path).with_suffix(".mp4"))
     if os.path.exists(target_path):
         return target_path
+    ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
     try:
+        os.remove(video_path)
+    except Exception:
+        pass
     return target_path
 def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
     if not url:
         raise ValueError("No URL provided")
     if os.path.exists(url) and os.path.isfile(url):
         return convert_video_to_mp4(url)
     outtmpl = os.path.join(save_dir, "%(id)s.%(ext)s")
     ydl_opts = {"outtmpl": outtmpl, "format": "best"}
     if video_password:
         ydl_opts["videopassword"] = video_password
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([url])
     video_id = url.rstrip("/").split("/")[-1] or url.rstrip("/").split("/")[-2]
     matches = glob(os.path.join(save_dir, f"{video_id}.*"))
     if not matches:
         matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
     if not matches:
         raise FileNotFoundError("Downloaded video not found")
     return convert_video_to_mp4(matches[0])
+# Load video button
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         video_password = st.session_state.get("video-password", "")
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
+# Sidebar preview & options
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", True))
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
+# Main action: generate story using google.generativeai only
 if st.button("Generate the story", type="primary"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     elif not GENAI_AVAILABLE or not API_KEY:
         st.error("Google generative API not configured. Provide API key in Settings.")
     else:
         try:
             with st.spinner("Uploading video to Google for processing..."):
+                uploaded = upload_file(st.session_state["videos"])
+                # Poll until processing finished
+                while getattr(uploaded, "state", None) and uploaded.state.name == "PROCESSING":
                     time.sleep(2)
+                    uploaded = get_file(uploaded.name)
+            # Build a multimodal request. The exact shape may vary with SDK versions;
+            # below is a robust pattern: include the uploaded file as an "external" multimodal input
+            prompt_text = analysis_prompt.strip() or "Describe this video in vivid detail."
+            with st.spinner("Generating description from Gemini..."):
+                response = genai.responses.create(
+                    model=model_id,
+                    input=[{
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt_text},
+                            # reference the uploaded file by its name (SDK uses files param)
+                            {"type": "file", "file_name": uploaded.name}
+                        ]
+                    }],
+                    safety_settings=[
+                        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
+                        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
+                        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
+                        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
+                    ],
+                )
+            # responses.create returns a structure; extract text
+            text_out = ""
+            if hasattr(response, "output") and response.output:
+                # recent SDKs: response.output[0].content[0].text
+                try:
+                    for item in response.output:
+                        for cont in item.get("content", []):
+                            if cont.get("type") == "output_text" or cont.get("type") == "text":
+                                text_out += cont.get("text", "")
+                except Exception:
+                    text_out = str(response)
+            else:
+                # fallback to string
+                text_out = str(response)
             st.subheader("Analysis Result")
+            st.markdown(text_out)
         except Exception as e:
             st.error(f"An error occurred: {e}")