CB committed on
Commit
ee05179
·
verified ·
1 Parent(s): 6d36e3a

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +50 -65
streamlit_app.py CHANGED
@@ -11,15 +11,6 @@ from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
 
14
- # Optional imports for Phi/Gemini and Google genai
15
- try:
16
- from phi.agent import Agent
17
- from phi.model.google import Gemini
18
- from phi.tools.duckduckgo import DuckDuckGo
19
- PHI_AVAILABLE = True
20
- except Exception:
21
- PHI_AVAILABLE = False
22
-
23
  try:
24
  import google.generativeai as genai
25
  from google.generativeai import upload_file, get_file
@@ -27,18 +18,15 @@ try:
27
  except Exception:
28
  GENAI_AVAILABLE = False
29
 
30
- # Page config
31
  st.set_page_config(page_title="Generate the story of videos:", layout="wide")
32
 
33
  DATA_DIR = Path("./data")
34
  DATA_DIR.mkdir(exist_ok=True)
35
 
36
- # Session state defaults
37
  st.session_state.setdefault("videos", "")
38
- st.session_state.setdefault("downloaded", [])
39
  st.session_state.setdefault("loop_video", True)
40
 
41
- # Sidebar UI (all controls live here)
42
  st.sidebar.header("Video Input")
43
  st.sidebar.text_input("Video URL (or local .mp4 path)", key="url", placeholder="Enter Video URL or path")
44
 
@@ -56,25 +44,6 @@ model_id = settings_exp.text_input("Gemini Model", "gemini-2.0-flash-lite")
56
  analysis_prompt = settings_exp.text_area("Enter analysis", "watch entire video and describe")
57
  settings_exp.text_input("Video Password", key="video-password", placeholder="Enter Video Password (if needed)")
58
 
59
- # Safety settings (kept but optional)
60
- safety_settings = [
61
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
62
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
63
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
64
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
65
- ]
66
-
67
- # Initialize phi agent (cached)
68
- @st.cache_resource
69
- def initialize_agent(model_id: str):
70
- if not PHI_AVAILABLE:
71
- return None
72
- return Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
73
-
74
- multimodal_Agent = None
75
- if PHI_AVAILABLE:
76
- multimodal_Agent = initialize_agent(model_id)
77
-
78
  def sanitize_filename(path_str: str):
79
  name = Path(path_str).name
80
  name = name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -84,47 +53,33 @@ def convert_video_to_mp4(video_path: str) -> str:
84
  target_path = str(Path(video_path).with_suffix(".mp4"))
85
  if os.path.exists(target_path):
86
  return target_path
 
87
  try:
88
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
89
- try:
90
- os.remove(video_path)
91
- except FileNotFoundError:
92
- pass
93
- except Exception as e:
94
- st.error(f"FFmpeg conversion failed: {e}")
95
- raise
96
  return target_path
97
 
98
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
99
  if not url:
100
  raise ValueError("No URL provided")
101
-
102
- # If it's a local file path and exists, just return it (convert if needed)
103
  if os.path.exists(url) and os.path.isfile(url):
104
  return convert_video_to_mp4(url)
105
-
106
  outtmpl = os.path.join(save_dir, "%(id)s.%(ext)s")
107
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
108
  if video_password:
109
  ydl_opts["videopassword"] = video_password
110
-
111
- try:
112
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
113
- ydl.download([url])
114
- except Exception as e:
115
- raise RuntimeError(f"yt-dlp download failed: {e}")
116
-
117
  video_id = url.rstrip("/").split("/")[-1] or url.rstrip("/").split("/")[-2]
118
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
119
  if not matches:
120
  matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
121
-
122
  if not matches:
123
  raise FileNotFoundError("Downloaded video not found")
124
-
125
  return convert_video_to_mp4(matches[0])
126
 
127
- # Sidebar actions
128
  if st.sidebar.button("Load Video", use_container_width=True):
129
  try:
130
  video_password = st.session_state.get("video-password", "")
@@ -133,7 +88,7 @@ if st.sidebar.button("Load Video", use_container_width=True):
133
  except Exception as e:
134
  st.sidebar.error(f"Failed to load video: {e}")
135
 
136
- # Preview & options in sidebar
137
  if st.session_state["videos"]:
138
  try:
139
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", True))
@@ -166,26 +121,56 @@ if st.session_state["videos"]:
166
 
167
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
168
 
169
- # Main area action button
170
  if st.button("Generate the story", type="primary"):
171
  if not st.session_state.get("videos"):
172
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
173
  elif not GENAI_AVAILABLE or not API_KEY:
174
  st.error("Google generative API not configured. Provide API key in Settings.")
175
- elif not PHI_AVAILABLE or multimodal_Agent is None:
176
- st.error("Phi/Gemini agent not available in this environment.")
177
  else:
178
  try:
179
  with st.spinner("Uploading video to Google for processing..."):
180
- processed_video = upload_file(st.session_state["videos"])
181
- while getattr(processed_video, "state", None) and processed_video.state.name == "PROCESSING":
 
182
  time.sleep(2)
183
- processed_video = get_file(processed_video.name)
184
-
185
- with st.spinner("Running Gemini analysis..."):
186
- response = multimodal_Agent.run(analysis_prompt, videos=[processed_video], safety_settings=safety_settings)
187
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  st.subheader("Analysis Result")
189
- st.markdown(response.content if hasattr(response, "content") else str(response))
190
  except Exception as e:
191
  st.error(f"An error occurred: {e}")
 
11
 
12
  load_dotenv()
13
 
 
 
 
 
 
 
 
 
 
14
  try:
15
  import google.generativeai as genai
16
  from google.generativeai import upload_file, get_file
 
18
  except Exception:
19
  GENAI_AVAILABLE = False
20
 
 
21
  st.set_page_config(page_title="Generate the story of videos:", layout="wide")
22
 
23
  DATA_DIR = Path("./data")
24
  DATA_DIR.mkdir(exist_ok=True)
25
 
 
26
  st.session_state.setdefault("videos", "")
 
27
  st.session_state.setdefault("loop_video", True)
28
 
29
+ # Sidebar controls
30
  st.sidebar.header("Video Input")
31
  st.sidebar.text_input("Video URL (or local .mp4 path)", key="url", placeholder="Enter Video URL or path")
32
 
 
44
  analysis_prompt = settings_exp.text_area("Enter analysis", "watch entire video and describe")
45
  settings_exp.text_input("Video Password", key="video-password", placeholder="Enter Video Password (if needed)")
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def sanitize_filename(path_str: str):
48
  name = Path(path_str).name
49
  name = name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
53
  target_path = str(Path(video_path).with_suffix(".mp4"))
54
  if os.path.exists(target_path):
55
  return target_path
56
+ ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
57
  try:
58
+ os.remove(video_path)
59
+ except Exception:
60
+ pass
 
 
 
 
 
61
  return target_path
62
 
63
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
64
  if not url:
65
  raise ValueError("No URL provided")
 
 
66
  if os.path.exists(url) and os.path.isfile(url):
67
  return convert_video_to_mp4(url)
 
68
  outtmpl = os.path.join(save_dir, "%(id)s.%(ext)s")
69
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
70
  if video_password:
71
  ydl_opts["videopassword"] = video_password
72
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
73
+ ydl.download([url])
 
 
 
 
 
74
  video_id = url.rstrip("/").split("/")[-1] or url.rstrip("/").split("/")[-2]
75
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
76
  if not matches:
77
  matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
 
78
  if not matches:
79
  raise FileNotFoundError("Downloaded video not found")
 
80
  return convert_video_to_mp4(matches[0])
81
 
82
+ # Load video button
83
  if st.sidebar.button("Load Video", use_container_width=True):
84
  try:
85
  video_password = st.session_state.get("video-password", "")
 
88
  except Exception as e:
89
  st.sidebar.error(f"Failed to load video: {e}")
90
 
91
+ # Sidebar preview & options
92
  if st.session_state["videos"]:
93
  try:
94
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", True))
 
121
 
122
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
123
 
124
+ # Main action: generate story using google.generativeai only
125
  if st.button("Generate the story", type="primary"):
126
  if not st.session_state.get("videos"):
127
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
128
  elif not GENAI_AVAILABLE or not API_KEY:
129
  st.error("Google generative API not configured. Provide API key in Settings.")
 
 
130
  else:
131
  try:
132
  with st.spinner("Uploading video to Google for processing..."):
133
+ uploaded = upload_file(st.session_state["videos"])
134
+ # Poll until processing finished
135
+ while getattr(uploaded, "state", None) and uploaded.state.name == "PROCESSING":
136
  time.sleep(2)
137
+ uploaded = get_file(uploaded.name)
138
+ # Build a multimodal request. The exact shape may vary with SDK versions;
139
+ # below is a robust pattern: include the uploaded file as an "external" multimodal input
140
+ prompt_text = analysis_prompt.strip() or "Describe this video in vivid detail."
141
+ with st.spinner("Generating description from Gemini..."):
142
+ response = genai.responses.create(
143
+ model=model_id,
144
+ input=[{
145
+ "role": "user",
146
+ "content": [
147
+ {"type": "text", "text": prompt_text},
148
+ # reference the uploaded file by its name (SDK uses files param)
149
+ {"type": "file", "file_name": uploaded.name}
150
+ ]
151
+ }],
152
+ safety_settings=[
153
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
154
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
155
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
156
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
157
+ ],
158
+ )
159
+ # responses.create returns a structure; extract text
160
+ text_out = ""
161
+ if hasattr(response, "output") and response.output:
162
+ # recent SDKs: response.output[0].content[0].text
163
+ try:
164
+ for item in response.output:
165
+ for cont in item.get("content", []):
166
+ if cont.get("type") == "output_text" or cont.get("type") == "text":
167
+ text_out += cont.get("text", "")
168
+ except Exception:
169
+ text_out = str(response)
170
+ else:
171
+ # fallback to string
172
+ text_out = str(response)
173
  st.subheader("Analysis Result")
174
+ st.markdown(text_out)
175
  except Exception as e:
176
  st.error(f"An error occurred: {e}")