CB committed on
Commit
ed1c53f
·
verified ·
1 Parent(s): 770380c

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +46 -80
streamlit_app.py CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
18
- # Optional phi integration (Agent + Gemini wrapper)
19
  try:
20
  from phi.agent import Agent
21
  from phi.model.google import Gemini
@@ -25,22 +25,20 @@ except Exception:
25
  Agent = Gemini = DuckDuckGo = None
26
  HAS_PHI = False
27
 
28
- # google-genai (v1.49.1)
29
  try:
30
- import google_genai as genai # package name for google-genai
31
- from google_genai import Files, Responses, configure as genai_configure # convenience
32
  HAS_GENAI = True
33
  except Exception:
34
  genai = None
35
- Files = Responses = None
36
- genai_configure = None
37
  HAS_GENAI = False
38
 
39
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
40
  DATA_DIR = Path("./data")
41
  DATA_DIR.mkdir(exist_ok=True)
42
 
43
- # ---- Defaults & constants ----
44
  MODEL_OPTIONS = [
45
  "gemini-2.5-flash",
46
  "gemini-2.5-flash-lite",
@@ -48,14 +46,12 @@ MODEL_OPTIONS = [
48
  "gemini-2.0-flash-lite",
49
  "custom",
50
  ]
51
-
52
  DEFAULT_MODEL = "gemini-2.0-flash-lite"
53
  DEFAULT_PROMPT = (
54
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
55
  "Keep language professional. Include a list of observations for notable events."
56
  )
57
 
58
- # ---- Session defaults ----
59
  st.session_state.setdefault("videos", "")
60
  st.session_state.setdefault("loop_video", False)
61
  st.session_state.setdefault("uploaded_file", None)
@@ -73,7 +69,6 @@ st.session_state.setdefault("processing_timeout", 900)
73
  st.session_state.setdefault("generation_timeout", 300)
74
  st.session_state.setdefault("compress_threshold_mb", 200)
75
 
76
- # ---- Helpers ----
77
  def sanitize_filename(path_str: str):
78
  name = Path(path_str).name
79
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -139,13 +134,11 @@ def configure_genai_if_needed():
139
  if not key:
140
  return False
141
  try:
142
- if genai_configure:
143
- genai_configure(api_key=key)
144
  except Exception:
145
  pass
146
  return True
147
 
148
- # ---- Agent management (reuse) ----
149
  _agent = None
150
  def maybe_create_agent(model_id: str):
151
  global _agent
@@ -156,8 +149,7 @@ def maybe_create_agent(model_id: str):
156
  if _agent and st.session_state.get("last_model") == model_id:
157
  return _agent
158
  try:
159
- if genai_configure:
160
- genai_configure(api_key=key)
161
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
162
  st.session_state["last_model"] = model_id
163
  except Exception:
@@ -183,7 +175,6 @@ if current_url != st.session_state.get("last_url_value"):
183
  clear_all_video_state()
184
  st.session_state["last_url_value"] = current_url
185
 
186
- # ---- Sidebar UI ----
187
  st.sidebar.header("Video Input")
188
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
189
 
@@ -228,34 +219,28 @@ safety_settings = [
228
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
229
  ]
230
 
231
- # ---- Upload & processing helpers for google-genai Files ----
232
  def upload_video_sdk(filepath: str):
233
  key = get_effective_api_key()
234
  if not key:
235
  raise RuntimeError("No API key provided")
236
- if not HAS_GENAI or Files is None:
237
- raise RuntimeError("google-genai SDK not available; cannot upload")
238
- if genai_configure:
239
- genai_configure(api_key=key)
240
- with open(filepath, "rb") as fh:
241
- # Files.upload returns a response-like object; adapt as needed
242
- resp = Files.create(file=fh, purpose="video")
243
- return resp
244
 
245
  def wait_for_processed(file_obj, timeout: int = None):
246
  if timeout is None:
247
  timeout = st.session_state.get("processing_timeout", 900)
248
- if not HAS_GENAI or Files is None:
249
  return file_obj
250
  start = time.time()
251
- # file_obj may be a dict or an SDK object; adapt
252
- file_id = file_obj.get("name") if isinstance(file_obj, dict) else getattr(file_obj, "name", None) or getattr(file_obj, "id", None)
253
- if not file_id:
254
  return file_obj
255
  backoff = 1.0
256
  while True:
257
  try:
258
- obj = Files.get(file_id)
259
  except Exception as e:
260
  if time.time() - start > timeout:
261
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
@@ -263,9 +248,8 @@ def wait_for_processed(file_obj, timeout: int = None):
263
  backoff = min(backoff * 2, 8.0)
264
  continue
265
 
266
- state = obj.get("state") if isinstance(obj, dict) else getattr(obj, "state", None)
267
- name = state.get("name") if isinstance(state, dict) else getattr(state, "name", None)
268
- if not name or name != "PROCESSING":
269
  return obj
270
 
271
  if time.time() - start > timeout:
@@ -312,62 +296,46 @@ def compress_video_if_large(local_path: str, threshold_mb: int = 200):
312
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
313
  return local_path, False
314
 
315
- # ---- Responses API caller adapted for google-genai Responses ----
316
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
317
  key = get_effective_api_key()
318
  if not key:
319
  raise RuntimeError("No API key provided")
320
- if not HAS_GENAI or Responses is None:
321
- raise RuntimeError("Responses API not available; install google-genai SDK.")
322
- if genai_configure:
323
- genai_configure(api_key=key)
324
-
325
- file_name = file_name_or_id(processed)
326
- if not file_name:
327
  raise RuntimeError("Uploaded file missing name/id")
328
 
329
- # Build a minimal Responses.create call that attaches the video file reference.
330
- # The exact shape depends on google-genai; here we create a simple text + reference instruction.
331
- request = {
332
  "model": model_used,
333
- "input": [
334
- {"role": "system", "content": prompt_text},
335
- {"role": "user", "content": "Please summarize the attached video."}
336
- ],
337
- "attachments": [{"mime_type": "video/mp4", "uri": f"file:{file_name}"}],
338
  "max_output_tokens": max_tokens,
339
  "temperature": 0.2,
340
  }
341
-
342
- # Responses.create returns a response object/dict; attempt to extract text
343
- resp = Responses.create(**request)
344
- text = ""
345
- # support multiple response shapes
346
- if isinstance(resp, dict):
347
- # common shapes: resp['output'][0]['content'] or resp['candidates'][0]['content']
348
- out = resp.get("output") or resp.get("candidates")
349
- if isinstance(out, list) and out:
350
- first = out[0]
351
- if isinstance(first, dict):
352
- text = first.get("content") or first.get("text") or ""
353
  else:
354
- text = str(first)
355
  else:
356
- text = resp.get("content") or resp.get("text") or ""
357
- else:
358
- # SDK object: try attributes
359
- try:
360
- if hasattr(resp, "outputs"):
361
- outputs = getattr(resp, "outputs", None)
362
- if outputs:
363
- text = outputs[0].get("content") if isinstance(outputs, list) and isinstance(outputs[0], dict) else str(outputs[0])
364
- elif hasattr(resp, "text"):
365
- text = getattr(resp, "text", "")
366
- except Exception:
367
- text = str(resp)
368
- return text or ""
369
 
370
- # ---- Layout ----
371
  col1, col2 = st.columns([1, 3])
372
  with col1:
373
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
@@ -417,7 +385,6 @@ if st.session_state["videos"]:
417
  except Exception:
418
  pass
419
 
420
- # ---- Main generation flow ----
421
  if generate_now and not st.session_state.get("busy"):
422
  if not st.session_state.get("videos"):
423
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -430,8 +397,7 @@ if generate_now and not st.session_state.get("busy"):
430
  st.session_state["busy"] = True
431
  try:
432
  if HAS_GENAI and genai is not None:
433
- if genai_configure:
434
- genai_configure(api_key=key_to_use)
435
  except Exception:
436
  pass
437
 
@@ -453,7 +419,7 @@ if generate_now and not st.session_state.get("busy"):
453
 
454
  if reupload_needed:
455
  if not HAS_GENAI:
456
- raise RuntimeError("google-genai SDK not available; install it.")
457
  local_path = current_path
458
  upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
459
 
 
15
 
16
  load_dotenv()
17
 
18
+ # Optional phi integration
19
  try:
20
  from phi.agent import Agent
21
  from phi.model.google import Gemini
 
25
  Agent = Gemini = DuckDuckGo = None
26
  HAS_PHI = False
27
 
28
+ # Legacy google.generativeai SDK
29
  try:
30
+ import google.generativeai as genai
31
+ from google.generativeai import upload_file, get_file, responses # type: ignore
32
  HAS_GENAI = True
33
  except Exception:
34
  genai = None
35
+ upload_file = get_file = responses = None
 
36
  HAS_GENAI = False
37
 
38
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
39
  DATA_DIR = Path("./data")
40
  DATA_DIR.mkdir(exist_ok=True)
41
 
 
42
  MODEL_OPTIONS = [
43
  "gemini-2.5-flash",
44
  "gemini-2.5-flash-lite",
 
46
  "gemini-2.0-flash-lite",
47
  "custom",
48
  ]
 
49
  DEFAULT_MODEL = "gemini-2.0-flash-lite"
50
  DEFAULT_PROMPT = (
51
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
52
  "Keep language professional. Include a list of observations for notable events."
53
  )
54
 
 
55
  st.session_state.setdefault("videos", "")
56
  st.session_state.setdefault("loop_video", False)
57
  st.session_state.setdefault("uploaded_file", None)
 
69
  st.session_state.setdefault("generation_timeout", 300)
70
  st.session_state.setdefault("compress_threshold_mb", 200)
71
 
 
72
  def sanitize_filename(path_str: str):
73
  name = Path(path_str).name
74
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
134
  if not key:
135
  return False
136
  try:
137
+ genai.configure(api_key=key)
 
138
  except Exception:
139
  pass
140
  return True
141
 
 
142
  _agent = None
143
  def maybe_create_agent(model_id: str):
144
  global _agent
 
149
  if _agent and st.session_state.get("last_model") == model_id:
150
  return _agent
151
  try:
152
+ genai.configure(api_key=key)
 
153
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
154
  st.session_state["last_model"] = model_id
155
  except Exception:
 
175
  clear_all_video_state()
176
  st.session_state["last_url_value"] = current_url
177
 
 
178
  st.sidebar.header("Video Input")
179
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
180
 
 
219
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
220
  ]
221
 
 
222
  def upload_video_sdk(filepath: str):
223
  key = get_effective_api_key()
224
  if not key:
225
  raise RuntimeError("No API key provided")
226
+ if not HAS_GENAI or upload_file is None:
227
+ raise RuntimeError("google.generativeai SDK not available; cannot upload")
228
+ genai.configure(api_key=key)
229
+ return upload_file(filepath)
 
 
 
 
230
 
231
  def wait_for_processed(file_obj, timeout: int = None):
232
  if timeout is None:
233
  timeout = st.session_state.get("processing_timeout", 900)
234
+ if not HAS_GENAI or get_file is None:
235
  return file_obj
236
  start = time.time()
237
+ name = file_name_or_id(file_obj)
238
+ if not name:
 
239
  return file_obj
240
  backoff = 1.0
241
  while True:
242
  try:
243
+ obj = get_file(name)
244
  except Exception as e:
245
  if time.time() - start > timeout:
246
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
 
248
  backoff = min(backoff * 2, 8.0)
249
  continue
250
 
251
+ state = getattr(obj, "state", None)
252
+ if not state or getattr(state, "name", None) != "PROCESSING":
 
253
  return obj
254
 
255
  if time.time() - start > timeout:
 
296
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
297
  return local_path, False
298
 
 
299
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
300
  key = get_effective_api_key()
301
  if not key:
302
  raise RuntimeError("No API key provided")
303
+ if not HAS_GENAI or responses is None:
304
+ raise RuntimeError("Responses API not available; install google-generativeai SDK.")
305
+ genai.configure(api_key=key)
306
+ fname = file_name_or_id(processed)
307
+ if not fname:
 
 
308
  raise RuntimeError("Uploaded file missing name/id")
309
 
310
+ system_msg = {"role": "system", "content": prompt_text}
311
+ user_msg = {"role": "user", "content": "Please summarize the attached video."}
312
+ req = {
313
  "model": model_used,
314
+ "input": [system_msg, user_msg],
315
+ "files": [fname],
 
 
 
316
  "max_output_tokens": max_tokens,
317
  "temperature": 0.2,
318
  }
319
+ resp = responses.create(**req)
320
+ # extract text robustly
321
+ out = ""
322
+ try:
323
+ if isinstance(resp, dict):
324
+ candidates = resp.get("candidates") or resp.get("output") or []
325
+ if isinstance(candidates, list) and candidates:
326
+ c = candidates[0]
327
+ if isinstance(c, dict):
328
+ out = c.get("content") or c.get("text") or ""
329
+ else:
330
+ out = str(c)
331
  else:
332
+ out = resp.get("outputText") or resp.get("content") or resp.get("text") or ""
333
  else:
334
+ out = getattr(resp, "output_text", "") or getattr(resp, "text", "") or ""
335
+ except Exception:
336
+ out = str(resp)
337
+ return out or ""
 
 
 
 
 
 
 
 
 
338
 
 
339
  col1, col2 = st.columns([1, 3])
340
  with col1:
341
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 
385
  except Exception:
386
  pass
387
 
 
388
  if generate_now and not st.session_state.get("busy"):
389
  if not st.session_state.get("videos"):
390
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
397
  st.session_state["busy"] = True
398
  try:
399
  if HAS_GENAI and genai is not None:
400
+ genai.configure(api_key=key_to_use)
 
401
  except Exception:
402
  pass
403
 
 
419
 
420
  if reupload_needed:
421
  if not HAS_GENAI:
422
+ raise RuntimeError("google-generativeai SDK not available; install it.")
423
  local_path = current_path
424
  upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
425