CB committed on
Commit
4633b20
·
verified ·
1 Parent(s): ef36655

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +11 -41
streamlit_app.py CHANGED
@@ -1,11 +1,10 @@
1
  # streamlit_app.py
2
  import os
3
  import time
4
- import json
5
  import string
 
6
  from glob import glob
7
  from pathlib import Path
8
- import hashlib
9
  from difflib import SequenceMatcher
10
 
11
  import yt_dlp
@@ -37,7 +36,6 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
40
- # Session state defaults
41
  st.session_state.setdefault("videos", "")
42
  st.session_state.setdefault("loop_video", False)
43
  st.session_state.setdefault("uploaded_file", None)
@@ -49,7 +47,6 @@ st.session_state.setdefault("last_error", "")
49
  st.session_state.setdefault("file_hash", None)
50
  st.session_state.setdefault("fast_mode", False)
51
 
52
- # Helpers
53
  def sanitize_filename(path_str: str):
54
  name = Path(path_str).name
55
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -104,14 +101,12 @@ def file_name_or_id(file_obj):
104
  return file_obj.get("name") or file_obj.get("id")
105
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
106
 
107
- # Configure Google SDK if key present
108
  if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
109
  try:
110
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
111
  except Exception:
112
  pass
113
 
114
- # UI: Sidebar inputs
115
  st.sidebar.header("Video Input")
116
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
117
 
@@ -121,7 +116,12 @@ API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placehold
121
  model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
122
  model_id = model_input.strip() or "gemini-2.0-flash-lite"
123
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
124
- analysis_prompt = settings_exp.text_area("Enter analysis", value="watch entire video and describe", height=120)
 
 
 
 
 
125
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
126
  settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
127
 
@@ -135,7 +135,6 @@ safety_settings = [
135
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
136
  ]
137
 
138
- # Build Agent if available
139
  _agent = None
140
  if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
141
  try:
@@ -159,7 +158,6 @@ def clear_all_video_state():
159
  except Exception:
160
  pass
161
 
162
- # Track URL changes
163
  if "last_url_value" not in st.session_state:
164
  st.session_state["last_url_value"] = st.session_state.get("url", "")
165
  current_url = st.session_state.get("url", "")
@@ -167,7 +165,6 @@ if current_url != st.session_state.get("last_url_value"):
167
  clear_all_video_state()
168
  st.session_state["last_url_value"] = current_url
169
 
170
- # Load video button
171
  if st.sidebar.button("Load Video", use_container_width=True):
172
  try:
173
  vpw = st.session_state.get("video-password", "")
@@ -180,7 +177,6 @@ if st.sidebar.button("Load Video", use_container_width=True):
180
  except Exception as e:
181
  st.sidebar.error(f"Failed to load video: {e}")
182
 
183
- # Sidebar preview & options
184
  if st.session_state["videos"]:
185
  try:
186
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
@@ -202,7 +198,6 @@ if st.session_state["videos"]:
202
 
203
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
204
 
205
- # Upload helpers
206
  def upload_video_sdk(filepath: str):
207
  key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
208
  if not key:
@@ -230,7 +225,6 @@ def wait_for_processed(file_obj, timeout=180):
230
  time.sleep(backoff)
231
  backoff = min(backoff * 2, 8.0)
232
 
233
- # Robust prompt-echo removal
234
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
235
  if not prompt or not text:
236
  return text
@@ -239,13 +233,10 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
239
  b = " ".join(b_full[:check_len].lower().split())
240
  ratio = SequenceMatcher(None, a, b).ratio()
241
  if ratio >= ratio_threshold:
242
- # remove the approximate prefix by length of prompt, but be conservative
243
  cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
244
  new_text = b_full[cut:].lstrip(" \n:-")
245
- # If result is empty or too small, return original to avoid data loss
246
  if len(new_text) >= 3:
247
  return new_text
248
- # also remove common placeholder prefixes
249
  placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
250
  low = b_full.strip().lower()
251
  for ph in placeholders:
@@ -253,18 +244,13 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
253
  return b_full[len(ph):].lstrip(" \n:-")
254
  return text
255
 
256
- # Main UI layout
257
  col1, col2 = st.columns([1, 3])
258
  with col1:
259
- if st.session_state.get("busy"):
260
- st.button("Generate the story", disabled=True)
261
- else:
262
- generate_now = st.button("Generate the story", type="primary")
263
  with col2:
264
  pass
265
 
266
- # Generation flow
267
- if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
268
  if not st.session_state.get("videos"):
269
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
270
  else:
@@ -275,9 +261,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
275
  try:
276
  st.session_state["busy"] = True
277
  processed = st.session_state.get("processed_file")
278
- # Use file hash to determine if we must re-upload
279
  current_path = st.session_state.get("videos")
280
- current_hash = None
281
  try:
282
  current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
283
  except Exception:
@@ -291,7 +275,6 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
291
  if not HAS_GENAI:
292
  raise RuntimeError("google.generativeai SDK not available; install it.")
293
  local_path = current_path
294
- # Fast mode overrides compression behavior
295
  fast_mode = st.session_state.get("fast_mode", False)
296
  upload_path = local_path
297
  try:
@@ -299,11 +282,9 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
299
  except Exception:
300
  file_size_mb = 0
301
 
302
- # Only compress if large and not in fast mode
303
  if not fast_mode and file_size_mb > 50:
304
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
305
  try:
306
- # Use faster preset when focusing on speed
307
  preset = "veryfast" if fast_mode else "fast"
308
  upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
309
  except Exception:
@@ -317,10 +298,9 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
317
  st.session_state["last_loaded_path"] = current_path
318
  st.session_state["file_hash"] = current_hash
319
 
320
- prompt_text = (analysis_prompt.strip() or "Describe this video in vivid detail.").strip()
321
 
322
  out = ""
323
- # Use lighter model/tokens in fast mode
324
  if st.session_state.get("fast_mode"):
325
  model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
326
  max_tokens = 512
@@ -370,13 +350,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
370
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
371
  if txt:
372
  text_pieces.append(txt)
373
-
374
  if not text_pieces:
375
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
376
  if top_text:
377
  text_pieces.append(top_text)
378
-
379
- # dedupe preserving order
380
  seen = set()
381
  filtered = []
382
  for t in text_pieces:
@@ -385,23 +362,17 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
385
  seen.add(t)
386
  out = "\n\n".join(filtered)
387
 
388
- # Remove prompt echo robustly
389
  if out:
390
  out = remove_prompt_echo(prompt_text, out)
391
-
392
- # fallback: trim if startswith prompt exactly (legacy)
393
  p = prompt_text
394
  if p and out.strip().lower().startswith(p.lower()):
395
  out = out.strip()[len(p):].lstrip(" \n:-")
396
-
397
- # strip placeholders
398
  placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
399
  low = out.strip().lower()
400
  for ph in placeholders:
401
  if low.startswith(ph):
402
  out = out.strip()[len(ph):].lstrip(" \n:-")
403
  break
404
-
405
  out = out.strip()
406
 
407
  st.session_state["analysis_out"] = out
@@ -414,7 +385,6 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
414
  finally:
415
  st.session_state["busy"] = False
416
 
417
- # Display cached analysis if available (avoid duplicate on same run)
418
  if st.session_state.get("analysis_out"):
419
  just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
420
  if not just_loaded_same:
@@ -423,4 +393,4 @@ if st.session_state.get("analysis_out"):
423
 
424
  if st.session_state.get("last_error"):
425
  with st.expander("Last Error", expanded=False):
426
- st.write(st.session_state.get("last_error"))
 
1
  # streamlit_app.py
2
  import os
3
  import time
 
4
  import string
5
+ import hashlib
6
  from glob import glob
7
  from pathlib import Path
 
8
  from difflib import SequenceMatcher
9
 
10
  import yt_dlp
 
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
38
 
 
39
  st.session_state.setdefault("videos", "")
40
  st.session_state.setdefault("loop_video", False)
41
  st.session_state.setdefault("uploaded_file", None)
 
47
  st.session_state.setdefault("file_hash", None)
48
  st.session_state.setdefault("fast_mode", False)
49
 
 
50
  def sanitize_filename(path_str: str):
51
  name = Path(path_str).name
52
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
101
  return file_obj.get("name") or file_obj.get("id")
102
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
103
 
 
104
  if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
105
  try:
106
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
107
  except Exception:
108
  pass
109
 
 
110
  st.sidebar.header("Video Input")
111
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
112
 
 
116
  model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
117
  model_id = model_input.strip() or "gemini-2.0-flash-lite"
118
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
119
+ default_prompt = (
120
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
121
+ "Use vivid, anatomically rich descriptions with numeric estimates for measurements. Include a list of detailed anatomical observations and measurements. "
122
+ "Adopt a playful, inquisitive persona and ensure the report is engaging and informative."
123
+ )
124
+ analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
125
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
126
  settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
127
 
 
135
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
136
  ]
137
 
 
138
  _agent = None
139
  if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
140
  try:
 
158
  except Exception:
159
  pass
160
 
 
161
  if "last_url_value" not in st.session_state:
162
  st.session_state["last_url_value"] = st.session_state.get("url", "")
163
  current_url = st.session_state.get("url", "")
 
165
  clear_all_video_state()
166
  st.session_state["last_url_value"] = current_url
167
 
 
168
  if st.sidebar.button("Load Video", use_container_width=True):
169
  try:
170
  vpw = st.session_state.get("video-password", "")
 
177
  except Exception as e:
178
  st.sidebar.error(f"Failed to load video: {e}")
179
 
 
180
  if st.session_state["videos"]:
181
  try:
182
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
 
198
 
199
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
200
 
 
201
  def upload_video_sdk(filepath: str):
202
  key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
203
  if not key:
 
225
  time.sleep(backoff)
226
  backoff = min(backoff * 2, 8.0)
227
 
 
228
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
229
  if not prompt or not text:
230
  return text
 
233
  b = " ".join(b_full[:check_len].lower().split())
234
  ratio = SequenceMatcher(None, a, b).ratio()
235
  if ratio >= ratio_threshold:
 
236
  cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
237
  new_text = b_full[cut:].lstrip(" \n:-")
 
238
  if len(new_text) >= 3:
239
  return new_text
 
240
  placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
241
  low = b_full.strip().lower()
242
  for ph in placeholders:
 
244
  return b_full[len(ph):].lstrip(" \n:-")
245
  return text
246
 
 
247
  col1, col2 = st.columns([1, 3])
248
  with col1:
249
+ generate_now = st.button("Generate the story", type="primary")
 
 
 
250
  with col2:
251
  pass
252
 
253
+ if generate_now and not st.session_state.get("busy"):
 
254
  if not st.session_state.get("videos"):
255
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
256
  else:
 
261
  try:
262
  st.session_state["busy"] = True
263
  processed = st.session_state.get("processed_file")
 
264
  current_path = st.session_state.get("videos")
 
265
  try:
266
  current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
267
  except Exception:
 
275
  if not HAS_GENAI:
276
  raise RuntimeError("google.generativeai SDK not available; install it.")
277
  local_path = current_path
 
278
  fast_mode = st.session_state.get("fast_mode", False)
279
  upload_path = local_path
280
  try:
 
282
  except Exception:
283
  file_size_mb = 0
284
 
 
285
  if not fast_mode and file_size_mb > 50:
286
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
287
  try:
 
288
  preset = "veryfast" if fast_mode else "fast"
289
  upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
290
  except Exception:
 
298
  st.session_state["last_loaded_path"] = current_path
299
  st.session_state["file_hash"] = current_hash
300
 
301
+ prompt_text = (analysis_prompt.strip() or default_prompt).strip()
302
 
303
  out = ""
 
304
  if st.session_state.get("fast_mode"):
305
  model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
306
  max_tokens = 512
 
350
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
351
  if txt:
352
  text_pieces.append(txt)
 
353
  if not text_pieces:
354
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
355
  if top_text:
356
  text_pieces.append(top_text)
 
 
357
  seen = set()
358
  filtered = []
359
  for t in text_pieces:
 
362
  seen.add(t)
363
  out = "\n\n".join(filtered)
364
 
 
365
  if out:
366
  out = remove_prompt_echo(prompt_text, out)
 
 
367
  p = prompt_text
368
  if p and out.strip().lower().startswith(p.lower()):
369
  out = out.strip()[len(p):].lstrip(" \n:-")
 
 
370
  placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
371
  low = out.strip().lower()
372
  for ph in placeholders:
373
  if low.startswith(ph):
374
  out = out.strip()[len(ph):].lstrip(" \n:-")
375
  break
 
376
  out = out.strip()
377
 
378
  st.session_state["analysis_out"] = out
 
385
  finally:
386
  st.session_state["busy"] = False
387
 
 
388
  if st.session_state.get("analysis_out"):
389
  just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
390
  if not just_loaded_same:
 
393
 
394
  if st.session_state.get("last_error"):
395
  with st.expander("Last Error", expanded=False):
396
+ st.write(st.session_state.get("last_error"))