Spaces:
Sleeping
Sleeping
CB committed on
Update streamlit_app.py
Browse files- streamlit_app.py +119 -68
streamlit_app.py
CHANGED
|
@@ -36,6 +36,7 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
|
|
| 36 |
DATA_DIR = Path("./data")
|
| 37 |
DATA_DIR.mkdir(exist_ok=True)
|
| 38 |
|
|
|
|
| 39 |
st.session_state.setdefault("videos", "")
|
| 40 |
st.session_state.setdefault("loop_video", False)
|
| 41 |
st.session_state.setdefault("uploaded_file", None)
|
|
@@ -46,6 +47,9 @@ st.session_state.setdefault("analysis_out", "")
|
|
| 46 |
st.session_state.setdefault("last_error", "")
|
| 47 |
st.session_state.setdefault("file_hash", None)
|
| 48 |
st.session_state.setdefault("fast_mode", False)
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
def sanitize_filename(path_str: str):
|
| 51 |
name = Path(path_str).name
|
|
@@ -101,48 +105,36 @@ def file_name_or_id(file_obj):
|
|
| 101 |
return file_obj.get("name") or file_obj.get("id")
|
| 102 |
return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
|
| 103 |
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
try:
|
| 106 |
-
genai.configure(api_key=
|
| 107 |
except Exception:
|
|
|
|
| 108 |
pass
|
| 109 |
-
|
| 110 |
-
st.sidebar.header("Video Input")
|
| 111 |
-
st.sidebar.text_input("Video URL", key="url", placeholder="https://")
|
| 112 |
-
|
| 113 |
-
settings_exp = st.sidebar.expander("Settings", expanded=False)
|
| 114 |
-
env_api_key = os.getenv("GOOGLE_API_KEY", "")
|
| 115 |
-
API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placeholder="Set GOOGLE_API_KEY in .env or enter here", type="password")
|
| 116 |
-
model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
|
| 117 |
-
model_id = model_input.strip() or "gemini-2.0-flash-lite"
|
| 118 |
-
model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
|
| 119 |
-
default_prompt = (
|
| 120 |
-
"Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
|
| 121 |
-
"Use vivid, anatomically rich descriptions with numeric estimates for measurements. Include a list of detailed anatomical observations and measurements. "
|
| 122 |
-
"Adopt a playful, inquisitive persona and ensure the report is engaging and informative."
|
| 123 |
-
)
|
| 124 |
-
analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
|
| 125 |
-
settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
|
| 126 |
-
settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
|
| 127 |
-
|
| 128 |
-
if not API_KEY and not os.getenv("GOOGLE_API_KEY"):
|
| 129 |
-
settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
|
| 130 |
-
|
| 131 |
-
safety_settings = [
|
| 132 |
-
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
|
| 133 |
-
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
|
| 134 |
-
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
|
| 135 |
-
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
|
| 136 |
-
]
|
| 137 |
|
| 138 |
_agent = None
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
try:
|
| 141 |
-
|
| 142 |
-
genai.configure(api_key=key_to_use)
|
| 143 |
_agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
|
|
|
|
| 144 |
except Exception:
|
| 145 |
_agent = None
|
|
|
|
| 146 |
|
| 147 |
def clear_all_video_state():
|
| 148 |
st.session_state.pop("uploaded_file", None)
|
|
@@ -165,46 +157,43 @@ if current_url != st.session_state.get("last_url_value"):
|
|
| 165 |
clear_all_video_state()
|
| 166 |
st.session_state["last_url_value"] = current_url
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
vpw = st.session_state.get("video-password", "")
|
| 171 |
-
path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
|
| 172 |
-
st.session_state["videos"] = path
|
| 173 |
-
st.session_state["last_loaded_path"] = path
|
| 174 |
-
st.session_state.pop("uploaded_file", None)
|
| 175 |
-
st.session_state.pop("processed_file", None)
|
| 176 |
-
st.session_state["file_hash"] = file_sha256(path)
|
| 177 |
-
except Exception as e:
|
| 178 |
-
st.sidebar.error(f"Failed to load video: {e}")
|
| 179 |
-
|
| 180 |
-
if st.session_state["videos"]:
|
| 181 |
-
try:
|
| 182 |
-
st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
|
| 183 |
-
except Exception:
|
| 184 |
-
st.sidebar.write("Couldn't preview video")
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
-
|
| 191 |
-
|
|
|
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
|
| 196 |
-
except Exception:
|
| 197 |
-
st.sidebar.error("Failed to prepare download")
|
| 198 |
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
def upload_video_sdk(filepath: str):
|
| 202 |
-
key =
|
| 203 |
if not key:
|
| 204 |
raise RuntimeError("No API key provided")
|
| 205 |
if not HAS_GENAI or upload_file is None:
|
| 206 |
raise RuntimeError("google.generativeai SDK not available; cannot upload")
|
| 207 |
genai.configure(api_key=key)
|
|
|
|
| 208 |
return upload_file(filepath)
|
| 209 |
|
| 210 |
def wait_for_processed(file_obj, timeout=180):
|
|
@@ -246,20 +235,75 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
|
|
| 246 |
|
| 247 |
col1, col2 = st.columns([1, 3])
|
| 248 |
with col1:
|
| 249 |
-
generate_now = st.button("Generate the story", type="primary")
|
| 250 |
with col2:
|
| 251 |
pass
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
if generate_now and not st.session_state.get("busy"):
|
| 254 |
if not st.session_state.get("videos"):
|
| 255 |
st.error("No video loaded. Use 'Load Video' in the sidebar.")
|
| 256 |
else:
|
| 257 |
-
key_to_use =
|
| 258 |
if not key_to_use:
|
| 259 |
st.error("Google API key not set.")
|
| 260 |
else:
|
| 261 |
try:
|
| 262 |
st.session_state["busy"] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
processed = st.session_state.get("processed_file")
|
| 264 |
current_path = st.session_state.get("videos")
|
| 265 |
try:
|
|
@@ -302,15 +346,21 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 302 |
|
| 303 |
out = ""
|
| 304 |
if st.session_state.get("fast_mode"):
|
| 305 |
-
model_used =
|
| 306 |
max_tokens = 512
|
| 307 |
else:
|
| 308 |
-
model_used =
|
| 309 |
max_tokens = 1024
|
| 310 |
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
with st.spinner("Generating description via Agent..."):
|
| 313 |
-
response =
|
| 314 |
out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
|
| 315 |
else:
|
| 316 |
if not HAS_GENAI or genai is None:
|
|
@@ -379,6 +429,7 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 379 |
st.session_state["last_error"] = ""
|
| 380 |
st.subheader("Analysis Result")
|
| 381 |
st.markdown(out)
|
|
|
|
| 382 |
except Exception as e:
|
| 383 |
st.session_state["last_error"] = str(e)
|
| 384 |
st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
|
|
|
|
| 36 |
DATA_DIR = Path("./data")
|
| 37 |
DATA_DIR.mkdir(exist_ok=True)
|
| 38 |
|
| 39 |
+
# Session defaults
|
| 40 |
st.session_state.setdefault("videos", "")
|
| 41 |
st.session_state.setdefault("loop_video", False)
|
| 42 |
st.session_state.setdefault("uploaded_file", None)
|
|
|
|
| 47 |
st.session_state.setdefault("last_error", "")
|
| 48 |
st.session_state.setdefault("file_hash", None)
|
| 49 |
st.session_state.setdefault("fast_mode", False)
|
| 50 |
+
st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
|
| 51 |
+
st.session_state.setdefault("last_model", "")
|
| 52 |
+
st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
|
| 53 |
|
| 54 |
def sanitize_filename(path_str: str):
|
| 55 |
name = Path(path_str).name
|
|
|
|
| 105 |
return file_obj.get("name") or file_obj.get("id")
|
| 106 |
return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
|
| 107 |
|
| 108 |
+
def get_effective_api_key():
    """Return the Google API key to use for this session.

    A non-empty ``api_key`` entry in ``st.session_state`` wins; otherwise the
    ``GOOGLE_API_KEY`` environment variable is returned (``None`` if unset).
    """
    session_key = st.session_state.get("api_key")
    if session_key:
        return session_key
    return os.getenv("GOOGLE_API_KEY")
|
| 110 |
+
|
| 111 |
+
def configure_genai_if_needed():
    """Configure the ``genai`` SDK with the effective API key.

    Returns:
        bool: ``True`` when a key was available and ``genai.configure``
        completed without raising; ``False`` when no key is set or the
        configure call failed.
    """
    key = get_effective_api_key()
    if not key:
        return False
    try:
        genai.configure(api_key=key)
    except Exception:
        # Still best-effort (nothing is raised to the caller), but report the
        # failure through the return value instead of claiming success.
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
_agent = None
|
| 123 |
+
def maybe_create_agent(model_id: str):
    """Return an Agent for ``model_id``, building one when necessary.

    The module-level ``_agent`` is reused when it exists and
    ``st.session_state["last_model"]`` matches ``model_id``. When the phi or
    genai libraries are flagged unavailable, or no API key is set, the cached
    agent is cleared and ``None`` is returned. Any exception raised while
    configuring genai or constructing the Agent also results in ``None``.
    """
    global _agent

    api_key = get_effective_api_key()
    prerequisites_met = HAS_PHI and HAS_GENAI and api_key
    if not prerequisites_met:
        _agent = None
        return None

    # Reuse the existing agent only if it was built for the same model.
    if _agent:
        if st.session_state.get("last_model") == model_id:
            return _agent

    try:
        genai.configure(api_key=api_key)
        fresh_agent = Agent(
            name="Video AI summarizer",
            model=Gemini(id=model_id),
            tools=[DuckDuckGo()],
            markdown=True,
        )
        _agent = fresh_agent
        st.session_state["last_model"] = model_id
    except Exception:
        _agent = None
    return _agent
|
| 138 |
|
| 139 |
def clear_all_video_state():
|
| 140 |
st.session_state.pop("uploaded_file", None)
|
|
|
|
| 157 |
clear_all_video_state()
|
| 158 |
st.session_state["last_url_value"] = current_url
|
| 159 |
|
| 160 |
+
st.sidebar.header("Video Input")
|
| 161 |
+
st.sidebar.text_input("Video URL", key="url", placeholder="https://")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
# --- Sidebar "Settings" expander: model, API key, prompt and run options ---
settings_exp = st.sidebar.expander("Settings", expanded=False)
model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")

# API key widget. Its initial value comes from st.session_state["api_key"],
# which the session defaults earlier in this file seed from GOOGLE_API_KEY.
# Passing value= as well would make Streamlit warn that the widget has both a
# default value and a value set through the Session State API.
settings_exp.text_input("Google API Key", key="api_key", type="password")

default_prompt = (
    "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
    "Keep language professional and avoid anatomically explicit or sensitive detail. Include a list of observations and any timestamps for notable events."
)
analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")

# Show which key is active. The session value is pre-seeded from the
# environment, so it only counts as a "session" key when the user has typed
# something different from GOOGLE_API_KEY.
_env_api_key = os.getenv("GOOGLE_API_KEY", "")
_session_api_key = st.session_state.get("api_key")
if _session_api_key and _session_api_key != _env_api_key:
    key_source = "session"
elif _env_api_key:
    key_source = ".env"
else:
    key_source = "none"
settings_exp.caption(f"Using API key from: **{key_source}**")

if not get_effective_api_key():
    settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")

# Safety thresholds for the four harm categories listed here, all set to "OFF".
safety_settings = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
]
|
| 188 |
|
| 189 |
def upload_video_sdk(filepath: str):
    """Upload the file at ``filepath`` through the genai SDK.

    Args:
        filepath: Path of the local video file to upload.

    Returns:
        Whatever ``upload_file(filepath)`` returns.

    Raises:
        RuntimeError: If no API key is available, or if the genai SDK (or its
            ``upload_file`` helper) is not available.
    """
    api_key = get_effective_api_key()
    if not api_key:
        raise RuntimeError("No API key provided")

    sdk_ready = HAS_GENAI and upload_file is not None
    if not sdk_ready:
        raise RuntimeError("google.generativeai SDK not available; cannot upload")

    # Configure on every call so the key currently in effect is the one used.
    genai.configure(api_key=api_key)
    return upload_file(filepath)
|
| 198 |
|
| 199 |
def wait_for_processed(file_obj, timeout=180):
|
|
|
|
| 235 |
|
| 236 |
col1, col2 = st.columns([1, 3])
|
| 237 |
with col1:
|
| 238 |
+
generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
|
| 239 |
with col2:
|
| 240 |
pass
|
| 241 |
|
| 242 |
+
if st.sidebar.button("Load Video", use_container_width=True):
|
| 243 |
+
try:
|
| 244 |
+
vpw = st.session_state.get("video-password", "")
|
| 245 |
+
path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
|
| 246 |
+
st.session_state["videos"] = path
|
| 247 |
+
st.session_state["last_loaded_path"] = path
|
| 248 |
+
st.session_state.pop("uploaded_file", None)
|
| 249 |
+
st.session_state.pop("processed_file", None)
|
| 250 |
+
st.session_state["file_hash"] = file_sha256(path)
|
| 251 |
+
except Exception as e:
|
| 252 |
+
st.sidebar.error(f"Failed to load video: {e}")
|
| 253 |
+
|
| 254 |
+
if st.session_state["videos"]:
|
| 255 |
+
try:
|
| 256 |
+
st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
|
| 257 |
+
except Exception:
|
| 258 |
+
st.sidebar.write("Couldn't preview video")
|
| 259 |
+
|
| 260 |
+
with st.sidebar.expander("Options", expanded=False):
|
| 261 |
+
loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
|
| 262 |
+
st.session_state["loop_video"] = loop_checkbox
|
| 263 |
+
|
| 264 |
+
if st.button("Clear Video(s)"):
|
| 265 |
+
clear_all_video_state()
|
| 266 |
+
|
| 267 |
+
try:
|
| 268 |
+
with open(st.session_state["videos"], "rb") as vf:
|
| 269 |
+
st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
|
| 270 |
+
except Exception:
|
| 271 |
+
st.sidebar.error("Failed to prepare download")
|
| 272 |
+
|
| 273 |
+
st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
|
| 274 |
+
# show file size and compression suggestion
|
| 275 |
+
try:
|
| 276 |
+
file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
|
| 277 |
+
st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
|
| 278 |
+
if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
|
| 279 |
+
st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
|
| 280 |
+
st.session_state["fast_mode"] = True
|
| 281 |
+
except Exception:
|
| 282 |
+
pass
|
| 283 |
+
|
| 284 |
if generate_now and not st.session_state.get("busy"):
|
| 285 |
if not st.session_state.get("videos"):
|
| 286 |
st.error("No video loaded. Use 'Load Video' in the sidebar.")
|
| 287 |
else:
|
| 288 |
+
key_to_use = get_effective_api_key()
|
| 289 |
if not key_to_use:
|
| 290 |
st.error("Google API key not set.")
|
| 291 |
else:
|
| 292 |
try:
|
| 293 |
st.session_state["busy"] = True
|
| 294 |
+
# ensure genai is configured now
|
| 295 |
+
try:
|
| 296 |
+
genai.configure(api_key=key_to_use)
|
| 297 |
+
except Exception:
|
| 298 |
+
pass
|
| 299 |
+
|
| 300 |
+
# recreate/clear agent if key or model changed
|
| 301 |
+
model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
|
| 302 |
+
if st.session_state.get("last_model") != model_id:
|
| 303 |
+
# clear cached agent to rebuild with new model/key
|
| 304 |
+
st.session_state["last_model"] = ""
|
| 305 |
+
maybe_create_agent(model_id)
|
| 306 |
+
|
| 307 |
processed = st.session_state.get("processed_file")
|
| 308 |
current_path = st.session_state.get("videos")
|
| 309 |
try:
|
|
|
|
| 346 |
|
| 347 |
out = ""
|
| 348 |
if st.session_state.get("fast_mode"):
|
| 349 |
+
model_used = model_id if model_id else "gemini-2.0-flash-lite"
|
| 350 |
max_tokens = 512
|
| 351 |
else:
|
| 352 |
+
model_used = model_id
|
| 353 |
max_tokens = 1024
|
| 354 |
|
| 355 |
+
# cost/tokens estimate (very rough)
|
| 356 |
+
est_tokens = max_tokens
|
| 357 |
+
est_cost_caption = f"Est. max tokens: {est_tokens}"
|
| 358 |
+
|
| 359 |
+
# Generate via Agent if available
|
| 360 |
+
agent = maybe_create_agent(model_used)
|
| 361 |
+
if agent:
|
| 362 |
with st.spinner("Generating description via Agent..."):
|
| 363 |
+
response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
|
| 364 |
out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
|
| 365 |
else:
|
| 366 |
if not HAS_GENAI or genai is None:
|
|
|
|
| 429 |
st.session_state["last_error"] = ""
|
| 430 |
st.subheader("Analysis Result")
|
| 431 |
st.markdown(out)
|
| 432 |
+
st.caption(est_cost_caption)
|
| 433 |
except Exception as e:
|
| 434 |
st.session_state["last_error"] = str(e)
|
| 435 |
st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
|