Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -177,8 +177,21 @@ Your primary task is to identify the main event in the video. Your description s
|
|
| 177 |
In daylight, an unmarked white van enters from the right side of the frame and parks at the curb. An adult male exits the driver's seat. The man is wearing a black hooded sweatshirt with the hood up, obscuring his face, dark pants, and white sneakers. He walks directly to the house, pulls a tool from his pocket, and begins prying at the front door lock. After approximately 15 seconds, the door opens, and the man enters the residence. Three minutes later, he exits the house carrying a laptop and a backpack. He quickly returns to the van and drives away, exiting the frame to the left. No other people or vehicles are visible. This event can be categorized as a burglary.
|
| 178 |
"""
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
@spaces.GPU(duration=900)
|
| 181 |
-
|
|
|
|
| 182 |
if not video_path:
|
| 183 |
return "No video provided."
|
| 184 |
|
|
@@ -204,13 +217,17 @@ def caption_video(video_path: str) -> str:
|
|
| 204 |
t = time.perf_counter()
|
| 205 |
safe_path, used_temp, tr_info = maybe_transcode(video_path)
|
| 206 |
T.mark("maybe_transcode_s", time.perf_counter()-t)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
messages = [
|
| 209 |
{
|
| 210 |
"role": "user",
|
| 211 |
"content": [
|
| 212 |
{"type": "video", "video": video_path, "fps": 1},
|
| 213 |
-
{"type": "text", "text":
|
| 214 |
],
|
| 215 |
}
|
| 216 |
]
|
|
@@ -310,14 +327,35 @@ def caption_video(video_path: str) -> str:
|
|
| 310 |
return caption
|
| 311 |
|
| 312 |
# Gradio interface
|
| 313 |
-
demo = gr.Interface(
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
if __name__ == "__main__":
|
| 323 |
demo.launch(show_error=True)
|
|
|
|
| 177 |
In daylight, an unmarked white van enters from the right side of the frame and parks at the curb. An adult male exits the driver's seat. The man is wearing a black hooded sweatshirt with the hood up, obscuring his face, dark pants, and white sneakers. He walks directly to the house, pulls a tool from his pocket, and begins prying at the front door lock. After approximately 15 seconds, the door opens, and the man enters the residence. Three minutes later, he exits the house carrying a laptop and a backpack. He quickly returns to the van and drives away, exiting the frame to the left. No other people or vehicles are visible. This event can be categorized as a burglary.
|
| 178 |
"""
|
| 179 |
|
| 180 |
+
# [NEW] Upper bound on the length of a user-supplied prompt, so that an
# oversized prompt cannot overflow the model context. It can be overridden
# through the MAX_PROMPT_CHARS environment variable.
_DEFAULT_MAX_PROMPT_CHARS = 8000


def _read_max_prompt_chars() -> int:
    """Return the prompt length cap taken from the environment.

    Falls back to ``_DEFAULT_MAX_PROMPT_CHARS`` when the variable is unset,
    is not an integer, or is negative. A negative cap would turn the
    truncating slice into "drop the last N characters", which is never the
    intent. A value of 0 is kept as-is (every custom prompt becomes empty).
    """
    raw = os.environ.get("MAX_PROMPT_CHARS", "")
    try:
        limit = int(raw)
    except ValueError:
        # A malformed setting should not prevent the app from starting.
        return _DEFAULT_MAX_PROMPT_CHARS
    return limit if limit >= 0 else _DEFAULT_MAX_PROMPT_CHARS


MAX_PROMPT_CHARS = _read_max_prompt_chars()


# [NEW] Clean up / truncate the user-supplied custom prompt.
def _sanitize_prompt(p: str) -> str:
    """Return *p* stripped of surrounding whitespace and capped in length.

    Args:
        p: Raw prompt text. ``None``, an empty string, or any non-string
            value is treated as "no prompt supplied".

    Returns:
        The cleaned prompt, at most ``MAX_PROMPT_CHARS`` characters long,
        or ``""`` when there is nothing usable.
    """
    if not p or not isinstance(p, str):
        return ""
    cleaned = p.strip()
    # Cutting at the limit can land right after whitespace; strip again so
    # the result never ends with blanks.
    return cleaned[:MAX_PROMPT_CHARS].rstrip()
|
| 191 |
+
|
| 192 |
@spaces.GPU(duration=900)
|
| 193 |
+
# [CHANGED] Added the optional parameter prompt_text; the default empty string means the client did not supply one
|
| 194 |
+
def caption_video(video_path: str, prompt_text: str = "") -> str:
|
| 195 |
if not video_path:
|
| 196 |
return "No video provided."
|
| 197 |
|
|
|
|
| 217 |
t = time.perf_counter()
|
| 218 |
safe_path, used_temp, tr_info = maybe_transcode(video_path)
|
| 219 |
T.mark("maybe_transcode_s", time.perf_counter()-t)
|
| 220 |
+
|
| 221 |
+
# [NEW] If the client entered nothing, use the backend PROBLEM_TEXT; otherwise use the custom prompt (the default is never shown to the client)
|
| 222 |
+
user_prompt = _sanitize_prompt(prompt_text)
|
| 223 |
+
selected_prompt = user_prompt if user_prompt else PROBLEM_TEXT
|
| 224 |
|
| 225 |
messages = [
|
| 226 |
{
|
| 227 |
"role": "user",
|
| 228 |
"content": [
|
| 229 |
{"type": "video", "video": video_path, "fps": 1},
|
| 230 |
+
{"type": "text", "text": selected_prompt}, # [CHANGED] 由 selected_prompt 餵入
|
| 231 |
],
|
| 232 |
}
|
| 233 |
]
|
|
|
|
| 327 |
return caption
|
| 328 |
|
| 329 |
# Gradio interface
|
| 330 |
+
# demo = gr.Interface(
|
| 331 |
+
# fn=caption_video,
|
| 332 |
+
# inputs=gr.Video(label="Upload a video (mp4, mov, etc.)"),
|
| 333 |
+
# outputs=gr.Textbox(label="Caption", lines=18),
|
| 334 |
+
# title="Smart Home Video Caption (Private weights)",
|
| 335 |
+
# description="Upload a short clip to generate a factual surveillance-style caption.",
|
| 336 |
+
# allow_flagging="never",
|
| 337 |
+
# )
|
| 338 |
+
|
| 339 |
+
# [CHANGED] Gradio interface: the default prompt is no longer shown to the client
|
| 340 |
+
# - The second input is now an optional custom-prompt textbox with value="" and only a placeholder hint
|
| 341 |
+
# - It can also be placed inside an Accordion so that ordinary users are less likely to notice it
|
| 342 |
+
# Blocks UI: a video input, an optional custom-prompt textbox in an initially collapsed accordion, a caption output, and a Run button wired to caption_video.
with gr.Blocks(title="Smart Home Video Caption (Private weights)") as demo:
|
| 343 |
+
gr.Markdown("Upload a short clip to generate a factual surveillance-style caption.")
|
| 344 |
+
with gr.Row():
|
| 345 |
+
video_in = gr.Video(label="Upload a video (mp4, mov, etc.)")
|
| 346 |
+
with gr.Accordion("Advanced (optional custom prompt)", open=False): # [NEW]
|
| 347 |
+
prompt_in = gr.Textbox(
|
| 348 |
+
label="Custom prompt (optional)",
|
| 349 |
+
value="", # empty string => the client never sees the default prompt text
|
| 350 |
+
placeholder="Leave empty to use the default internal prompt",
|
| 351 |
+
lines=10,
|
| 352 |
+
show_copy_button=True
|
| 353 |
+
)
|
| 354 |
+
caption_out = gr.Textbox(label="Caption", lines=18)
|
| 355 |
+
|
| 356 |
+
run_btn = gr.Button("Run")
|
| 357 |
+
run_btn.click(fn=caption_video, inputs=[video_in, prompt_in], outputs=[caption_out])
|
| 358 |
+
|
| 359 |
|
| 360 |
if __name__ == "__main__":
|
| 361 |
demo.launch(show_error=True)
|