eddywu committed on
Commit
7dcdb8f
·
verified ·
1 Parent(s): 2063b66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -10
app.py CHANGED
@@ -177,8 +177,21 @@ Your primary task is to identify the main event in the video. Your description s
177
  In daylight, an unmarked white van enters from the right side of the frame and parks at the curb. An adult male exits the driver's seat. The man is wearing a black hooded sweatshirt with the hood up, obscuring his face, dark pants, and white sneakers. He walks directly to the house, pulls a tool from his pocket, and begins prying at the front door lock. After approximately 15 seconds, the door opens, and the man enters the residence. Three minutes later, he exits the house carrying a laptop and a backpack. He quickly returns to the van and drives away, exiting the frame to the left. No other people or vehicles are visible. This event can be categorized as a burglary.
178
  """
179
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  @spaces.GPU(duration=900)
181
- def caption_video(video_path: str) -> str:
 
182
  if not video_path:
183
  return "No video provided."
184
 
@@ -204,13 +217,17 @@ def caption_video(video_path: str) -> str:
204
  t = time.perf_counter()
205
  safe_path, used_temp, tr_info = maybe_transcode(video_path)
206
  T.mark("maybe_transcode_s", time.perf_counter()-t)
 
 
 
 
207
 
208
  messages = [
209
  {
210
  "role": "user",
211
  "content": [
212
  {"type": "video", "video": video_path, "fps": 1},
213
- {"type": "text", "text": PROBLEM_TEXT},
214
  ],
215
  }
216
  ]
@@ -310,14 +327,35 @@ def caption_video(video_path: str) -> str:
310
  return caption
311
 
312
  # Gradio 介面
313
- demo = gr.Interface(
314
- fn=caption_video,
315
- inputs=gr.Video(label="Upload a video (mp4, mov, etc.)"),
316
- outputs=gr.Textbox(label="Caption", lines=18),
317
- title="Smart Home Video Caption (Private weights)",
318
- description="Upload a short clip to generate a factual surveillance-style caption.",
319
- allow_flagging="never",
320
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  if __name__ == "__main__":
323
  demo.launch(show_error=True)
 
177
  In daylight, an unmarked white van enters from the right side of the frame and parks at the curb. An adult male exits the driver's seat. The man is wearing a black hooded sweatshirt with the hood up, obscuring his face, dark pants, and white sneakers. He walks directly to the house, pulls a tool from his pocket, and begins prying at the front door lock. After approximately 15 seconds, the door opens, and the man enters the residence. Three minutes later, he exits the house carrying a laptop and a backpack. He quickly returns to the van and drives away, exiting the frame to the left. No other people or vehicles are visible. This event can be categorized as a burglary.
178
  """
179
 
180
+ # [NEW] 自訂 prompt 的長度(避免爆 context)
181
+ MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "8000"))
182
+
183
+ # [NEW] 清理/截斷使用者自訂 prompt
184
+ def _sanitize_prompt(p: str) -> str:
185
+ if not p:
186
+ return ""
187
+ p = p.strip()
188
+ if len(p) > MAX_PROMPT_CHARS:
189
+ p = p[:MAX_PROMPT_CHARS]
190
+ return p
191
+
192
  @spaces.GPU(duration=900)
193
+ # [CHANGED] Added the optional parameter prompt_text; the default empty string means the client did not supply one
194
+ def caption_video(video_path: str, prompt_text: str = "") -> str:
195
  if not video_path:
196
  return "No video provided."
197
 
 
217
  t = time.perf_counter()
218
  safe_path, used_temp, tr_info = maybe_transcode(video_path)
219
  T.mark("maybe_transcode_s", time.perf_counter()-t)
220
+
221
+ # [NEW] If the client enters nothing, use the backend PROBLEM_TEXT; otherwise use the custom prompt (the default is never shown to the client)
222
+ user_prompt = _sanitize_prompt(prompt_text)
223
+ selected_prompt = user_prompt if user_prompt else PROBLEM_TEXT
224
 
225
  messages = [
226
  {
227
  "role": "user",
228
  "content": [
229
  {"type": "video", "video": video_path, "fps": 1},
230
+ {"type": "text", "text": selected_prompt}, # [CHANGED] 由 selected_prompt 餵入
231
  ],
232
  }
233
  ]
 
327
  return caption
328
 
329
  # Gradio 介面
330
+ # demo = gr.Interface(
331
+ # fn=caption_video,
332
+ # inputs=gr.Video(label="Upload a video (mp4, mov, etc.)"),
333
+ # outputs=gr.Textbox(label="Caption", lines=18),
334
+ # title="Smart Home Video Caption (Private weights)",
335
+ # description="Upload a short clip to generate a factual surveillance-style caption.",
336
+ # allow_flagging="never",
337
+ # )
338
+
339
+ # [CHANGED] Gradio UI: the default prompt is no longer shown to the client
340
+ # - The second input becomes an optional custom-prompt textbox with value="" and only a placeholder hint
341
+ # - It can also be placed inside an Accordion so ordinary users are less likely to notice it
342
+ with gr.Blocks(title="Smart Home Video Caption (Private weights)") as demo:
343
+ gr.Markdown("Upload a short clip to generate a factual surveillance-style caption.")
344
+ with gr.Row():
345
+ video_in = gr.Video(label="Upload a video (mp4, mov, etc.)")
346
+ with gr.Accordion("Advanced (optional custom prompt)", open=False): # [NEW]
347
+ prompt_in = gr.Textbox(
348
+ label="Custom prompt (optional)",
349
+ value="", # empty string => the client never sees the default prompt text
350
+ placeholder="Leave empty to use the default internal prompt",
351
+ lines=10,
352
+ show_copy_button=True
353
+ )
354
+ caption_out = gr.Textbox(label="Caption", lines=18)
355
+
356
+ run_btn = gr.Button("Run")
357
+ run_btn.click(fn=caption_video, inputs=[video_in, prompt_in], outputs=[caption_out])
358
+
359
 
360
  if __name__ == "__main__":
361
  demo.launch(show_error=True)