Spaces:

eddywu
/

security_demo

Sleeping

App Files Files Community

eddywu committed on Nov 11, 2025

Commit

43f8af8

verified ·

1 Parent(s): ef2a23b

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -12

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import gradio as gr
 import spaces
 import torch
@@ -9,6 +9,91 @@ from qwen_vl_utils import process_vision_info
 # --- 配置區 ---
 REPO_ID = "Memories-ai/security_model"
 TOKEN = os.environ.get("HF_TOKEN")
 # 載入模型（用私有 token），自動上 GPU
 @lru_cache(maxsize=1)
@@ -89,7 +174,13 @@ def caption_video(video_path: str) -> str:
     if not video_path:
         return "No video provided."
     model, processor = _load_model_and_processor()
     messages = [
         {
             "role": "user",
@@ -101,6 +192,7 @@ def caption_video(video_path: str) -> str:
     ]
     # 建構聊天模板與多模態輸入
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs, video_kwargs = process_vision_info(
         messages, return_video_kwargs=True
@@ -118,19 +210,48 @@ def caption_video(video_path: str) -> str:
     # 上 GPU（若可）
     if torch.cuda.is_available():
         inputs = inputs.to("cuda")
     with torch.inference_mode():
-        generated_ids = model.generate(**inputs, max_new_tokens=768)
-        generated_ids_trimmed = [
-            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-        ]
-        output_text = processor.batch_decode(
-            generated_ids_trimmed,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=False
-        )
-    return output_text[0] if output_text else ""
 # Gradio 介面
 demo = gr.Interface(

+import os, json, time, subprocess, tempfile, shutil
 import gradio as gr
 import spaces
 import torch
 # --- 配置區 ---
 REPO_ID = "Memories-ai/security_model"
 TOKEN = os.environ.get("HF_TOKEN")
# Runtime tunables, each overridable through an environment variable.
# Generation budget; the previous value of 768 was too high, so it is tightened for now.
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "160"))
# Frame-rate cap for input video; 6 fps is enough for captioning.
FORCE_FPS = int(os.environ.get("FORCE_FPS", "6"))
# Maximum frame width in pixels (1280, i.e. at most 720p).
TARGET_MAX_W = int(os.environ.get("TARGET_MAX_W", "1280"))
# When set to "1", the per-stage timings are appended to the returned caption.
DEBUG_TIMINGS = os.environ.get("DEBUG_TIMINGS", "0") == "1"

# Minor speed-up (effective on Ampere and later GPUs): allow TF32 matmuls.
torch.backends.cuda.matmul.allow_tf32 = True
# Older PyTorch releases do not provide set_float32_matmul_precision, so check
# for it explicitly instead of swallowing every possible exception.
if hasattr(torch, "set_float32_matmul_precision"):
    torch.set_float32_matmul_precision("high")
+# ---------- 實用工具：ffprobe & 可能轉碼 ----------
def _run_quiet(cmd: list[str]):
    """Run *cmd* with all of its output discarded.

    stderr is merged into stdout and both are sent to the null device.
    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
def _parse_frame_rate(rate) -> float:
    """Convert an ffprobe ``"num/den"`` rate string to frames per second.

    Returns 0.0 for anything that is not a well-formed ratio with a
    non-zero denominator.
    """
    if not isinstance(rate, str) or "/" not in rate:
        return 0.0
    num, _, den = rate.partition("/")
    try:
        numerator, denominator = float(num), float(den)
    except ValueError:
        return 0.0
    return numerator / denominator if denominator != 0 else 0.0


def _to_int(value) -> int:
    """Coerce *value* to ``int``, mapping missing or malformed values to 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


def ffprobe_meta(path: str):
    """Probe the first video stream of *path* with ``ffprobe``.

    Returns a dict with keys ``codec`` (codec name or ``None``), ``w`` and
    ``h`` (pixel dimensions, 0 when unknown) and ``fps`` (average frame rate,
    0.0 when unknown).  This is best-effort: if ffprobe is missing, fails, or
    prints something that is not JSON, the all-unknown dict is returned
    instead of raising.  Each field is parsed independently, so one malformed
    field no longer discards the others.
    """
    unknown = {"codec": None, "w": 0, "h": 0, "fps": 0.0}
    try:
        raw = subprocess.check_output([
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=codec_name,width,height,avg_frame_rate",
            "-of", "json", path,
        ])
        data = json.loads(raw.decode("utf-8"))
    except (OSError, subprocess.SubprocessError, ValueError, TypeError):
        # OSError: ffprobe not installed; SubprocessError: non-zero exit;
        # ValueError: undecodable / non-JSON output; TypeError: bad path type.
        return unknown

    streams = data.get("streams") if isinstance(data, dict) else None
    stream = streams[0] if isinstance(streams, list) and streams else {}
    if not isinstance(stream, dict):
        return unknown

    return {
        "codec": stream.get("codec_name"),
        "w": _to_int(stream.get("width")),
        "h": _to_int(stream.get("height")),
        "fps": _parse_frame_rate(stream.get("avg_frame_rate", "0/0")),
    }
def maybe_transcode(input_path: str):
    """Re-encode *input_path* when it is HEVC/H.265, too wide, or too fast.

    The video is converted to H.264 + yuv420p, scaled down to at most
    ``TARGET_MAX_W`` pixels wide (aspect ratio kept) and limited to
    ``FORCE_FPS`` frames per second, applying only the steps that are needed.

    Returns a tuple ``(path, used_temp, info)``:
      * ``path`` - the file to use (the input itself when nothing was done),
      * ``used_temp`` - ``True`` when ``path`` is a temporary file the caller
        must delete,
      * ``info`` - dict with the probed ``meta``, a ``transcoded`` flag and,
        when transcoded, the ``vf`` filter string that was applied.

    Raises whatever ``_run_quiet`` raises when ffmpeg fails; the temporary
    output file is removed first so failed runs do not leak files.
    """
    meta = ffprobe_meta(input_path)
    codec, w, h, fps = meta["codec"], meta["w"], meta["h"], meta["fps"]

    need_codec_fix = codec in ("hevc", "h265")
    need_resize = bool(w and w > TARGET_MAX_W)
    # Half a frame of tolerance so a source already at the target rate is left alone.
    need_fps = bool(fps and fps > FORCE_FPS + 0.5)
    if not (need_codec_fix or need_resize or need_fps):
        return input_path, False, {"meta": meta, "transcoded": False}

    # Only reserve a unique name here; ffmpeg overwrites the file (-y).
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        out_path = tmp.name

    # Scale only when the width exceeds the cap (keeping the aspect ratio);
    # limit the frame rate only when it exceeds the cap.
    vf_parts = []
    if need_resize:
        vf_parts.append(f"scale='min({TARGET_MAX_W},iw)':-2")
    if need_fps:
        vf_parts.append(f"fps={FORCE_FPS}")
    # Codec-only conversion: just round both dimensions down to even numbers.
    vf = ",".join(vf_parts) if vf_parts else "scale=trunc(iw/2)*2:trunc(ih/2)*2"

    cmd = [
        "ffmpeg", "-y", "-i", input_path,
        "-vsync", "vfr",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
        "-pix_fmt", "yuv420p",
        "-vf", vf,
        "-c:a", "aac", "-b:a", "128k",
        "-movflags", "+faststart",
        out_path,
    ]
    try:
        _run_quiet(cmd)
    except BaseException:
        # Do not leave a partial or empty temp file behind when ffmpeg fails.
        try:
            os.remove(out_path)
        except OSError:
            pass
        raise
    return out_path, True, {"meta": meta, "transcoded": True, "vf": vf}
+# ---------- 分段計時 ----------
class Timer:
    """Collect named stage durations plus the total elapsed wall time.

    The clock starts when the instance is created.  Stages are not measured
    by the timer itself: the caller measures each one and records it with
    :meth:`mark`.
    """

    def __init__(self):
        # Reference point for the total reported by result().
        self.t0 = time.perf_counter()
        # (name, seconds) pairs in the order they were recorded.
        self.spans = []

    def mark(self, name, dur):
        """Record stage *name* as having taken *dur* seconds (rounded to ms)."""
        self.spans.append((name, round(dur, 3)))

    def result(self):
        """Return ``{"total_s": <elapsed>, <name>: <seconds>, ...}``.

        ``total_s`` is the time since construction, rounded to milliseconds.
        If a name was marked more than once, the last value wins.
        """
        total = round(time.perf_counter() - self.t0, 3)
        return {"total_s": total, **dict(self.spans)}
 # 載入模型（用私有 token），自動上 GPU
 @lru_cache(maxsize=1)
     if not video_path:
         return "No video provided."
+    T = Timer()
     model, processor = _load_model_and_processor()
+    # 1) 可能轉碼 / 降維 / 限 FPS
+    t = time.perf_counter()
+    safe_path, used_temp, tr_info = maybe_transcode(video_path)
+    T.mark("maybe_transcode_s", time.perf_counter()-t)
     messages = [
         {
             "role": "user",
     ]
     # 建構聊天模板與多模態輸入
+    t = time.perf_counter()
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs, video_kwargs = process_vision_info(
         messages, return_video_kwargs=True
     # 上 GPU（若可）
     if torch.cuda.is_available():
         inputs = inputs.to("cuda")
+        torch.cuda.synchronize()
+    T.mark("preprocess_s", time.perf_counter()-t)
+    gen_kwargs = dict(
+        max_new_tokens=MAX_NEW_TOKENS,
+        do_sample=False,           # caption 任務較適合確定性解碼，速度更快
+        temperature=0.0,
+        top_p=1.0
+    )
+    t = time.perf_counter()
     with torch.inference_mode():
+        generated_ids = model.generate(**inputs, **gen_kwargs)
+        if torch.cuda.is_available(): torch.cuda.synchronize()
+    T.mark("generate_s", time.perf_counter()-t)
+    # 5) 後處理
+    t = time.perf_counter()
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )
+    T.mark("postprocess_s", time.perf_counter()-t)
+    # 6) 清理暫存檔
+    if used_temp:
+        try: os.remove(safe_path)
+        except Exception: pass
+    # 打印詳細 timing 到日誌（HF Spaces Logs 可見）
+    print({"timings": T.result(), "transcode": tr_info})
+    caption = (output_text[0] if output_text else "").strip()
+    if DEBUG_TIMINGS:
+        rt = T.result()
+        caption += f"\n\n[timings] total={rt['total_s']}s, transcode={rt.get('maybe_transcode_s','-')}s, preprocess={rt.get('preprocess_s','-')}s, generate={rt.get('generate_s','-')}s"
+    return caption
 # Gradio 介面
 demo = gr.Interface(