Spaces:

promise42da
/

paul

Runtime error

App Files Files Community

suhpau committed on Jan 22

Commit

4105a85

verified ·

1 Parent(s): 356f4f2

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -39

app.py CHANGED Viewed

@@ -1,73 +1,105 @@
-import gradio as gr
-import cv2
 import numpy as np
 import torch
 from transformers import AutoImageProcessor, VideoMAEForVideoClassification
-import tempfile
-import os
-MODEL_DIR = "models/hotcold_videomae"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = None
 model = None
-def load_model():
     global processor, model
     processor = AutoImageProcessor.from_pretrained(MODEL_DIR)
     model = VideoMAEForVideoClassification.from_pretrained(MODEL_DIR)
     model.to(device)
     model.eval()
-def sample_frames(video_path, num_frames=16, size=224):
-    cap = cv2.VideoCapture(video_path)
-    frames = []
-    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    idxs = np.linspace(0, max(total-1,0), num_frames).astype(int)
-    cur = 0
-    ret_frames = []
-    while True:
-        ok, frame = cap.read()
-        if not ok:
-            break
-        if cur in idxs:
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frame = cv2.resize(frame, (size,size))
-            ret_frames.append(frame)
-        cur += 1
-    cap.release()
-    while len(ret_frames) < num_frames:
-        ret_frames.append(ret_frames[-1])
-    return ret_frames[:num_frames]
-@torch.no_grad()
-def predict(video):
     if model is None:
-        load_model()
-    frames = sample_frames(video)
-    inputs = processor(frames, return_tensors="pt").to(device)
     outputs = model(**inputs)
-    probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
-    p_cold, p_hot = probs
-    if p_hot > p_cold:
-        return f"🔥 더워요 (확률 {p_hot:.2f})"
     else:
-        return f"❄️ 추워요 (확률 {p_cold:.2f})"
 demo = gr.Interface(
     fn=predict,
-    inputs=gr.Video(label="사람 행동 영상 업로드"),
     outputs="text",
     title="Hot / Cold Action Recognition",
-    description="사람 행동 영상을 업로드하면 더운지/추운지 판별합니다."
 )
 if __name__ == "__main__":

+import os
 import numpy as np
 import torch
+import gradio as gr
 from transformers import AutoImageProcessor, VideoMAEForVideoClassification
+from decord import VideoReader, cpu
+MODEL_DIR = "models/hotcold_videomae"   # Path of the model folder uploaded to the Space
+NUM_FRAMES = 16  # default number of frames sampled from each video
+SIZE = 224  # default side length (pixels) each sampled frame is resized to
 device = "cuda" if torch.cuda.is_available() else "cpu"  # use CUDA when torch reports it, otherwise CPU
 processor = None  # filled in by _load_model()
 model = None  # filled in by _load_model(); predict() loads it while this is still None
def _extract_video_path(video_input):
    """Normalise a Gradio video value to a plain filesystem path.

    The original author notes that, depending on the Gradio version, the
    value handed to the callback may be a path string, a tuple/list whose
    first item is the video (followed by subtitles etc.), or a dict that
    stores the path under "name" (or "path").  All of these are accepted,
    as are ``os.PathLike`` objects.

    Args:
        video_input: The raw value received from the video component.

    Returns:
        The path as returned by ``os.fspath``, or ``None`` when no path
        can be found.  An empty string is passed through unchanged, so
        callers should test the result for truthiness.
    """
    if video_input is None:
        return None
    if isinstance(video_input, (str, os.PathLike)):
        # os.fspath returns str inputs as-is and unwraps Path-like objects.
        return os.fspath(video_input)
    if isinstance(video_input, (tuple, list)):
        # Recurse so a nested dict / Path / None in slot 0 is normalised too
        # instead of being handed back raw.
        return _extract_video_path(video_input[0]) if video_input else None
    if isinstance(video_input, dict):
        # Typically {"name": ".../tmp/xxxx.mp4", ...}; fall back to "path".
        return _extract_video_path(
            video_input.get("name") or video_input.get("path")
        )
    return None
def _load_model():
    """Load the image processor and VideoMAE classifier from ``MODEL_DIR``.

    On success the module-level ``processor`` and ``model`` are set; the
    model has been moved to ``device`` and switched to eval mode.  The
    globals are assigned only after every step has succeeded, so a failure
    part-way through (for example while moving the model to the device)
    leaves ``model`` as ``None`` and the next call retries from scratch.

    Raises:
        RuntimeError: If ``MODEL_DIR`` is not an existing directory.
    """
    global processor, model
    if not os.path.isdir(MODEL_DIR):
        # The hint interpolates MODEL_DIR so it stays accurate if the
        # constant is ever changed.
        raise RuntimeError(
            f"❌ 모델 폴더를 찾을 수 없어요: {MODEL_DIR}\n"
            f"Space 파일 목록에 '{MODEL_DIR}/'가 있는지 확인해줘."
        )
    loaded_processor = AutoImageProcessor.from_pretrained(MODEL_DIR)
    loaded_model = VideoMAEForVideoClassification.from_pretrained(MODEL_DIR)
    loaded_model.to(device)
    loaded_model.eval()
    # Publish both objects together, only once they are fully prepared.
    processor, model = loaded_processor, loaded_model
def _sample_frames_decord(video_path, num_frames=NUM_FRAMES, size=SIZE):
    """Sample evenly spaced frames from a video and resize them to squares.

    Frame indices are spread linearly from the first to the last frame, so
    a video with fewer than ``num_frames`` frames yields repeated frames.

    Args:
        video_path: Path of the video file to decode with decord.
        num_frames: Number of frames to return; must be at least 1.
        size: Side length in pixels of each returned frame; must be at
            least 1.

    Returns:
        A list of ``num_frames`` arrays, each resized to ``size`` x ``size``.

    Raises:
        ValueError: If ``num_frames`` or ``size`` is smaller than 1.
        RuntimeError: If the video contains no frames.
    """
    # Imported here, as the original block did: the module-level imports
    # shown for this file do not include cv2.
    import cv2

    if num_frames < 1 or size < 1:
        raise ValueError(
            f"num_frames and size must be >= 1 (got {num_frames}, {size})"
        )
    vr = VideoReader(video_path, ctx=cpu(0))
    total = len(vr)
    if total <= 0:
        raise RuntimeError("❌ 영상 프레임을 읽지 못했어요 (빈 영상일 수 있음).")
    idxs = np.linspace(0, total - 1, num_frames).astype(int)
    frames = vr.get_batch(idxs).asnumpy()  # (T, H, W, 3) RGB
    # Resize (simple version): plain bilinear scaling, aspect ratio not kept.
    return [
        cv2.resize(frame, (size, size), interpolation=cv2.INTER_LINEAR)
        for frame in frames
    ]
@torch.no_grad()
def predict(video_input):
    """Classify an uploaded video as hot or cold and return a display string.

    The model is loaded on the first call, frames are sampled from the
    video, and the softmax over the model's logits decides the label.

    Args:
        video_input: Raw value from the video component; it is normalised
            with ``_extract_video_path``.

    Returns:
        A user-facing message: the predicted label with both probabilities,
        or an explanation when the upload is missing or when model loading /
        frame sampling reports a ``RuntimeError``.
    """
    video_path = _extract_video_path(video_input)
    if not video_path:
        return "❌ 영상 파일이 제대로 전달되지 않았어요. 다시 업로드해줘."
    try:
        if model is None:
            _load_model()
        frames = _sample_frames_decord(video_path)
    except RuntimeError as exc:
        # The helpers raise RuntimeError with messages written for the end
        # user; return them like the missing-upload message above rather
        # than letting them escape.  Other exception types still propagate.
        return str(exc)
    inputs = processor(frames, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=-1)[0].detach().cpu().numpy()
    # Assumes class index 0 is "cold" and index 1 is "hot" -- TODO confirm
    # against the label mapping stored with the model.
    p_cold, p_hot = float(probs[0]), float(probs[1])
    if p_hot >= p_cold:
        return f"🔥 더워요 (hot={p_hot:.2f}, cold={p_cold:.2f})"
    return f"❄️ 추워요 (cold={p_cold:.2f}, hot={p_hot:.2f})"
 demo = gr.Interface(  # UI object that wires a video input to predict() and shows its text result
     fn=predict,  # callback that produces the output text
+    inputs=gr.Video(label="행동 영상 업로드"),  # video upload component
     outputs="text",
     title="Hot / Cold Action Recognition",
+    description="사람 행동 영상을 올리면 더운지/추운지 판별합니다.",
+    cache_examples=False,  # Tip: avoids mp4 caching problems on Spaces
 )
 if __name__ == "__main__":