Update app.py
app.py
CHANGED
@@ -128,55 +128,48 @@ def analyze_cat_v12_final(video_path):
     t_audio = time.time() - t_0

     # --- B. VISION (fresh Processor on every call) ---
-    # … (earlier lines of the old vision block, 131-172, not recoverable from the diff view)
-    cleaned = cleaned[idx + len("assistant:"):].strip()
-
-    # ✅ HERE: use 'cleaned', not 'vlm_clean'
-    vlm_clean = cleaned.split('\n')[0].strip()
-
-
-    t_vlm = time.time() - t_1
+    t_1 = time.time()
+
+    vlm_proc = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-256M-Video-Instruct")
+
+    vlm_prompt = (
+        "You are a feline behavior expert. "
+        "Analyze precisely: number and position of ears, state of mouth (open/closed/tense), tail position and movement, and overall body posture. "
+        "Do not interpret mood. Only describe observable features."
+    )
+
+    messages = [{"role": "user", "content": [{"type": "video", "path": video_path}, {"type": "text", "text": vlm_prompt}]}]
+
+    # Tokenize and return the model inputs as tensors
+    vlm_inputs = vlm_proc.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt"
+    ).to(DEVICE)
+
+    input_length = vlm_inputs["input_ids"].shape[1]  # 🔑 number of prompt tokens
+
+    with torch.no_grad():
+        vlm_out = vlm_model.generate(
+            **vlm_inputs,
+            max_new_tokens=80,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9
+        )
+
+    # 🔑 SAFE DECODING: keep only the newly generated tokens
+    gen_tokens = vlm_out[0][input_length:]
+    vlm_clean = vlm_proc.batch_decode(gen_tokens.unsqueeze(0), skip_special_tokens=True)[0]
+
+    # Final cleanup: a single sentence, without "Assistant:"
+    vlm_clean = vlm_clean.strip().split('\n')[0]
+    if vlm_clean.lower().startswith("assistant:"):
+        vlm_clean = vlm_clean.split(":", 1)[-1].strip()
+
+    t_vlm = time.time() - t_1

     # --- C. JUDGE ---
     t_2 = time.time()
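
The core of this change is the decoding step: the old code decoded the full output sequence and then tried to strip an "assistant:" prefix by string matching, while the new code records the prompt length up front and decodes only the tokens generated after it. Below is a minimal self-contained sketch of that pattern, assuming a transformers build with SmolVLM2 support; the AutoModelForImageTextToText loader, the text-only prompt, and the local DEVICE variable are illustrative stand-ins for the app's own vlm_model and DEVICE, which are defined elsewhere in app.py.

# Sketch: decode only the newly generated tokens (assumption: a recent
# transformers release with SmolVLM2 support; prompt and loader are
# illustrative, not the app's exact setup).
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"

proc = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID).to(DEVICE)

messages = [{"role": "user", "content": [{"type": "text", "text": "Describe a cat's body language."}]}]
inputs = proc.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(DEVICE)

input_length = inputs["input_ids"].shape[1]  # tokens in the prompt
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=80)

# Slicing at input_length drops the echoed prompt, so the decoded text is
# the answer alone -- no brittle split on an "Assistant:" prefix required.
answer = proc.batch_decode(out[:, input_length:], skip_special_tokens=True)[0].strip()
print(answer)

Slicing by input_length stays correct even if the chat template or its role markers change, which is exactly where the old "assistant:" string split was fragile.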