Spaces:

somebeast
/

tribe-v2

Running on Zero

App Files Files Community

somebeast committed on Apr 4

Commit

5aa4075

verified ·

1 Parent(s): 2218541

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +95 -53

app.py CHANGED Viewed

@@ -188,62 +188,108 @@ def _insight(s):
 # ---- Handlers ----
-def score_text_with_chart(text):
-    if not text or not text.strip(): return "Enter text.", None, ""
-    try:
-        s = _predict(text.strip())
-        return _fmt(s), _radar(s), _insight(s)
-    except Exception as e:
-        import traceback
-        return f"Error: {e}\n{traceback.format_exc()}", None, ""
 @spaces.GPU(duration=120)
-def _transcribe_video(video_path):
-    """Extract audio from video and transcribe using Whisper."""
-    import subprocess, tempfile
-    # Extract audio with ffmpeg
-    audio_path = tempfile.mktemp(suffix=".wav")
     subprocess.run(["ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le",
                      "-ar", "16000", "-ac", "1", audio_path, "-y"],
                    capture_output=True, timeout=60)
-    # Transcribe with Whisper
-    try:
-        import whisper
-        whisper_model = whisper.load_model("base")
-        result = whisper_model.transcribe(audio_path)
-        transcript = result["text"]
-    except ImportError:
-        # Fallback: use transformers pipeline
-        from transformers import pipeline
-        pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base",
-                       device="cuda", torch_dtype=torch.float16)
-        result = pipe(audio_path)
-        transcript = result["text"]
-    finally:
-        if os.path.exists(audio_path):
-            os.unlink(audio_path)
     torch.cuda.empty_cache()
-    return transcript
-def score_video(video):
-    if video is None: return "Upload a video.", None, ""
     try:
-        # Transcribe video audio
-        transcript = _transcribe_video(video)
-        if not transcript or not transcript.strip():
-            return "Could not extract speech from video.", None, ""
-        # Score the transcript
-        s = _predict(transcript.strip())
-        scores_text = f"Transcript: {transcript[:200]}{'...' if len(transcript) > 200 else ''}\n\n{_fmt(s)}"
-        return scores_text, _radar(s, title="Video Brain Engagement"), _insight(s)
     except Exception as e:
         import traceback
-        return f"Error: {e}\n{traceback.format_exc()}", None, ""
 def ab_test_safe(a, b):
@@ -277,21 +323,17 @@ with gr.Blocks(title="TRIBE V2 Brain Prediction", theme=gr.themes.Base(
     with gr.Tab("📝 Text"):
         t_in = gr.Textbox(label="Content", lines=5, placeholder="Paste script or hook...")
         t_btn = gr.Button("🧠 Analyze", variant="primary")
-        with gr.Row():
-            t_out = gr.Textbox(label="Scores", lines=10)
-            t_img = gr.Image(label="Brain Radar", type="filepath")
         t_ins = gr.Textbox(label="💡 Insight")
-        t_btn.click(score_text_with_chart, [t_in], [t_out, t_img, t_ins], api_name="predict")
     with gr.Tab("🎬 Video"):
-        gr.Markdown("Upload a video — audio is transcribed and scored. ~30-60s.")
         v_in = gr.Video(label="Upload Video")
         v_btn = gr.Button("🧠 Analyze Video", variant="primary")
-        with gr.Row():
-            v_out = gr.Textbox(label="Scores", lines=10)
-            v_img = gr.Image(label="Brain Radar", type="filepath")
         v_ins = gr.Textbox(label="💡 Insight")
-        v_btn.click(score_video, [v_in], [v_out, v_img, v_ins], api_name="predict_video")
     with gr.Tab("⚔️ A/B Test"):
         with gr.Row():

 # ---- Handlers ----
 @spaces.GPU(duration=120)
+def _transcribe_and_score(video_path):
+    """Extract audio, transcribe with Whisper, then score with Phi-2."""
+    import subprocess
+    # Extract audio
+    audio_path = os.path.join(os.path.dirname(video_path), "audio_extract.wav")
     subprocess.run(["ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le",
                      "-ar", "16000", "-ac", "1", audio_path, "-y"],
                    capture_output=True, timeout=60)
+    # Transcribe
+    import whisper
+    whisper_model = whisper.load_model("base", device="cuda")
+    result = whisper_model.transcribe(audio_path)
+    transcript = result["text"]
+    if os.path.exists(audio_path):
+        os.unlink(audio_path)
+    if not transcript or not transcript.strip():
+        raise ValueError("No speech detected in video")
+    # Score transcript using Phi-2
+    m = ensure_model()
+    tok = m["tokenizer"]
+    llm = m["model"].cuda().half()
+    inputs = tok(transcript, return_tensors="pt", truncation=True, max_length=512).to("cuda")
+    with torch.inference_mode():
+        outputs = llm(**inputs)
+    logits = outputs.logits
+    hidden = outputs.hidden_states[-1]
+    shift_logits = logits[:, :-1, :].contiguous()
+    shift_labels = inputs["input_ids"][:, 1:].contiguous()
+    losses = torch.nn.CrossEntropyLoss(reduction="none")(
+        shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
+    perplexity = float(torch.exp(losses.mean()).cpu())
+    attention_raw = min(perplexity / 30.0, 1.0)
+    ids = inputs["input_ids"][0].cpu().tolist()
+    language_raw = len(set(ids)) / max(len(ids), 1)
+    hn = hidden.squeeze().cpu().float().numpy()
+    norms = np.linalg.norm(hn, axis=1)
+    emotion_raw = float(np.std(norms) / (np.mean(norms) + 1e-8))
+    tl = transcript.lower()
+    nums = sum(c.isdigit() for c in transcript) / max(len(transcript), 1)
+    caps = sum(c.isupper() for c in transcript) / max(len(transcript), 1)
+    urgency = sum(1 for w in ["now", "shock", "destroy", "change", "secret",
+        "never", "always", "must", "urgent", "breaking", "exclusive", "free",
+        "fastest", "cheapest", "worst", "best", "insane", "crazy"] if w in tl)
+    visual_raw = min(nums * 10 + caps * 5 + urgency * 0.15, 1.0)
+    words = tl.split()
+    personal = sum(1 for w in words if w in ["i", "me", "my", "you", "your", "we", "our"])
+    dm_raw = min(personal / max(len(words), 1) * 5, 1.0)
+    def sig(v, c=0.3, s=8.0):
+        return float(100.0 / (1.0 + np.exp(-s * (max(0, min(1, v)) - c))))
+    att = sig(attention_raw, 0.25, 6.0)
+    emo = sig(emotion_raw, 0.15, 10.0)
+    lang = sig(language_raw, 0.5, 8.0)
+    vis = sig(visual_raw, 0.2, 8.0)
+    dm = sig(dm_raw, 0.2, 6.0)
+    overall = (att + emo + lang + vis + dm) / 5.0
+    viral = att * 0.4 + emo * 0.4 + vis * 0.2
     torch.cuda.empty_cache()
+    return transcript, {
+        "overall_brain_engagement": round(overall, 1),
+        "viral_potential": round(viral, 1),
+        "attention_capture": round(att, 1),
+        "emotional_valence": round(emo, 1),
+        "language_processing": round(lang, 1),
+        "visual_imagery": round(vis, 1),
+        "hook_effectiveness": round(att, 1),
+        "retention_prediction": round(min(lang / max(att, 1) * 100, 100), 1),
+    }
+def score_video_safe(video):
+    if video is None: return "Upload a video.", ""
     try:
+        transcript, s = _transcribe_and_score(video)
+        preview = transcript[:300] + ("..." if len(transcript) > 300 else "")
+        return f"Transcript:\n{preview}\n\n{_fmt(s)}", _insight(s)
     except Exception as e:
         import traceback
+        return f"Error: {e}\n{traceback.format_exc()}", ""
+def score_text_safe(text):
+    if not text or not text.strip(): return "Enter text.", ""
+    try:
+        s = _predict(text.strip())
+        return _fmt(s), _insight(s)
+    except Exception as e:
+        import traceback
+        return f"Error: {e}\n{traceback.format_exc()}", ""
 def ab_test_safe(a, b):
     with gr.Tab("📝 Text"):
         t_in = gr.Textbox(label="Content", lines=5, placeholder="Paste script or hook...")
         t_btn = gr.Button("🧠 Analyze", variant="primary")
+        t_out = gr.Textbox(label="Scores", lines=10)
         t_ins = gr.Textbox(label="💡 Insight")
+        t_btn.click(score_text_safe, [t_in], [t_out, t_ins], api_name="predict")
     with gr.Tab("🎬 Video"):
+        gr.Markdown("Upload a video — audio is transcribed and scored. ~30-60s on GPU.")
         v_in = gr.Video(label="Upload Video")
         v_btn = gr.Button("🧠 Analyze Video", variant="primary")
+        v_out = gr.Textbox(label="Scores", lines=12)
         v_ins = gr.Textbox(label="💡 Insight")
+        v_btn.click(score_video_safe, [v_in], [v_out, v_ins], api_name="predict_video")
     with gr.Tab("⚔️ A/B Test"):
         with gr.Row():