Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -188,62 +188,108 @@ def _insight(s):
|
|
| 188 |
|
| 189 |
|
| 190 |
# ---- Handlers ----
|
| 191 |
-
def score_text_with_chart(text):
    """Score a piece of text and return (scores text, radar chart, insight).

    Blank or missing input yields a prompt message instead of a prediction.
    Any exception raised while predicting or rendering is reported in the
    first element, with ``None`` for the chart and an empty insight.
    """
    import traceback

    cleaned = text.strip() if text else ""
    if not cleaned:
        return "Enter text.", None, ""

    try:
        scores = _predict(cleaned)
        formatted = _fmt(scores)
        chart = _radar(scores)
        insight = _insight(scores)
    except Exception as exc:
        # Surface the full traceback in the UI so failures are debuggable.
        return f"Error: {exc}\n{traceback.format_exc()}", None, ""
    return formatted, chart, insight
|
| 199 |
-
|
| 200 |
-
|
| 201 |
@spaces.GPU(duration=120)
|
| 202 |
-
def
|
| 203 |
-
"""Extract audio
|
| 204 |
-
import subprocess
|
| 205 |
-
# Extract audio
|
| 206 |
-
audio_path =
|
| 207 |
subprocess.run(["ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le",
|
| 208 |
"-ar", "16000", "-ac", "1", audio_path, "-y"],
|
| 209 |
capture_output=True, timeout=60)
|
| 210 |
|
| 211 |
-
# Transcribe
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
torch.cuda.empty_cache()
|
| 229 |
-
return transcript
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
|
| 232 |
-
def
|
| 233 |
-
if video is None: return "Upload a video.",
|
| 234 |
try:
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
return "Could not extract speech from video.", None, ""
|
| 239 |
-
|
| 240 |
-
# Score the transcript
|
| 241 |
-
s = _predict(transcript.strip())
|
| 242 |
-
scores_text = f"Transcript: {transcript[:200]}{'...' if len(transcript) > 200 else ''}\n\n{_fmt(s)}"
|
| 243 |
-
return scores_text, _radar(s, title="Video Brain Engagement"), _insight(s)
|
| 244 |
except Exception as e:
|
| 245 |
import traceback
|
| 246 |
-
return f"Error: {e}\n{traceback.format_exc()}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
|
| 249 |
def ab_test_safe(a, b):
|
|
@@ -277,21 +323,17 @@ with gr.Blocks(title="TRIBE V2 Brain Prediction", theme=gr.themes.Base(
|
|
| 277 |
with gr.Tab("📝 Text"):
|
| 278 |
t_in = gr.Textbox(label="Content", lines=5, placeholder="Paste script or hook...")
|
| 279 |
t_btn = gr.Button("🧠 Analyze", variant="primary")
|
| 280 |
-
|
| 281 |
-
t_out = gr.Textbox(label="Scores", lines=10)
|
| 282 |
-
t_img = gr.Image(label="Brain Radar", type="filepath")
|
| 283 |
t_ins = gr.Textbox(label="💡 Insight")
|
| 284 |
-
t_btn.click(
|
| 285 |
|
| 286 |
with gr.Tab("🎬 Video"):
|
| 287 |
-
gr.Markdown("Upload a video — audio is transcribed and scored. ~30-60s.")
|
| 288 |
v_in = gr.Video(label="Upload Video")
|
| 289 |
v_btn = gr.Button("🧠 Analyze Video", variant="primary")
|
| 290 |
-
|
| 291 |
-
v_out = gr.Textbox(label="Scores", lines=10)
|
| 292 |
-
v_img = gr.Image(label="Brain Radar", type="filepath")
|
| 293 |
v_ins = gr.Textbox(label="💡 Insight")
|
| 294 |
-
v_btn.click(
|
| 295 |
|
| 296 |
with gr.Tab("⚔️ A/B Test"):
|
| 297 |
with gr.Row():
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
# ---- Handlers ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
def _extract_audio_track(video_path):
    """Decode the audio of *video_path* into a temporary 16 kHz mono WAV file.

    Returns:
        Path of the WAV file. The caller is responsible for deleting it.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        subprocess.TimeoutExpired: If ffmpeg runs longer than 60 seconds.
    """
    import subprocess
    import tempfile

    # A unique temp file keeps concurrent requests from overwriting each
    # other's audio (a fixed "audio_extract.wav" next to the upload would).
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        proc = subprocess.run(
            ["ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le",
             "-ar", "16000", "-ac", "1", audio_path, "-y"],
            capture_output=True, timeout=60)
        if proc.returncode != 0:
            # Keep only the tail of stderr: that is where ffmpeg puts the
            # actual failure reason.
            detail = proc.stderr.decode("utf-8", errors="replace").strip()[-500:]
            raise RuntimeError(f"ffmpeg could not extract audio: {detail}")
    except BaseException:
        if os.path.exists(audio_path):
            os.unlink(audio_path)
        raise
    return audio_path


@spaces.GPU(duration=120)
def _transcribe_and_score(video_path):
    """Extract audio, transcribe with Whisper, then score with Phi-2.

    The audio track is decoded with ffmpeg, transcribed by the Whisper
    "base" model on CUDA, and the transcript is run through the model
    returned by ``ensure_model()``. Raw signals (perplexity, token
    diversity, hidden-state norm variation and surface text features) are
    mapped onto a 0-100 scale with a logistic curve.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A ``(transcript, scores)`` tuple, where ``scores`` maps metric names
        to values rounded to one decimal place.

    Raises:
        RuntimeError: If ffmpeg fails to extract the audio track.
        ValueError: If no speech is found in the video.
    """
    try:
        audio_path = _extract_audio_track(video_path)
        try:
            import whisper
            whisper_model = whisper.load_model("base", device="cuda")
            transcript = whisper_model.transcribe(audio_path)["text"]
        finally:
            # Remove the temporary WAV even when transcription fails.
            if os.path.exists(audio_path):
                os.unlink(audio_path)

        if not transcript or not transcript.strip():
            raise ValueError("No speech detected in video")

        # Score transcript using Phi-2
        m = ensure_model()
        tok = m["tokenizer"]
        llm = m["model"].cuda().half()
        inputs = tok(transcript, return_tensors="pt", truncation=True,
                     max_length=512).to("cuda")
        with torch.inference_mode():
            # Ask for hidden states explicitly instead of relying on the
            # model config having them enabled.
            outputs = llm(**inputs, output_hidden_states=True)
            input_ids = inputs["input_ids"]

            # Next-token loss over the transcript -> perplexity.
            shift_logits = outputs.logits[:, :-1, :].contiguous()
            shift_labels = input_ids[:, 1:].contiguous()
            losses = torch.nn.CrossEntropyLoss(reduction="none")(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1))
            perplexity = float(torch.exp(losses.mean()).cpu())

            ids = input_ids[0].cpu().tolist()
            # Index the batch dimension rather than squeeze(): squeeze()
            # would also drop the sequence axis for a one-token transcript
            # and break the per-token norm below.
            hn = outputs.hidden_states[-1][0].cpu().float().numpy()

        attention_raw = min(perplexity / 30.0, 1.0)
        language_raw = len(set(ids)) / max(len(ids), 1)

        norms = np.linalg.norm(hn, axis=1)
        emotion_raw = float(np.std(norms) / (np.mean(norms) + 1e-8))

        tl = transcript.lower()
        n_chars = max(len(transcript), 1)
        nums = sum(c.isdigit() for c in transcript) / n_chars
        caps = sum(c.isupper() for c in transcript) / n_chars
        # Substring match on the lowercased transcript, so "shock" also
        # counts "shocking" (and "now" also counts "know").
        urgency_terms = (
            "now", "shock", "destroy", "change", "secret",
            "never", "always", "must", "urgent", "breaking", "exclusive", "free",
            "fastest", "cheapest", "worst", "best", "insane", "crazy",
        )
        urgency = sum(1 for w in urgency_terms if w in tl)
        visual_raw = min(nums * 10 + caps * 5 + urgency * 0.15, 1.0)

        # Whole-token match on whitespace-split words (punctuation is not
        # stripped, so "you," does not count).
        personal_terms = {"i", "me", "my", "you", "your", "we", "our"}
        words = tl.split()
        personal = sum(1 for w in words if w in personal_terms)
        dm_raw = min(personal / max(len(words), 1) * 5, 1.0)

        def sig(v, c=0.3, s=8.0):
            # Logistic squash of v (clamped to [0, 1]) onto 0-100, centred
            # at c with steepness s.
            return float(100.0 / (1.0 + np.exp(-s * (max(0, min(1, v)) - c))))

        att = sig(attention_raw, 0.25, 6.0)
        emo = sig(emotion_raw, 0.15, 10.0)
        lang = sig(language_raw, 0.5, 8.0)
        vis = sig(visual_raw, 0.2, 8.0)
        dm = sig(dm_raw, 0.2, 6.0)
        overall = (att + emo + lang + vis + dm) / 5.0
        viral = att * 0.4 + emo * 0.4 + vis * 0.2

        return transcript, {
            "overall_brain_engagement": round(overall, 1),
            "viral_potential": round(viral, 1),
            "attention_capture": round(att, 1),
            "emotional_valence": round(emo, 1),
            "language_processing": round(lang, 1),
            "visual_imagery": round(vis, 1),
            "hook_effectiveness": round(att, 1),
            "retention_prediction": round(min(lang / max(att, 1) * 100, 100), 1),
        }
    finally:
        # Release cached GPU memory on every exit path, not just success.
        torch.cuda.empty_cache()
|
| 272 |
|
| 273 |
|
| 274 |
+
def score_video_safe(video):
    """Gradio handler: transcribe and score an uploaded video.

    Returns a ``(scores text, insight)`` pair. The scores text starts with a
    transcript preview capped at 300 characters. A missing upload yields a
    prompt message; any exception is reported in the first element together
    with its traceback, and the insight is left empty.
    """
    import traceback

    if video is None:
        return "Upload a video.", ""

    try:
        text, scores = _transcribe_and_score(video)
        suffix = "..." if len(text) > 300 else ""
        preview = text[:300] + suffix
        report = f"Transcript:\n{preview}\n\n{_fmt(scores)}"
        insight = _insight(scores)
    except Exception as exc:
        # Show the full traceback in the UI so failures are debuggable.
        return f"Error: {exc}\n{traceback.format_exc()}", ""
    return report, insight
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def score_text_safe(text):
    """Gradio handler: score pasted text.

    Returns a ``(scores text, insight)`` pair. Blank or missing input yields
    a prompt message; any exception raised while scoring is reported in the
    first element together with its traceback, and the insight is left empty.
    """
    import traceback

    cleaned = text.strip() if text else ""
    if not cleaned:
        return "Enter text.", ""

    try:
        scores = _predict(cleaned)
        formatted = _fmt(scores)
        insight = _insight(scores)
    except Exception as exc:
        # Show the full traceback in the UI so failures are debuggable.
        return f"Error: {exc}\n{traceback.format_exc()}", ""
    return formatted, insight
|
| 293 |
|
| 294 |
|
| 295 |
def ab_test_safe(a, b):
|
|
|
|
| 323 |
with gr.Tab("📝 Text"):
    # Text tab: paste content, press the button, read scores and insight.
    t_in = gr.Textbox(
        label="Content",
        lines=5,
        placeholder="Paste script or hook...",
    )
    t_btn = gr.Button(value="🧠 Analyze", variant="primary")
    t_out = gr.Textbox(label="Scores", lines=10)
    t_ins = gr.Textbox(label="💡 Insight")
    # score_text_safe returns (scores text, insight), matching the two outputs.
    t_btn.click(
        fn=score_text_safe,
        inputs=[t_in],
        outputs=[t_out, t_ins],
        api_name="predict",
    )
|
| 329 |
|
| 330 |
with gr.Tab("🎬 Video"):
    # Video tab: upload a clip, press the button, read scores and insight.
    gr.Markdown(value="Upload a video — audio is transcribed and scored. ~30-60s on GPU.")
    v_in = gr.Video(label="Upload Video")
    v_btn = gr.Button(value="🧠 Analyze Video", variant="primary")
    v_out = gr.Textbox(label="Scores", lines=12)
    v_ins = gr.Textbox(label="💡 Insight")
    # score_video_safe returns (scores text, insight), matching the two outputs.
    v_btn.click(
        fn=score_video_safe,
        inputs=[v_in],
        outputs=[v_out, v_ins],
        api_name="predict_video",
    )
|
| 337 |
|
| 338 |
with gr.Tab("⚔️ A/B Test"):
|
| 339 |
with gr.Row():
|