Spaces:

chariscait
/

EmoSphere

Running

App Files Files Community

chariscait committed on Apr 13

Commit

ca849a5

verified ·

1 Parent(s): fabef15

Update app.py - live streaming primary, stop button, remaining time, updated description

Browse files

Files changed (1) hide show

app.py +280 -220

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ Flow:
   1. Landing page with email registration
   2. 6-digit code verification
   3. Demo: Upload video OR camera+mic quick capture
-  4. Full multimodal analysis (face, voice, text, posture)
   5. Session report with emotion timeline
   6. Trial-ended screen with contact info
 """
@@ -36,6 +36,13 @@ try:
 except ImportError:
     HAS_PIL = False
 from models import EmotionLabel, EMOTION_LABELS, CulturalRegion, EMOTION_EMOJI
 from face_detector import FaceEmotionDetector
 from text_detector import TextEmotionDetector
@@ -596,28 +603,29 @@ def show_demo():
     remaining = get_remaining_seconds(start)
     render_countdown_bar(remaining)
-    _schedule_rerun(remaining)
     # Load the live processor
     processor = load_live_processor()
     with st.sidebar:
         st.markdown("### EmoSphere Demo")
-        st.markdown("*Session: {}s remaining*".format(int(remaining)))
         st.divider()
         st.markdown("### How it works")
         st.markdown(
-            "1. **Upload a video** or **record with camera + mic**\n"
-            "2. All 4 modalities are analyzed\n"
             "3. Fused with **Mamdani fuzzy logic**\n"
-            "4. View full emotion report"
         )
         st.divider()
         st.markdown("### Modalities")
         st.markdown(
             "🧑 **Face** — ViT expression  \n"
             "🎙 **Voice** — Wav2Vec2 prosody  \n"
-            "💬 **Text** — DistilRoBERTa  \n"
             "🧍 **Posture** — MediaPipe pose"
         )
@@ -626,10 +634,10 @@ def show_demo():
         '<div style="text-align:center;">'
         '<img src="https://caitcore.com/images/emosphere-logo.png" '
         'style="width:80px; height:80px; border-radius:12px; margin-bottom:8px;" />'
-        '<h2 style="margin:0;">EmoSphere — Emotion Analysis</h2>'
         '<p style="color:#6B7B9D; font-size:14px; margin-top:4px;">'
-        'Multimodal emotion detection with Mamdani fuzzy fusion '
-        '— face, voice, text &amp; posture</p>'
         '</div>',
         unsafe_allow_html=True,
     )
@@ -638,7 +646,7 @@ def show_demo():
     if st.session_state.get("show_report"):
         from session_report import render_session_report
         render_session_report(processor)
-        if st.button("⬅ New Analysis", use_container_width=True):
             st.session_state.show_report = False
             st.rerun()
         return
@@ -648,237 +656,289 @@ def show_demo():
         _show_video_processing(processor, start)
         return
-    # ── Two options side by side ─────────────────────────────────────
-    col_upload, col_capture = st.columns(2)
-    # ── Option 1: Upload Video ───────────────────────────────────────
-    with col_upload:
-        st.markdown(
-            '<div class="glass-card" style="text-align: center; padding: 20px; min-height: 120px;">'
-            '<span style="font-size: 36px;">🎬</span>'
-            '<h3 style="margin: 6px 0 4px;">Upload Video</h3>'
-            '<p style="color: #6B7B9D; font-size: 12px; margin: 0;">'
-            'Upload a short video (MP4, max 60s).<br/>'
-            'Full 4-modality analysis + report.</p>'
-            '</div>',
-            unsafe_allow_html=True,
-        )
-        uploaded_video = st.file_uploader(
-            "Choose video file",
-            type=["mp4", "webm", "avi", "mov", "mkv"],
-            key="video_upload",
-            label_visibility="collapsed",
-        )
-        if uploaded_video is not None:
-            st.video(uploaded_video)
-            if st.button("🔍 Analyze Video", type="primary", use_container_width=True):
-                video_bytes = uploaded_video.read()
-                st.session_state.video_bytes = video_bytes
-                st.session_state.video_processing = True
                 st.rerun()
-    # ── Option 2: Camera + Mic Capture ───────────────────────────────
-    with col_capture:
-        st.markdown(
-            '<div class="glass-card" style="text-align: center; padding: 20px; min-height: 120px;">'
-            '<span style="font-size: 36px;">📸</span>'
-            '<h3 style="margin: 6px 0 4px;">Quick Capture</h3>'
-            '<p style="color: #6B7B9D; font-size: 12px; margin: 0;">'
-            'Snap a photo + record audio.<br/>'
-            'Instant face, voice &amp; posture analysis.</p>'
-            '</div>',
-            unsafe_allow_html=True,
-        )
-        camera_photo = st.camera_input(
-            "Take a photo",
-            key="camera_capture",
-            label_visibility="collapsed",
-        )
-        audio_recording = st.audio_input(
-            "Record audio (optional)",
-            key="audio_capture",
         )
-    # ── Process Camera + Audio if captured ───────────────────────────
-    if camera_photo is not None:
-        if is_trial_expired(start):
-            st.session_state.auth_stage = "ended"
             st.rerun()
-            return
-        _process_quick_capture(processor, camera_photo, audio_recording)
-    # Check trial expiry
-    if is_trial_expired(start):
-        if processor.is_active:
             processor.stop_session()
-        st.session_state.auth_stage = "ended"
-        st.rerun()
-def _process_quick_capture(processor, camera_photo, audio_recording):
-    """Process a camera snapshot + optional audio recording."""
-    st.divider()
-    st.markdown("### Analysis Results")
-    image_data = camera_photo.getvalue()
-    face_det = load_face_detector()
-    posture_det = load_posture_detector()
-    col_img, col_results = st.columns([1, 1])
-    with col_img:
-        st.image(image_data, caption="Captured", use_container_width=True)
-    with col_results:
-        with st.spinner("Analyzing face & posture..."):
-            face_result = face_det.detect(image_data)
-            posture_result = posture_det.detect(image_data)
-        # Face result
-        st.markdown("#### 🧑 Face Expression")
-        render_emotion_bubble(face_result.dominant, face_result.dominant_score)
-        face_scores = {s.label: s.score for s in face_result.scores}
-        render_emotion_bars(face_scores)
-        # Posture result
-        st.markdown("#### 🧍 Posture Signal")
-        render_emotion_bubble(posture_result.dominant, posture_result.dominant_score)
-        p_scores = {s.label: s.score for s in posture_result.scores}
-        render_emotion_bars(p_scores)
-    # Process audio if available
-    if audio_recording is not None:
-        st.markdown("#### 🎙 Voice Analysis")
-        audio_bytes = audio_recording.getvalue()
-        try:
-            from voice_detector import VoiceEmotionDetector
-            voice_det = VoiceEmotionDetector()
-            voice_det.load()
-            with st.spinner("Analyzing voice emotion..."):
-                voice_result = voice_det.detect(audio_bytes)
-            vcol1, vcol2 = st.columns([1, 1])
-            with vcol1:
-                render_emotion_bubble(voice_result.dominant, voice_result.dominant_score)
-            with vcol2:
-                v_scores = {s.label: s.score for s in voice_result.scores}
-                render_emotion_bars(v_scores)
-            # Try STT for text emotion
-            try:
-                from faster_whisper import WhisperModel
-                import tempfile, os
-                with st.spinner("Transcribing speech..."):
-                    # Write audio to temp file
-                    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-                    tmp.write(audio_bytes)
-                    tmp.flush()
-                    tmp_path = tmp.name
-                    tmp.close()
-                    model = WhisperModel("tiny.en", device="cpu", compute_type="int8")
-                    segments, _info = model.transcribe(tmp_path, language="en", beam_size=1)
-                    text_parts = [seg.text.strip() for seg in segments if seg.text.strip()]
-                    transcript = " ".join(text_parts)
-                    os.unlink(tmp_path)
-                if transcript:
-                    st.markdown("#### 💬 Speech Transcript")
-                    st.markdown(
-                        '<div class="glass-card" style="padding: 12px;">'
-                        '<p style="color: #B0BCD0; font-size: 14px; margin: 0;">'
-                        '"<em>{}</em>"</p></div>'.format(transcript),
-                        unsafe_allow_html=True,
-                    )
-                    text_det = load_text_detector()
-                    text_result = text_det.detect(transcript)
-                    st.markdown("#### 💬 Text Emotion")
-                    tcol1, tcol2 = st.columns([1, 1])
-                    with tcol1:
-                        render_emotion_bubble(text_result.dominant, text_result.dominant_score)
-                    with tcol2:
-                        t_scores = {s.label: s.score for s in text_result.scores}
-                        render_emotion_bars(t_scores)
-                    # Fuzzy fusion of all modalities
-                    st.divider()
-                    st.markdown("### 🔮 Fused Result — Mamdani Fuzzy Logic")
-                    try:
-                        from fuzzy_fusion import FuzzyFusionEngine
-                        fusion = FuzzyFusionEngine()
-                        fused = fusion.fuse(
-                            face=face_result,
-                            voice=voice_result,
-                            text=text_result,
-                            posture=posture_result,
-                        )
-                        render_emotion_bubble(fused.dominant, fused.dominant_score)
-                        fused_scores = {s.label: s.score for s in fused.scores}
-                        render_emotion_bars(fused_scores)
-                        # Show modality weights
-                        st.markdown("##### Modality Weights")
-                        weight_html = ""
-                        weight_colors = {"face": "#E948A0", "voice": "#FFD700", "text": "#00D4FF", "posture": "#10B981"}
-                        for mod, w in fused.modality_weights.items():
-                            color = weight_colors.get(mod, "#94A3B8")
-                            weight_html += (
-                                '<span style="display:inline-block; margin:4px; padding:4px 12px; '
-                                'background:{}20; border:1px solid {}40; border-radius:12px; '
-                                'color:{}; font-size:12px; font-weight:600;">'
-                                '{}: {:.0f}%</span>'.format(color, color, color, mod.title(), w * 100)
-                            )
-                        st.markdown(weight_html, unsafe_allow_html=True)
-                        if fused.fired_rules:
-                            st.markdown("##### Fired Fuzzy Rules")
-                            for rule in fused.fired_rules[:5]:
-                                st.markdown(
-                                    '<span style="color: #6B7B9D; font-size: 12px;">• {}</span>'.format(rule),
-                                    unsafe_allow_html=True,
-                                )
-                    except Exception as e:
-                        st.warning(f"Fusion error: {e}")
-            except ImportError:
-                st.info("Speech-to-text not available. Voice emotion analyzed from audio signal only.")
-            except Exception as e:
-                st.warning(f"Transcription error: {e}")
-        except Exception as e:
-            st.warning(f"Voice analysis error: {e}")
-    else:
-        # No audio — just fuse face + posture
-        st.divider()
-        st.markdown("### 🔮 Fused Result — Mamdani Fuzzy Logic")
-        try:
-            from fuzzy_fusion import FuzzyFusionEngine
-            fusion = FuzzyFusionEngine()
-            fused = fusion.fuse(
-                face=face_result,
-                voice=None,
-                text=None,
-                posture=posture_result,
             )
-            render_emotion_bubble(fused.dominant, fused.dominant_score)
-            fused_scores = {s.label: s.score for s in fused.scores}
-            render_emotion_bars(fused_scores)
-        except Exception as e:
-            st.warning(f"Fusion error: {e}")
-        st.info("💡 Record audio above for voice emotion + speech-to-text analysis.")
 def _show_video_processing(processor, start):
@@ -894,7 +954,7 @@ def _show_video_processing(processor, start):
         '<span style="font-size: 42px;">⚙️</span>'
         '<h3 style="margin: 8px 0;">Analyzing Video...</h3>'
         '<p style="color: #6B7B9D; font-size: 13px;">'
-        'Processing all 4 modalities (face, voice, text, posture) with Mamdani fuzzy fusion.</p>'
         '</div>',
         unsafe_allow_html=True,
     )

   1. Landing page with email registration
   2. 6-digit code verification
   3. Demo: Upload video OR camera+mic quick capture
+  4. Full multimodal analysis (face, voice, speech, posture)
   5. Session report with emotion timeline
   6. Trial-ended screen with contact info
 """
 except ImportError:
     HAS_PIL = False
+try:
+    from streamlit_webrtc import webrtc_streamer, WebRtcMode
+    import av
+    HAS_WEBRTC = True
+except ImportError:
+    HAS_WEBRTC = False
 from models import EmotionLabel, EMOTION_LABELS, CulturalRegion, EMOTION_EMOJI
 from face_detector import FaceEmotionDetector
 from text_detector import TextEmotionDetector
     remaining = get_remaining_seconds(start)
     render_countdown_bar(remaining)
     # Load the live processor
     processor = load_live_processor()
     with st.sidebar:
         st.markdown("### EmoSphere Demo")
+        mins = int(remaining) // 60
+        secs = int(remaining) % 60
+        st.markdown("**Time remaining:** {}:{:02d}".format(mins, secs))
         st.divider()
         st.markdown("### How it works")
         st.markdown(
+            "1. Click **START** on the video stream\n"
+            "2. All 4 modalities analyzed in real-time\n"
             "3. Fused with **Mamdani fuzzy logic**\n"
+            "4. Click **Stop & View Report** when done"
         )
         st.divider()
         st.markdown("### Modalities")
         st.markdown(
             "🧑 **Face** — ViT expression  \n"
             "🎙 **Voice** — Wav2Vec2 prosody  \n"
+            "💬 **Text** — DistilRoBERTa NLP  \n"
             "🧍 **Posture** — MediaPipe pose"
         )
         '<div style="text-align:center;">'
         '<img src="https://caitcore.com/images/emosphere-logo.png" '
         'style="width:80px; height:80px; border-radius:12px; margin-bottom:8px;" />'
+        '<h2 style="margin:0;">EmoSphere — Live Emotion Analysis</h2>'
         '<p style="color:#6B7B9D; font-size:14px; margin-top:4px;">'
+        'Multimodal AI emotion detection with fuzzy fusion '
+        '— face, voice, speech &amp; posture</p>'
         '</div>',
         unsafe_allow_html=True,
     )
     if st.session_state.get("show_report"):
         from session_report import render_session_report
         render_session_report(processor)
+        if st.button("⬅ New Session", use_container_width=True):
             st.session_state.show_report = False
             st.rerun()
         return
         _show_video_processing(processor, start)
         return
+    # ── Primary: Live Streaming ──────────────────────────────────────
+    if HAS_WEBRTC:
+        _show_live_session(processor, remaining, start)
+    else:
+        st.warning("Live streaming requires streamlit-webrtc. Use video upload below.")
+    # ── Secondary: Video Upload ──────────────────────────────────────
+    st.divider()
+    st.markdown(
+        '<div style="text-align:center;">'
+        '<h3 style="margin: 0;">Or Upload a Video</h3>'
+        '<p style="color: #6B7B9D; font-size: 13px;">Upload a short video (MP4, max 60s) for full multimodal analysis.</p>'
+        '</div>',
+        unsafe_allow_html=True,
+    )
+    uploaded_video = st.file_uploader(
+        "Choose video file",
+        type=["mp4", "webm", "avi", "mov", "mkv"],
+        key="video_upload",
+        label_visibility="collapsed",
+    )
+    if uploaded_video is not None:
+        st.video(uploaded_video)
+        if st.button("🔍 Analyze Video", type="primary", use_container_width=True):
+            video_bytes = uploaded_video.read()
+            st.session_state.video_bytes = video_bytes
+            st.session_state.video_processing = True
+            st.rerun()
+    # Check trial expiry
+    if is_trial_expired(start):
+        if processor.is_active:
+            processor.stop_session()
+        st.session_state.auth_stage = "ended"
+        st.rerun()
# Maximum length of a single live streaming session, in seconds. Used for the
# countdown, the auto-stop check and the "Ready to Stream" card text so the
# three can never drift apart.
_LIVE_SESSION_LIMIT_SECONDS = 60


def _finish_live_session(processor):
    """Stop the processor, flag the report view, and rerun the script."""
    processor.stop_session()
    st.session_state.show_report = True
    st.rerun()


def _show_live_session(processor, remaining, start):
    """Render the live streaming view built on streamlit-webrtc.

    Layout, top to bottom:
      * a control row (Stop button, LIVE countdown, View Report button) that
        is only populated while ``processor.is_active`` is true;
      * the WebRTC stream on the left and the live results (or a
        "Ready to Stream" card) on the right.

    The only Start control is the one built into the WebRTC component; the
    processor session is started and stopped here by watching
    ``webrtc_ctx.state.playing``.

    Args:
        processor: Live processor exposing ``is_active``, ``elapsed_seconds``,
            ``start_session()``, ``stop_session()`` and the video/audio frame
            callbacks passed to ``webrtc_streamer``.
        remaining: Accepted for the caller's convenience; not read here.
        start: Accepted for the caller's convenience; not read here.
    """
    # ── Stop button + timer (above the stream) ──────────────────────
    col_stop, col_timer, col_report = st.columns([1, 1, 1])

    with col_stop:
        if processor.is_active:
            if st.button("⏹ Stop Session", type="primary", use_container_width=True):
                _finish_live_session(processor)

    with col_timer:
        if processor.is_active:
            elapsed = processor.elapsed_seconds
            # Clamp at zero so the label never shows a negative countdown.
            session_remaining = max(0, _LIVE_SESSION_LIMIT_SECONDS - elapsed)
            st.markdown(
                '<div style="text-align: center; padding: 8px;">'
                '<span style="color: #FF4444; font-size: 14px; font-weight: 700;">● LIVE</span>'
                '<span style="color: #00D4FF; margin-left: 12px; font-weight: 700; font-size: 16px;">'
                '{:.0f}s remaining</span>'
                '</div>'.format(session_remaining),
                unsafe_allow_html=True,
            )
        else:
            st.markdown(
                '<div style="text-align: center; padding: 8px; color: #6B7B9D;">'
                'Click <strong>START</strong> on the video to begin'
                '</div>',
                unsafe_allow_html=True,
            )

    with col_report:
        if processor.is_active:
            if st.button("📊 View Report", use_container_width=True):
                _finish_live_session(processor)

    # ── WebRTC Stream + Results side by side ─────────────────────────
    col_video, col_results = st.columns([1, 1])

    with col_video:
        # The WebRTC component provides its own START/STOP button.
        webrtc_ctx = webrtc_streamer(
            key="emosphere-live",
            mode=WebRtcMode.SENDRECV,
            video_frame_callback=processor.video_frame_callback,
            audio_frame_callback=processor.audio_frame_callback,
            media_stream_constraints={
                "video": {"width": {"ideal": 640}, "height": {"ideal": 480}},
                "audio": True,
            },
            rtc_configuration={
                "iceServers": [
                    {"urls": ["stun:stun.l.google.com:19302"]},
                    {"urls": ["stun:stun1.l.google.com:19302"]},
                    {"urls": ["stun:stun2.l.google.com:19302"]},
                    {"urls": ["stun:stun3.l.google.com:19302"]},
                    {"urls": ["stun:stun4.l.google.com:19302"]},
                    {
                        "urls": [
                            "turn:openrelay.metered.ca:80",
                            "turn:openrelay.metered.ca:80?transport=tcp",
                            "turn:openrelay.metered.ca:443",
                            "turns:openrelay.metered.ca:443",
                        ],
                        "username": "openrelayproject",
                        "credential": "openrelayproject",
                    },
                ]
            },
            async_processing=True,
        )

        # Auto-start processing when WebRTC connects.
        if webrtc_ctx.state.playing and not processor.is_active:
            processor.start_session()
            st.rerun()

        # Auto-stop when WebRTC disconnects.
        if not webrtc_ctx.state.playing and processor.is_active:
            _finish_live_session(processor)

        # Auto-stop once the session has run for the full limit.
        if processor.is_active and processor.elapsed_seconds >= _LIVE_SESSION_LIMIT_SECONDS:
            _finish_live_session(processor)

    with col_results:
        if processor.is_active:
            _render_live_results(processor)
        else:
            st.markdown(
                '<div class="glass-card" style="text-align: center; padding: 40px;">'
                '<span style="font-size: 48px;">🎬</span>'
                '<h3 style="margin: 12px 0 8px; color: #B0BCD0 !important;">Ready to Stream</h3>'
                '<p style="color: #6B7B9D; margin: 0; font-size: 13px;">'
                'Click the <strong>START</strong> button on the left to begin '
                'your {}-second live emotion analysis session.</p>'
                '<div style="margin-top: 16px; padding: 12px; background: rgba(0,212,255,0.06); '
                'border-radius: 8px; border: 1px solid rgba(0,212,255,0.15);">'
                '<p style="color: #00D4FF; font-size: 12px; margin: 0;">'
                '🧑 Face &bull; 🎙 Voice &bull; 💬 Speech &bull; 🧍 Posture<br/>'
                'All fused with fuzzy logic in real-time.</p>'
                '</div>'
                '</div>'.format(_LIVE_SESSION_LIMIT_SECONDS),
                unsafe_allow_html=True,
            )

    # Refresh while active to update results.
    if processor.is_active:
        _schedule_rerun_fast()
@st.fragment(run_every=1.5)
def _render_live_results(processor):
    """Render the most recent live analysis results for the running session.

    Declared as a Streamlit fragment with ``run_every=1.5``. Shows, in order:
    the fused dominant emotion and its score bars, one row per modality
    (or "waiting..." when that modality has no result yet), the last five
    transcript segments, detected topics, and frame/audio/transcript counters.
    When no fused result exists yet, only an "Analyzing..." placeholder is
    shown.

    Transcript text and topic labels originate from captured speech, so they
    are HTML-escaped before being embedded in markup rendered with
    ``unsafe_allow_html=True``.

    Args:
        processor: Live processor exposing ``get_latest_fused/face/voice/
            text/posture()``, ``get_stats()``, ``get_transcript()`` and
            ``get_topics()``.
    """
    # Function-scope import so the block brings its own dependency with it.
    from html import escape

    fused = processor.get_latest_fused()
    face = processor.get_latest_face()
    voice = processor.get_latest_voice()
    text = processor.get_latest_text()
    posture = processor.get_latest_posture()
    stats = processor.get_stats()
    transcript = processor.get_transcript()
    topics = processor.get_topics()

    if fused is None:
        # Nothing fused yet: show a placeholder instead of empty charts.
        st.markdown(
            '<div class="glass-card" style="text-align: center; padding: 20px;">'
            '<span style="font-size: 36px;">🔮</span>'
            '<p style="color: #6B7B9D; margin-top: 8px;">'
            'Analyzing... Speak, move, or express yourself.</p>'
            '</div>',
            unsafe_allow_html=True,
        )
        return

    # Dominant emotion bubble
    render_emotion_bubble(fused.dominant, fused.dominant_score)

    # Fused emotion bars
    fused_scores = {s.label: s.score for s in fused.scores}
    render_emotion_bars(fused_scores)

    # Modality signals
    st.markdown("#### Modality Signals")
    mod_data = [
        ("🧑 Face", face),
        ("🎙 Voice", voice),
        ("💬 Text", text),
        ("🧍 Posture", posture),
    ]
    mod_colors = ["#E948A0", "#FFD700", "#00D4FF", "#10B981"]
    for (mod_label, mod_result), color in zip(mod_data, mod_colors):
        if mod_result is not None:
            dom = mod_result.dominant
            emoji = EMOTION_EMOJI.get(dom, "")
            # confidence is scaled by 100 and used as both bar width and label.
            conf = mod_result.confidence * 100
            st.markdown(
                '<div style="display: flex; align-items: center; margin: 4px 0; font-size: 13px;">'
                '<span style="width: 100px; flex-shrink: 0;">{}</span>'
                '<span style="font-size: 16px; margin-right: 6px;">{}</span>'
                '<span style="color: {}; font-weight: 600; width: 70px;">{}</span>'
                '<div style="flex: 1; background: rgba(255,255,255,0.05); border-radius: 4px; height: 8px; overflow: hidden;">'
                '<div style="width: {:.0f}%; height: 100%; background: {}; border-radius: 4px;"></div>'
                '</div>'
                '<span style="color: #6B7B9D; margin-left: 8px; font-size: 11px;">{:.0f}%</span>'
                '</div>'.format(mod_label, emoji, color, dom.value, conf, color, conf),
                unsafe_allow_html=True,
            )
        else:
            st.markdown(
                '<div style="display: flex; align-items: center; margin: 4px 0; font-size: 13px;">'
                '<span style="width: 100px; flex-shrink: 0;">{}</span>'
                '<span style="color: #6B7B9D; font-style: italic;">waiting...</span>'
                '</div>'.format(mod_label),
                unsafe_allow_html=True,
            )

    # Live transcript (only the five most recent segments are shown)
    if transcript:
        st.markdown("#### Live Transcript")
        recent = transcript[-5:]
        html_parts = ['<div class="glass-card" style="max-height: 180px; overflow-y: auto; padding: 10px;">']
        for seg in recent:
            emoji = EMOTION_EMOJI.get(seg.emotion, "") if seg.emotion else ""
            mins = int(seg.timestamp) // 60
            secs = int(seg.timestamp) % 60
            html_parts.append(
                '<div style="padding: 4px 0; border-bottom: 1px solid rgba(255,255,255,0.05); font-size: 13px;">'
                '<span style="color: #6B7B9D;">{}:{:02d}</span> '
                '<span>{}</span> '
                '<span style="color: #B0BCD0;">{}</span>'
                # Escape: transcribed speech must never be interpreted as markup.
                '</div>'.format(mins, secs, emoji, escape(str(seg.text)))
            )
        html_parts.append('</div>')
        st.markdown("".join(html_parts), unsafe_allow_html=True)

    # Topics
    if topics:
        topic_html = " ".join(
            '<span style="display: inline-block; background: rgba(0,212,255,0.12); '
            'border: 1px solid rgba(0,212,255,0.25); border-radius: 16px; '
            'padding: 2px 10px; margin: 2px; font-size: 11px; color: #00D4FF;">{}</span>'.format(
                # Escape after prettifying so the displayed label stays literal text.
                escape(t.replace("_", " ").title())
            )
            for t in topics
        )
        st.markdown(
            '<div style="margin-top: 8px;">'
            '<span style="color: #6B7B9D; font-size: 12px; font-weight: 600;">Topics: </span>'
            '{}</div>'.format(topic_html),
            unsafe_allow_html=True,
        )

    # Stats (missing counters default to 0)
    st.markdown(
        '<div style="color: #6B7B9D; font-size: 11px; margin-top: 8px; text-align: right;">'
        'Frames: {} &bull; Audio: {} &bull; Transcript: {}'
        '</div>'.format(
            stats.get("video_frames", 0),
            stats.get("audio_chunks", 0),
            stats.get("transcript_segments", 0),
        ),
        unsafe_allow_html=True,
    )
+def _schedule_rerun_fast():
+    """Schedule a fast rerun to keep live results updating."""
+    try:
+        import streamlit.components.v1 as components
+        components.html(
+            '<script>setTimeout(function() { window.location.reload(); }, 2000);</script>',
+            height=0,
+        )
+    except Exception:
+        pass
 def _show_video_processing(processor, start):
         '<span style="font-size: 42px;">⚙️</span>'
         '<h3 style="margin: 8px 0;">Analyzing Video...</h3>'
         '<p style="color: #6B7B9D; font-size: 13px;">'
+        'Processing all 4 modalities (face, voice, speech, posture) with fuzzy fusion.</p>'
         '</div>',
         unsafe_allow_html=True,
     )