chariscait committed on
Commit
ca849a5
·
verified ·
1 Parent(s): fabef15

Update app.py - live streaming primary, stop button, remaining time, updated description

Browse files
Files changed (1) hide show
  1. app.py +280 -220
app.py CHANGED
@@ -8,7 +8,7 @@ Flow:
8
  1. Landing page with email registration
9
  2. 6-digit code verification
10
  3. Demo: Upload video OR camera+mic quick capture
11
- 4. Full multimodal analysis (face, voice, text, posture)
12
  5. Session report with emotion timeline
13
  6. Trial-ended screen with contact info
14
  """
@@ -36,6 +36,13 @@ try:
36
  except ImportError:
37
  HAS_PIL = False
38
 
 
 
 
 
 
 
 
39
  from models import EmotionLabel, EMOTION_LABELS, CulturalRegion, EMOTION_EMOJI
40
  from face_detector import FaceEmotionDetector
41
  from text_detector import TextEmotionDetector
@@ -596,28 +603,29 @@ def show_demo():
596
  remaining = get_remaining_seconds(start)
597
 
598
  render_countdown_bar(remaining)
599
- _schedule_rerun(remaining)
600
 
601
  # Load the live processor
602
  processor = load_live_processor()
603
 
604
  with st.sidebar:
605
  st.markdown("### EmoSphere Demo")
606
- st.markdown("*Session: {}s remaining*".format(int(remaining)))
 
 
607
  st.divider()
608
  st.markdown("### How it works")
609
  st.markdown(
610
- "1. **Upload a video** or **record with camera + mic**\n"
611
- "2. All 4 modalities are analyzed\n"
612
  "3. Fused with **Mamdani fuzzy logic**\n"
613
- "4. View full emotion report"
614
  )
615
  st.divider()
616
  st.markdown("### Modalities")
617
  st.markdown(
618
  "🧑 **Face** — ViT expression \n"
619
  "🎙 **Voice** — Wav2Vec2 prosody \n"
620
- "💬 **Text** — DistilRoBERTa \n"
621
  "🧍 **Posture** — MediaPipe pose"
622
  )
623
 
@@ -626,10 +634,10 @@ def show_demo():
626
  '<div style="text-align:center;">'
627
  '<img src="https://caitcore.com/images/emosphere-logo.png" '
628
  'style="width:80px; height:80px; border-radius:12px; margin-bottom:8px;" />'
629
- '<h2 style="margin:0;">EmoSphere — Emotion Analysis</h2>'
630
  '<p style="color:#6B7B9D; font-size:14px; margin-top:4px;">'
631
- 'Multimodal emotion detection with Mamdani fuzzy fusion '
632
- '— face, voice, text &amp; posture</p>'
633
  '</div>',
634
  unsafe_allow_html=True,
635
  )
@@ -638,7 +646,7 @@ def show_demo():
638
  if st.session_state.get("show_report"):
639
  from session_report import render_session_report
640
  render_session_report(processor)
641
- if st.button("⬅ New Analysis", use_container_width=True):
642
  st.session_state.show_report = False
643
  st.rerun()
644
  return
@@ -648,237 +656,289 @@ def show_demo():
648
  _show_video_processing(processor, start)
649
  return
650
 
651
- # ── Two options side by side ─────────────────────────────────────
 
 
 
 
652
 
653
- col_upload, col_capture = st.columns(2)
 
 
 
 
 
 
 
 
654
 
655
- # ── Option 1: Upload Video ───────────────────────────────────────
656
- with col_upload:
657
- st.markdown(
658
- '<div class="glass-card" style="text-align: center; padding: 20px; min-height: 120px;">'
659
- '<span style="font-size: 36px;">🎬</span>'
660
- '<h3 style="margin: 6px 0 4px;">Upload Video</h3>'
661
- '<p style="color: #6B7B9D; font-size: 12px; margin: 0;">'
662
- 'Upload a short video (MP4, max 60s).<br/>'
663
- 'Full 4-modality analysis + report.</p>'
664
- '</div>',
665
- unsafe_allow_html=True,
666
- )
667
 
668
- uploaded_video = st.file_uploader(
669
- "Choose video file",
670
- type=["mp4", "webm", "avi", "mov", "mkv"],
671
- key="video_upload",
672
- label_visibility="collapsed",
673
- )
 
674
 
675
- if uploaded_video is not None:
676
- st.video(uploaded_video)
677
- if st.button("🔍 Analyze Video", type="primary", use_container_width=True):
678
- video_bytes = uploaded_video.read()
679
- st.session_state.video_bytes = video_bytes
680
- st.session_state.video_processing = True
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  st.rerun()
682
 
683
- # ── Option 2: Camera + Mic Capture ───────────────────────────────
684
- with col_capture:
685
- st.markdown(
686
- '<div class="glass-card" style="text-align: center; padding: 20px; min-height: 120px;">'
687
- '<span style="font-size: 36px;">📸</span>'
688
- '<h3 style="margin: 6px 0 4px;">Quick Capture</h3>'
689
- '<p style="color: #6B7B9D; font-size: 12px; margin: 0;">'
690
- 'Snap a photo + record audio.<br/>'
691
- 'Instant face, voice &amp; posture analysis.</p>'
692
- '</div>',
693
- unsafe_allow_html=True,
694
- )
 
 
 
 
 
 
 
695
 
696
- camera_photo = st.camera_input(
697
- "Take a photo",
698
- key="camera_capture",
699
- label_visibility="collapsed",
700
- )
 
701
 
702
- audio_recording = st.audio_input(
703
- "Record audio (optional)",
704
- key="audio_capture",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
  )
706
 
707
- # ── Process Camera + Audio if captured ───────────────────────────
708
- if camera_photo is not None:
709
- if is_trial_expired(start):
710
- st.session_state.auth_stage = "ended"
711
  st.rerun()
712
- return
713
 
714
- _process_quick_capture(processor, camera_photo, audio_recording)
 
 
 
 
715
 
716
- # Check trial expiry
717
- if is_trial_expired(start):
718
- if processor.is_active:
719
  processor.stop_session()
720
- st.session_state.auth_stage = "ended"
721
- st.rerun()
722
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
 
724
- def _process_quick_capture(processor, camera_photo, audio_recording):
725
- """Process a camera snapshot + optional audio recording."""
 
726
 
727
- st.divider()
728
- st.markdown("### Analysis Results")
729
 
730
- image_data = camera_photo.getvalue()
731
- face_det = load_face_detector()
732
- posture_det = load_posture_detector()
 
 
 
 
 
 
 
 
733
 
734
- col_img, col_results = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
735
 
736
- with col_img:
737
- st.image(image_data, caption="Captured", use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
 
739
- with col_results:
740
- with st.spinner("Analyzing face & posture..."):
741
- face_result = face_det.detect(image_data)
742
- posture_result = posture_det.detect(image_data)
743
-
744
- # Face result
745
- st.markdown("#### 🧑 Face Expression")
746
- render_emotion_bubble(face_result.dominant, face_result.dominant_score)
747
- face_scores = {s.label: s.score for s in face_result.scores}
748
- render_emotion_bars(face_scores)
749
-
750
- # Posture result
751
- st.markdown("#### 🧍 Posture Signal")
752
- render_emotion_bubble(posture_result.dominant, posture_result.dominant_score)
753
- p_scores = {s.label: s.score for s in posture_result.scores}
754
- render_emotion_bars(p_scores)
755
-
756
- # Process audio if available
757
- if audio_recording is not None:
758
- st.markdown("#### 🎙 Voice Analysis")
759
- audio_bytes = audio_recording.getvalue()
760
-
761
- try:
762
- from voice_detector import VoiceEmotionDetector
763
- voice_det = VoiceEmotionDetector()
764
- voice_det.load()
765
-
766
- with st.spinner("Analyzing voice emotion..."):
767
- voice_result = voice_det.detect(audio_bytes)
768
-
769
- vcol1, vcol2 = st.columns([1, 1])
770
- with vcol1:
771
- render_emotion_bubble(voice_result.dominant, voice_result.dominant_score)
772
- with vcol2:
773
- v_scores = {s.label: s.score for s in voice_result.scores}
774
- render_emotion_bars(v_scores)
775
-
776
- # Try STT for text emotion
777
- try:
778
- from faster_whisper import WhisperModel
779
- import tempfile, os
780
-
781
- with st.spinner("Transcribing speech..."):
782
- # Write audio to temp file
783
- tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
784
- tmp.write(audio_bytes)
785
- tmp.flush()
786
- tmp_path = tmp.name
787
- tmp.close()
788
-
789
- model = WhisperModel("tiny.en", device="cpu", compute_type="int8")
790
- segments, _info = model.transcribe(tmp_path, language="en", beam_size=1)
791
- text_parts = [seg.text.strip() for seg in segments if seg.text.strip()]
792
- transcript = " ".join(text_parts)
793
-
794
- os.unlink(tmp_path)
795
-
796
- if transcript:
797
- st.markdown("#### 💬 Speech Transcript")
798
- st.markdown(
799
- '<div class="glass-card" style="padding: 12px;">'
800
- '<p style="color: #B0BCD0; font-size: 14px; margin: 0;">'
801
- '"<em>{}</em>"</p></div>'.format(transcript),
802
- unsafe_allow_html=True,
803
- )
804
-
805
- text_det = load_text_detector()
806
- text_result = text_det.detect(transcript)
807
- st.markdown("#### 💬 Text Emotion")
808
- tcol1, tcol2 = st.columns([1, 1])
809
- with tcol1:
810
- render_emotion_bubble(text_result.dominant, text_result.dominant_score)
811
- with tcol2:
812
- t_scores = {s.label: s.score for s in text_result.scores}
813
- render_emotion_bars(t_scores)
814
-
815
- # Fuzzy fusion of all modalities
816
- st.divider()
817
- st.markdown("### 🔮 Fused Result — Mamdani Fuzzy Logic")
818
- try:
819
- from fuzzy_fusion import FuzzyFusionEngine
820
- fusion = FuzzyFusionEngine()
821
- fused = fusion.fuse(
822
- face=face_result,
823
- voice=voice_result,
824
- text=text_result,
825
- posture=posture_result,
826
- )
827
- render_emotion_bubble(fused.dominant, fused.dominant_score)
828
- fused_scores = {s.label: s.score for s in fused.scores}
829
- render_emotion_bars(fused_scores)
830
-
831
- # Show modality weights
832
- st.markdown("##### Modality Weights")
833
- weight_html = ""
834
- weight_colors = {"face": "#E948A0", "voice": "#FFD700", "text": "#00D4FF", "posture": "#10B981"}
835
- for mod, w in fused.modality_weights.items():
836
- color = weight_colors.get(mod, "#94A3B8")
837
- weight_html += (
838
- '<span style="display:inline-block; margin:4px; padding:4px 12px; '
839
- 'background:{}20; border:1px solid {}40; border-radius:12px; '
840
- 'color:{}; font-size:12px; font-weight:600;">'
841
- '{}: {:.0f}%</span>'.format(color, color, color, mod.title(), w * 100)
842
- )
843
- st.markdown(weight_html, unsafe_allow_html=True)
844
-
845
- if fused.fired_rules:
846
- st.markdown("##### Fired Fuzzy Rules")
847
- for rule in fused.fired_rules[:5]:
848
- st.markdown(
849
- '<span style="color: #6B7B9D; font-size: 12px;">• {}</span>'.format(rule),
850
- unsafe_allow_html=True,
851
- )
852
- except Exception as e:
853
- st.warning(f"Fusion error: {e}")
854
-
855
- except ImportError:
856
- st.info("Speech-to-text not available. Voice emotion analyzed from audio signal only.")
857
- except Exception as e:
858
- st.warning(f"Transcription error: {e}")
859
-
860
- except Exception as e:
861
- st.warning(f"Voice analysis error: {e}")
862
- else:
863
- # No audio — just fuse face + posture
864
- st.divider()
865
- st.markdown("### 🔮 Fused Result — Mamdani Fuzzy Logic")
866
- try:
867
- from fuzzy_fusion import FuzzyFusionEngine
868
- fusion = FuzzyFusionEngine()
869
- fused = fusion.fuse(
870
- face=face_result,
871
- voice=None,
872
- text=None,
873
- posture=posture_result,
874
  )
875
- render_emotion_bubble(fused.dominant, fused.dominant_score)
876
- fused_scores = {s.label: s.score for s in fused.scores}
877
- render_emotion_bars(fused_scores)
878
- except Exception as e:
879
- st.warning(f"Fusion error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
880
 
881
- st.info("💡 Record audio above for voice emotion + speech-to-text analysis.")
 
 
 
 
 
 
 
 
 
882
 
883
 
884
  def _show_video_processing(processor, start):
@@ -894,7 +954,7 @@ def _show_video_processing(processor, start):
894
  '<span style="font-size: 42px;">⚙️</span>'
895
  '<h3 style="margin: 8px 0;">Analyzing Video...</h3>'
896
  '<p style="color: #6B7B9D; font-size: 13px;">'
897
- 'Processing all 4 modalities (face, voice, text, posture) with Mamdani fuzzy fusion.</p>'
898
  '</div>',
899
  unsafe_allow_html=True,
900
  )
 
8
  1. Landing page with email registration
9
  2. 6-digit code verification
10
  3. Demo: Live streaming (primary) OR video upload
11
+ 4. Full multimodal analysis (face, voice, speech, posture)
12
  5. Session report with emotion timeline
13
  6. Trial-ended screen with contact info
14
  """
 
36
  except ImportError:
37
  HAS_PIL = False
38
 
39
+ try:
40
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode
41
+ import av
42
+ HAS_WEBRTC = True
43
+ except ImportError:
44
+ HAS_WEBRTC = False
45
+
46
  from models import EmotionLabel, EMOTION_LABELS, CulturalRegion, EMOTION_EMOJI
47
  from face_detector import FaceEmotionDetector
48
  from text_detector import TextEmotionDetector
 
603
  remaining = get_remaining_seconds(start)
604
 
605
  render_countdown_bar(remaining)
 
606
 
607
  # Load the live processor
608
  processor = load_live_processor()
609
 
610
  with st.sidebar:
611
  st.markdown("### EmoSphere Demo")
612
+ mins = int(remaining) // 60
613
+ secs = int(remaining) % 60
614
+ st.markdown("**Time remaining:** {}:{:02d}".format(mins, secs))
615
  st.divider()
616
  st.markdown("### How it works")
617
  st.markdown(
618
+ "1. Click **START** on the video stream\n"
619
+ "2. All 4 modalities analyzed in real-time\n"
620
  "3. Fused with **Mamdani fuzzy logic**\n"
621
+ "4. Click **Stop & View Report** when done"
622
  )
623
  st.divider()
624
  st.markdown("### Modalities")
625
  st.markdown(
626
  "🧑 **Face** — ViT expression \n"
627
  "🎙 **Voice** — Wav2Vec2 prosody \n"
628
+ "💬 **Text** — DistilRoBERTa NLP \n"
629
  "🧍 **Posture** — MediaPipe pose"
630
  )
631
 
 
634
  '<div style="text-align:center;">'
635
  '<img src="https://caitcore.com/images/emosphere-logo.png" '
636
  'style="width:80px; height:80px; border-radius:12px; margin-bottom:8px;" />'
637
+ '<h2 style="margin:0;">EmoSphere — Live Emotion Analysis</h2>'
638
  '<p style="color:#6B7B9D; font-size:14px; margin-top:4px;">'
639
+ 'Multimodal AI emotion detection with fuzzy fusion '
640
+ '— face, voice, speech &amp; posture</p>'
641
  '</div>',
642
  unsafe_allow_html=True,
643
  )
 
646
  if st.session_state.get("show_report"):
647
  from session_report import render_session_report
648
  render_session_report(processor)
649
+ if st.button("⬅ New Session", use_container_width=True):
650
  st.session_state.show_report = False
651
  st.rerun()
652
  return
 
656
  _show_video_processing(processor, start)
657
  return
658
 
659
+ # ── Primary: Live Streaming ─────────────────────────────────────
660
+ if HAS_WEBRTC:
661
+ _show_live_session(processor, remaining, start)
662
+ else:
663
+ st.warning("Live streaming requires streamlit-webrtc. Use video upload below.")
664
 
665
+ # ── Secondary: Video Upload ──────────────────────────────────────
666
+ st.divider()
667
+ st.markdown(
668
+ '<div style="text-align:center;">'
669
+ '<h3 style="margin: 0;">Or Upload a Video</h3>'
670
+ '<p style="color: #6B7B9D; font-size: 13px;">Upload a short video (MP4, max 60s) for full multimodal analysis.</p>'
671
+ '</div>',
672
+ unsafe_allow_html=True,
673
+ )
674
 
675
+ uploaded_video = st.file_uploader(
676
+ "Choose video file",
677
+ type=["mp4", "webm", "avi", "mov", "mkv"],
678
+ key="video_upload",
679
+ label_visibility="collapsed",
680
+ )
 
 
 
 
 
 
681
 
682
+ if uploaded_video is not None:
683
+ st.video(uploaded_video)
684
+ if st.button("🔍 Analyze Video", type="primary", use_container_width=True):
685
+ video_bytes = uploaded_video.read()
686
+ st.session_state.video_bytes = video_bytes
687
+ st.session_state.video_processing = True
688
+ st.rerun()
689
 
690
+ # Check trial expiry
691
+ if is_trial_expired(start):
692
+ if processor.is_active:
693
+ processor.stop_session()
694
+ st.session_state.auth_stage = "ended"
695
+ st.rerun()
696
+
697
+
698
def _show_live_session(processor, remaining, start):
    """Render the live streaming view: controls, WebRTC stream, live results.

    The streamlit-webrtc widget supplies the only START control. This
    function adds stop/report buttons and a countdown above the stream,
    starts the processor when the stream is playing, and stops it (then
    switches to the report) when the stream ends or the session cap is hit.

    Args:
        processor: Live processor object; this function reads ``is_active``
            and ``elapsed_seconds``, calls ``start_session()`` /
            ``stop_session()``, and passes its frame callbacks to WebRTC.
        remaining: Accepted for the caller's signature; not used here.
        start: Accepted for the caller's signature; not used here.
    """
    session_cap_seconds = 60

    def _stop_and_open_report():
        # Shared exit path: end the session, flag the report, rerun the app.
        processor.stop_session()
        st.session_state.show_report = True
        st.rerun()

    # ── Stop button + timer (above the stream) ──────────────────────
    stop_slot, timer_slot, report_slot = st.columns([1, 1, 1])

    with stop_slot:
        # The button is only created while a session is running.
        if processor.is_active and st.button(
            "⏹ Stop Session", type="primary", use_container_width=True
        ):
            _stop_and_open_report()

    with timer_slot:
        if not processor.is_active:
            idle_hint_html = (
                '<div style="text-align: center; padding: 8px; color: #6B7B9D;">'
                'Click <strong>START</strong> on the video to begin'
                '</div>'
            )
            st.markdown(idle_hint_html, unsafe_allow_html=True)
        else:
            seconds_used = processor.elapsed_seconds
            seconds_left = max(0, session_cap_seconds - seconds_used)
            live_badge_html = (
                '<div style="text-align: center; padding: 8px;">'
                '<span style="color: #FF4444; font-size: 14px; font-weight: 700;">● LIVE</span>'
                '<span style="color: #00D4FF; margin-left: 12px; font-weight: 700; font-size: 16px;">'
                '{:.0f}s remaining</span>'
                '</div>'
            ).format(seconds_left)
            st.markdown(live_badge_html, unsafe_allow_html=True)

    with report_slot:
        if processor.is_active and st.button(
            "📊 View Report", use_container_width=True
        ):
            _stop_and_open_report()

    # ── WebRTC Stream + Results side by side ─────────────────────────
    stream_slot, results_slot = st.columns([1, 1])

    # Connection settings assembled up front so the widget call stays short.
    capture_constraints = {
        "video": {"width": {"ideal": 640}, "height": {"ideal": 480}},
        "audio": True,
    }
    turn_relay = {
        "urls": [
            "turn:openrelay.metered.ca:80",
            "turn:openrelay.metered.ca:80?transport=tcp",
            "turn:openrelay.metered.ca:443",
            "turns:openrelay.metered.ca:443",
        ],
        "username": "openrelayproject",
        "credential": "openrelayproject",
    }
    ice_settings = {
        "iceServers": [
            {"urls": ["stun:stun.l.google.com:19302"]},
            {"urls": ["stun:stun1.l.google.com:19302"]},
            {"urls": ["stun:stun2.l.google.com:19302"]},
            {"urls": ["stun:stun3.l.google.com:19302"]},
            {"urls": ["stun:stun4.l.google.com:19302"]},
            turn_relay,
        ]
    }

    with stream_slot:
        # The WebRTC component provides its own START/STOP button.
        webrtc_ctx = webrtc_streamer(
            key="emosphere-live",
            mode=WebRtcMode.SENDRECV,
            video_frame_callback=processor.video_frame_callback,
            audio_frame_callback=processor.audio_frame_callback,
            media_stream_constraints=capture_constraints,
            rtc_configuration=ice_settings,
            async_processing=True,
        )

        # Stream is playing but the processor is idle: start it.
        if webrtc_ctx.state.playing and not processor.is_active:
            processor.start_session()
            st.rerun()

        # Stream stopped while the processor is still running: wrap up.
        if not webrtc_ctx.state.playing and processor.is_active:
            _stop_and_open_report()

        # Session reached the cap: wrap up.
        if processor.is_active and processor.elapsed_seconds >= session_cap_seconds:
            _stop_and_open_report()

    with results_slot:
        if processor.is_active:
            _render_live_results(processor)
        else:
            ready_card_html = (
                '<div class="glass-card" style="text-align: center; padding: 40px;">'
                '<span style="font-size: 48px;">🎬</span>'
                '<h3 style="margin: 12px 0 8px; color: #B0BCD0 !important;">Ready to Stream</h3>'
                '<p style="color: #6B7B9D; margin: 0; font-size: 13px;">'
                'Click the <strong>START</strong> button on the left to begin '
                'your 60-second live emotion analysis session.</p>'
                '<div style="margin-top: 16px; padding: 12px; background: rgba(0,212,255,0.06); '
                'border-radius: 8px; border: 1px solid rgba(0,212,255,0.15);">'
                '<p style="color: #00D4FF; font-size: 12px; margin: 0;">'
                '🧑 Face &bull; 🎙 Voice &bull; 💬 Speech &bull; 🧍 Posture<br/>'
                'All fused with fuzzy logic in real-time.</p>'
                '</div>'
                '</div>'
            )
            st.markdown(ready_card_html, unsafe_allow_html=True)

    # Keep refreshing while a session is running so results stay current.
    if processor.is_active:
        _schedule_rerun_fast()
815
 
 
 
816
 
817
@st.fragment(run_every=1.5)
def _render_live_results(processor):
    """Render the latest live emotion results for the running session.

    Registered as a Streamlit fragment with ``run_every=1.5``. Shows a
    placeholder until a fused result exists; afterwards renders the fused
    emotion, one row per modality, the last five transcript segments,
    detected topics, and frame/audio/transcript counters.

    Transcript text and topic labels are HTML-escaped before being embedded,
    because they are rendered with ``unsafe_allow_html=True`` and their
    content is not controlled by this module.

    Args:
        processor: Live processor object providing the ``get_latest_*``,
            ``get_stats``, ``get_transcript`` and ``get_topics`` accessors.
    """
    from html import escape

    fused = processor.get_latest_fused()
    face = processor.get_latest_face()
    voice = processor.get_latest_voice()
    text = processor.get_latest_text()
    posture = processor.get_latest_posture()
    stats = processor.get_stats()
    transcript = processor.get_transcript()
    topics = processor.get_topics()

    # Nothing fused yet: show a waiting card and stop here.
    if fused is None:
        st.markdown(
            '<div class="glass-card" style="text-align: center; padding: 20px;">'
            '<span style="font-size: 36px;">🔮</span>'
            '<p style="color: #6B7B9D; margin-top: 8px;">'
            'Analyzing... Speak, move, or express yourself.</p>'
            '</div>',
            unsafe_allow_html=True,
        )
        return

    # Dominant emotion bubble
    render_emotion_bubble(fused.dominant, fused.dominant_score)

    # Fused emotion bars
    fused_scores = {s.label: s.score for s in fused.scores}
    render_emotion_bars(fused_scores)

    # Modality signals
    st.markdown("#### Modality Signals")
    mod_data = [
        ("🧑 Face", face),
        ("🎙 Voice", voice),
        ("💬 Text", text),
        ("🧍 Posture", posture),
    ]
    mod_colors = ["#E948A0", "#FFD700", "#00D4FF", "#10B981"]

    for (mod_label, mod_result), color in zip(mod_data, mod_colors):
        if mod_result is not None:
            dom = mod_result.dominant
            emoji = EMOTION_EMOJI.get(dom, "")
            # Confidence is used both as the bar width and the printed percentage.
            conf = mod_result.confidence * 100
            st.markdown(
                '<div style="display: flex; align-items: center; margin: 4px 0; font-size: 13px;">'
                '<span style="width: 100px; flex-shrink: 0;">{}</span>'
                '<span style="font-size: 16px; margin-right: 6px;">{}</span>'
                '<span style="color: {}; font-weight: 600; width: 70px;">{}</span>'
                '<div style="flex: 1; background: rgba(255,255,255,0.05); border-radius: 4px; height: 8px; overflow: hidden;">'
                '<div style="width: {:.0f}%; height: 100%; background: {}; border-radius: 4px;"></div>'
                '</div>'
                '<span style="color: #6B7B9D; margin-left: 8px; font-size: 11px;">{:.0f}%</span>'
                '</div>'.format(mod_label, emoji, color, dom.value, conf, color, conf),
                unsafe_allow_html=True,
            )
        else:
            # No result from this modality yet.
            st.markdown(
                '<div style="display: flex; align-items: center; margin: 4px 0; font-size: 13px;">'
                '<span style="width: 100px; flex-shrink: 0;">{}</span>'
                '<span style="color: #6B7B9D; font-style: italic;">waiting...</span>'
                '</div>'.format(mod_label),
                unsafe_allow_html=True,
            )

    # Live transcript (most recent five segments only)
    if transcript:
        st.markdown("#### Live Transcript")
        recent = transcript[-5:]
        html_parts = ['<div class="glass-card" style="max-height: 180px; overflow-y: auto; padding: 10px;">']
        for seg in recent:
            emoji = EMOTION_EMOJI.get(seg.emotion, "") if seg.emotion else ""
            mins, secs = divmod(int(seg.timestamp), 60)
            # Escape the recognized speech so it cannot inject markup.
            safe_text = escape(str(seg.text))
            html_parts.append(
                '<div style="padding: 4px 0; border-bottom: 1px solid rgba(255,255,255,0.05); font-size: 13px;">'
                '<span style="color: #6B7B9D;">{}:{:02d}</span> '
                '<span>{}</span> '
                '<span style="color: #B0BCD0;">{}</span>'
                '</div>'.format(mins, secs, emoji, safe_text)
            )
        html_parts.append('</div>')
        st.markdown("".join(html_parts), unsafe_allow_html=True)

    # Topics
    if topics:
        topic_html = " ".join(
            '<span style="display: inline-block; background: rgba(0,212,255,0.12); '
            'border: 1px solid rgba(0,212,255,0.25); border-radius: 16px; '
            'padding: 2px 10px; margin: 2px; font-size: 11px; color: #00D4FF;">{}</span>'.format(
                # Escaped for the same reason as the transcript text.
                escape(t.replace("_", " ").title())
            )
            for t in topics
        )
        st.markdown(
            '<div style="margin-top: 8px;">'
            '<span style="color: #6B7B9D; font-size: 12px; font-weight: 600;">Topics: </span>'
            '{}</div>'.format(topic_html),
            unsafe_allow_html=True,
        )

    # Stats
    st.markdown(
        '<div style="color: #6B7B9D; font-size: 11px; margin-top: 8px; text-align: right;">'
        'Frames: {} &bull; Audio: {} &bull; Transcript: {}'
        '</div>'.format(
            stats.get("video_frames", 0),
            stats.get("audio_chunks", 0),
            stats.get("transcript_segments", 0),
        ),
        unsafe_allow_html=True,
    )
930
+
931
 
932
+ def _schedule_rerun_fast():
933
+ """Schedule a fast rerun to keep live results updating."""
934
+ try:
935
+ import streamlit.components.v1 as components
936
+ components.html(
937
+ '<script>setTimeout(function() { window.location.reload(); }, 2000);</script>',
938
+ height=0,
939
+ )
940
+ except Exception:
941
+ pass
942
 
943
 
944
  def _show_video_processing(processor, start):
 
954
  '<span style="font-size: 42px;">⚙️</span>'
955
  '<h3 style="margin: 8px 0;">Analyzing Video...</h3>'
956
  '<p style="color: #6B7B9D; font-size: 13px;">'
957
+ 'Processing all 4 modalities (face, voice, speech, posture) with fuzzy fusion.</p>'
958
  '</div>',
959
  unsafe_allow_html=True,
960
  )