Spaces:

daasime
/

sop-audio-analyzer

Sleeping

daasime Claude Opus 4.6 committed on Mar 2

Commit

5b529ce

1 Parent(s): 1d50892

Add Trends tab with analytics charts and Coming Soon sections

- Risk Score Over Time: line chart with green/yellow/red zones
- Fraud Type Distribution: stacked bar chart by day
- Voice Reuse Patterns: top 10 voices bar chart with color coding
- KPI summary: total tests, avg risk, high risk %, recurring voices
- Coming Soon: Batch Analysis and Webhook/API Integration with impact

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +196 -4
src/database/models.py +101 -0

app.py CHANGED Viewed

@@ -925,6 +925,195 @@ def render_timeline(result):
         st.caption("Play audio while viewing the timeline above to follow speaker changes")
 def render_database_tab():
     """Render the voiceprint database tab."""
@@ -2113,7 +2302,7 @@ def main():
         )
     # Tabs
-    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["Analyzer", "Compare", "Database", "Logs", "Features", "About"])
     with tab1:
         render_analyzer_tab()
@@ -2122,15 +2311,18 @@ def main():
         render_compare_tab()
     with tab3:
-        render_database_tab()
     with tab4:
-        render_logs_tab()
     with tab5:
-        render_features_tab()
     with tab6:
         render_about_tab()

         st.caption("Play audio while viewing the timeline above to follow speaker changes")
+def render_trends_tab():
+    """Render the Trends tab with analytics charts and Coming Soon sections."""
+    st.markdown("### Trend Analytics")
+    analyzer = get_analyzer()
+    trend_data = analyzer.db.get_trend_data()
+    summary = trend_data['summary']
+    if summary['total'] == 0:
+        st.info("No tests analyzed yet. Analyze some audio to see trends here.")
+        # Still show Coming Soon sections
+        _render_coming_soon_sections()
+        return
+    # ---- KPI Summary ----
+    k1, k2, k3, k4 = st.columns(4)
+    with k1:
+        with st.container(border=True):
+            st.metric("Total Tests", summary['total'])
+    with k2:
+        with st.container(border=True):
+            st.metric("Avg Risk Score", summary['avg_risk'])
+    with k3:
+        with st.container(border=True):
+            st.metric("High Risk %", f"{summary['high_risk_pct']}%")
+    with k4:
+        with st.container(border=True):
+            st.metric("Recurring Voices", summary['recurring'])
+    # ---- Risk Score Over Time ----
+    st.markdown("#### Risk Score Over Time")
+    daily_scores = trend_data['daily_scores']
+    if daily_scores:
+        dates = [d['date'] for d in daily_scores]
+        scores = [d['avg_score'] for d in daily_scores]
+        counts = [d['count'] for d in daily_scores]
+        fig = go.Figure()
+        # Risk zones as background
+        fig.add_hrect(y0=0, y1=30, fillcolor="#22c55e", opacity=0.08, line_width=0)
+        fig.add_hrect(y0=30, y1=60, fillcolor="#eab308", opacity=0.08, line_width=0)
+        fig.add_hrect(y0=60, y1=100, fillcolor="#ef4444", opacity=0.08, line_width=0)
+        # Score line
+        fig.add_trace(go.Scatter(
+            x=dates, y=scores, mode='lines+markers',
+            name='Avg Risk Score',
+            line=dict(color='#3b82f6', width=3),
+            marker=dict(size=8),
+            text=[f"Tests: {c}" for c in counts],
+            hovertemplate='%{x}<br>Avg Risk: %{y}<br>%{text}<extra></extra>',
+        ))
+        # Zone labels
+        fig.add_annotation(x=dates[-1], y=15, text="LOW", showarrow=False,
+                          font=dict(color='#22c55e', size=10), opacity=0.5)
+        fig.add_annotation(x=dates[-1], y=45, text="MEDIUM", showarrow=False,
+                          font=dict(color='#eab308', size=10), opacity=0.5)
+        fig.add_annotation(x=dates[-1], y=80, text="HIGH", showarrow=False,
+                          font=dict(color='#ef4444', size=10), opacity=0.5)
+        fig.update_layout(
+            height=300,
+            yaxis=dict(range=[0, 100], title='Risk Score'),
+            xaxis=dict(title='Date'),
+            margin=dict(l=40, r=20, t=10, b=40),
+            showlegend=False,
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    else:
+        st.caption("Not enough data for risk trend chart.")
+    # ---- Fraud Type Distribution ----
+    st.markdown("#### Fraud Type Distribution")
+    daily_flags = trend_data['daily_flags']
+    if daily_flags:
+        dates = [d['date'] for d in daily_flags]
+        flag_types = {
+            'Synthetic': ('#ef4444', [d['synthetic'] for d in daily_flags]),
+            'Playback': ('#f97316', [d['playback'] for d in daily_flags]),
+            'Reading': ('#eab308', [d['reading'] for d in daily_flags]),
+            'Whispers': ('#a855f7', [d['whispers'] for d in daily_flags]),
+            'Pauses': ('#6b7280', [d['pauses'] for d in daily_flags]),
+            'Wake Words': ('#dc2626', [d['wake_words'] for d in daily_flags]),
+        }
+        fig = go.Figure()
+        for name, (color, values) in flag_types.items():
+            fig.add_trace(go.Bar(
+                x=dates, y=values, name=name,
+                marker_color=color, opacity=0.85,
+            ))
+        fig.update_layout(
+            barmode='stack',
+            height=280,
+            yaxis=dict(title='Tests Flagged'),
+            xaxis=dict(title='Date'),
+            margin=dict(l=40, r=20, t=10, b=40),
+            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
+        )
+        st.plotly_chart(fig, use_container_width=True)
+    else:
+        st.caption("Not enough data for fraud distribution chart.")
+    # ---- Voice Reuse Patterns ----
+    st.markdown("#### Voice Reuse Patterns")
+    top_voices = trend_data['top_voices']
+    if top_voices:
+        labels = [v['label'] for v in top_voices]
+        counts = [v['times_seen'] for v in top_voices]
+        colors = [
+            '#ef4444' if v['flagged'] else '#eab308' if v['times_seen'] >= 2 else '#22c55e'
+            for v in top_voices
+        ]
+        fig = go.Figure()
+        fig.add_trace(go.Bar(
+            x=labels, y=counts,
+            marker_color=colors,
+            text=counts,
+            textposition='auto',
+        ))
+        fig.update_layout(
+            height=250,
+            yaxis=dict(title='Tests Appeared In'),
+            xaxis=dict(title='Voice ID', tickangle=-45),
+            margin=dict(l=40, r=20, t=10, b=80),
+            showlegend=False,
+        )
+        st.plotly_chart(fig, use_container_width=True)
+        st.caption("Red: flagged | Yellow: recurring (2+ tests) | Green: single appearance")
+    else:
+        st.caption("No voiceprints recorded yet.")
+    st.divider()
+    # ---- Coming Soon Sections ----
+    _render_coming_soon_sections()
+def _render_coming_soon_sections():
+    """Render Coming Soon mockups for Batch Analysis and API Integration."""
+    cs1, cs2 = st.columns(2)
+    with cs1:
+        with st.container(border=True):
+            st.markdown(
+                '<span style="background:#3b82f6;color:white;padding:0.2rem 0.6rem;'
+                'border-radius:10px;font-size:0.7rem;font-weight:bold">COMING SOON</span>',
+                unsafe_allow_html=True,
+            )
+            st.markdown("#### Batch Analysis")
+            st.markdown("""
+- Process **20+ audio files** in one upload (ZIP or multi-select)
+- Consolidated **cross-test fraud report** with shared voice detection
+- Automatic **voice matching** across all tests in a batch
+- **Reduce evaluator review time by 80%** for high-volume testing days
+            """)
+            st.markdown(
+                '<span style="color:#6b7280;font-size:0.8rem">'
+                'Impact: Enables evaluators processing 20+ tests/day to run all analyses '
+                'in a single operation instead of one-by-one.</span>',
+                unsafe_allow_html=True,
+            )
+    with cs2:
+        with st.container(border=True):
+            st.markdown(
+                '<span style="background:#3b82f6;color:white;padding:0.2rem 0.6rem;'
+                'border-radius:10px;font-size:0.7rem;font-weight:bold">COMING SOON</span>',
+                unsafe_allow_html=True,
+            )
+            st.markdown("#### Webhook / API Integration")
+            st.markdown("""
+- **REST API** for programmatic analysis from any platform
+- Real-time **webhook alerts** on high-risk tests
+- Direct integration with **SOP and proctoring platforms**
+- **Automate fraud screening** in existing test workflows
+            """)
+            st.markdown(
+                '<span style="color:#6b7280;font-size:0.8rem">'
+                'Impact: Connect the analyzer to your testing platform so every audio submission '
+                'is automatically screened without manual uploads.</span>',
+                unsafe_allow_html=True,
+            )
 def render_database_tab():
     """Render the voiceprint database tab."""
         )
     # Tabs
+    tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs(["Analyzer", "Compare", "Trends", "Database", "Logs", "Features", "About"])
     with tab1:
         render_analyzer_tab()
         render_compare_tab()
     with tab3:
+        render_trends_tab()
     with tab4:
+        render_database_tab()
     with tab5:
+        render_logs_tab()
     with tab6:
+        render_features_tab()
+    with tab7:
         render_about_tab()

src/database/models.py CHANGED Viewed

@@ -387,3 +387,104 @@ class Database:
             ]
         finally:
             session.close()

             ]
         finally:
             session.close()
+    def get_trend_data(self):
+        """Get aggregated trend data for charts."""
+        import json as _json
+        from collections import defaultdict
+        session = self.get_session()
+        try:
+            all_tests = session.query(TestAnalysis).order_by(
+                TestAnalysis.analyzed_at
+            ).all()
+            daily_scores = defaultdict(lambda: {'scores': [], 'count': 0, 'high_risk': 0})
+            daily_flags = defaultdict(lambda: {
+                'synthetic': 0, 'playback': 0, 'reading': 0,
+                'whispers': 0, 'pauses': 0, 'wake_words': 0
+            })
+            total_risk = 0.0
+            high_risk_count = 0
+            for t in all_tests:
+                day = t.analyzed_at.strftime('%Y-%m-%d') if t.analyzed_at else 'unknown'
+                risk = 0
+                if t.results_json:
+                    try:
+                        r = _json.loads(t.results_json)
+                        risk = r.get('risk_score', 0)
+                        daily_scores[day]['scores'].append(risk)
+                        daily_scores[day]['count'] += 1
+                        if risk > 60:
+                            daily_scores[day]['high_risk'] += 1
+                            high_risk_count += 1
+                        total_risk += risk
+                        if r.get('main_speaker', {}).get('is_synthetic', False):
+                            daily_flags[day]['synthetic'] += 1
+                        if r.get('playback_detected', False):
+                            daily_flags[day]['playback'] += 1
+                        if r.get('reading_pattern_detected', False):
+                            daily_flags[day]['reading'] += 1
+                        if r.get('whisper_detected', False):
+                            daily_flags[day]['whispers'] += 1
+                        if r.get('suspicious_pauses_detected', False):
+                            daily_flags[day]['pauses'] += 1
+                        if len(r.get('wake_words', [])) > 0:
+                            daily_flags[day]['wake_words'] += 1
+                    except Exception:
+                        pass
+            total = len(all_tests)
+            avg_risk = (total_risk / total) if total > 0 else 0
+            high_risk_pct = (high_risk_count / total * 100) if total > 0 else 0
+            scores_list = []
+            for day in sorted(daily_scores.keys()):
+                d = daily_scores[day]
+                scores_list.append({
+                    'date': day,
+                    'avg_score': round(sum(d['scores']) / len(d['scores']), 1),
+                    'count': d['count'],
+                    'high_risk': d['high_risk'],
+                })
+            flags_list = []
+            for day in sorted(daily_flags.keys()):
+                entry = {'date': day}
+                entry.update(daily_flags[day])
+                flags_list.append(entry)
+            # Top voices
+            all_vps = session.query(Voiceprint).order_by(
+                Voiceprint.times_seen.desc()
+            ).limit(10).all()
+            top_voices = [
+                {
+                    'id': vp.id,
+                    'label': vp.label or vp.id,
+                    'times_seen': vp.times_seen,
+                    'flagged': vp.is_flagged,
+                }
+                for vp in all_vps
+            ]
+            recurring = session.query(Voiceprint).filter(
+                Voiceprint.times_seen >= 2
+            ).count()
+            return {
+                'daily_scores': scores_list,
+                'daily_flags': flags_list,
+                'top_voices': top_voices,
+                'summary': {
+                    'total': total,
+                    'avg_risk': round(avg_risk, 1),
+                    'high_risk_pct': round(high_risk_pct, 1),
+                    'recurring': recurring,
+                },
+            }
+        finally:
+            session.close()