| """ |
| Advanced Visualization Components for Multilingual Audio Intelligence System |
| |
| This module provides sophisticated visualization components for creating |
| interactive audio analysis interfaces. Features include waveform visualization, |
| speaker timelines, and processing feedback displays. |
| |
| Key Features: |
| - Interactive waveform with speaker segment overlays |
| - Speaker activity timeline visualization |
| - Processing progress indicators |
| - Exportable visualizations |
| |
| Dependencies: plotly, matplotlib, numpy |
| """ |
|
|
import base64
import html
import io
import json
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
|
|
| |
| try: |
| import plotly.graph_objects as go |
| import plotly.express as px |
| from plotly.subplots import make_subplots |
| PLOTLY_AVAILABLE = True |
| except ImportError: |
| PLOTLY_AVAILABLE = False |
| logging.warning("Plotly not available. Some visualizations will be limited.") |
|
|
| try: |
| import matplotlib.pyplot as plt |
| import matplotlib.patches as patches |
| MATPLOTLIB_AVAILABLE = True |
| except ImportError: |
| MATPLOTLIB_AVAILABLE = False |
| logging.warning("Matplotlib not available. Fallback visualizations will be used.") |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
class WaveformVisualizer:
    """Advanced waveform visualization with speaker overlays.

    Builds a two-row Plotly figure: the audio waveform with shaded
    per-speaker background regions on top, and a horizontal speaker-activity
    timeline underneath. Degrades gracefully (placeholder figure or ``None``)
    when Plotly is unavailable.
    """

    def __init__(self, width: int = 1000, height: int = 300):
        # Requested figure dimensions in pixels. NOTE(review): the combined
        # subplot layout currently uses a fixed 600px height; these values are
        # kept for API compatibility — confirm before removing.
        self.width = width
        self.height = height
        # Palette cycled through when assigning a distinct color per speaker.
        self.colors = [
            '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
            '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9'
        ]

    def create_interactive_waveform(self,
                                    audio_data: np.ndarray,
                                    sample_rate: int,
                                    speaker_segments: List[Dict],
                                    transcription_segments: Optional[List[Dict]] = None):
        """
        Create interactive waveform visualization with speaker overlays.

        Args:
            audio_data: Mono audio samples as a 1-D numpy array.
            sample_rate: Sampling rate of ``audio_data`` in Hz.
            speaker_segments: Segment dicts with 'start_time'/'end_time'
                (seconds) and, optionally, 'speaker_id'.
            transcription_segments: Accepted for API compatibility but
                currently unused.

        Returns:
            plotly.graph_objects.Figure, or the fallback visualization when
            Plotly is unavailable or an error occurs.
        """
        if not PLOTLY_AVAILABLE:
            return self._create_fallback_visualization(audio_data, sample_rate, speaker_segments)

        try:
            time_axis = np.linspace(0, len(audio_data) / sample_rate, len(audio_data))

            # Downsample long recordings (~50k points max) so the figure
            # stays responsive in the browser.
            if len(audio_data) > 50000:
                step = len(audio_data) // 50000
                audio_data = audio_data[::step]
                time_axis = time_axis[::step]

            fig = make_subplots(
                rows=2, cols=1,
                row_heights=[0.7, 0.3],
                subplot_titles=("Audio Waveform with Speaker Segments", "Speaker Timeline"),
                vertical_spacing=0.1
            )

            # Row 1: the raw waveform trace.
            fig.add_trace(
                go.Scatter(
                    x=time_axis,
                    y=audio_data,
                    mode='lines',
                    name='Waveform',
                    line=dict(color='#2C3E50', width=1),
                    hovertemplate='Time: %{x:.2f}s<br>Amplitude: %{y:.3f}<extra></extra>'
                ),
                row=1, col=1
            )

            # Resolve each segment's speaker id exactly once so the overlay
            # pass and the timeline pass below agree on naming. (Previously
            # the timeline pass indexed segment['speaker_id'] directly and
            # raised KeyError for segments lacking that key.)
            labeled = [
                (segment, segment.get('speaker_id', f'Speaker_{i}'))
                for i, segment in enumerate(speaker_segments)
            ]

            # Loop-invariant hoisted out of the segment loop: peak amplitude
            # used to vertically place the speaker labels.
            peak = float(np.max(audio_data)) if len(audio_data) > 0 else 0.0

            speaker_colors = {}
            for segment, speaker_id in labeled:
                if speaker_id not in speaker_colors:
                    speaker_colors[speaker_id] = self.colors[len(speaker_colors) % len(self.colors)]

                # Shade the waveform region covered by this segment.
                fig.add_vrect(
                    x0=segment['start_time'],
                    x1=segment['end_time'],
                    fillcolor=speaker_colors[speaker_id],
                    opacity=0.3,
                    layer="below",
                    line_width=0,
                    row=1, col=1
                )

                # Short speaker tag centered above the segment.
                mid_time = (segment['start_time'] + segment['end_time']) / 2
                if len(audio_data) > 0:
                    fig.add_annotation(
                        x=mid_time,
                        y=peak * 0.8,
                        text=speaker_id.replace('SPEAKER_', 'S'),
                        showarrow=False,
                        font=dict(color=speaker_colors[speaker_id], size=10, family="Arial Black"),
                        row=1, col=1
                    )

            # Row 2: one horizontal bar per segment, one lane per speaker.
            for i, (speaker_id, color) in enumerate(speaker_colors.items()):
                own_segments = [s for s, sid in labeled if sid == speaker_id]

                for segment in own_segments:
                    fig.add_trace(
                        go.Scatter(
                            x=[segment['start_time'], segment['end_time']],
                            y=[i, i],
                            mode='lines',
                            name=speaker_id,
                            line=dict(color=color, width=8),
                            # Only the first bar per speaker appears in the
                            # legend; identity check avoids false positives
                            # from dict equality between identical segments.
                            showlegend=(segment is own_segments[0]),
                            hovertemplate=f'{speaker_id}<br>%{{x:.2f}}s<extra></extra>'
                        ),
                        row=2, col=1
                    )

            fig.update_layout(
                title=dict(
                    text="🎵 Multilingual Audio Intelligence Visualization",
                    font=dict(size=20, family="Arial Black"),
                    x=0.5
                ),
                height=600,
                hovermode='x unified',
                showlegend=True,
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1
                ),
                plot_bgcolor='white',
                paper_bgcolor='#F8F9FA'
            )

            fig.update_xaxes(title_text="Time (seconds)", row=2, col=1)
            fig.update_yaxes(title_text="Amplitude", row=1, col=1)
            if speaker_colors:
                fig.update_yaxes(title_text="Speaker", row=2, col=1,
                                 ticktext=list(speaker_colors.keys()),
                                 tickvals=list(range(len(speaker_colors))))

            return fig

        except Exception as e:
            logger.error(f"Error creating waveform visualization: {e}")
            return self._create_fallback_visualization(audio_data, sample_rate, speaker_segments)

    def _create_fallback_visualization(self, audio_data, sample_rate, speaker_segments):
        """Create a placeholder when the interactive chart cannot be built.

        Returns an annotated empty Plotly figure when Plotly is importable
        (i.e. we arrived here via an error path), otherwise ``None`` —
        callers must tolerate a ``None`` result.
        """
        if PLOTLY_AVAILABLE:
            fig = go.Figure()
            fig.add_annotation(
                text="Waveform visualization temporarily unavailable",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=16, color="gray")
            )
            fig.update_layout(
                title="Audio Waveform Visualization",
                xaxis_title="Time (seconds)",
                yaxis_title="Amplitude"
            )
            return fig
        # No plotting backend at all.
        return None

    def create_language_distribution_chart(self, segments: List[Dict]):
        """Create side-by-side pie charts of language share.

        Left pie: share by number of segments; right pie: share by total
        speech duration.

        Args:
            segments: Segment dicts with optional 'original_language',
                'start_time' and 'end_time' keys.

        Returns:
            plotly.graph_objects.Figure, or None when Plotly is unavailable
            or chart construction fails.
        """
        if not PLOTLY_AVAILABLE:
            return None

        try:
            # Aggregate per-language segment counts and summed durations.
            language_counts = {}
            language_durations = {}
            for segment in segments:
                lang = segment.get('original_language', 'unknown')
                duration = segment.get('end_time', 0) - segment.get('start_time', 0)
                language_counts[lang] = language_counts.get(lang, 0) + 1
                language_durations[lang] = language_durations.get(lang, 0) + duration

            fig = make_subplots(
                rows=1, cols=2,
                subplot_titles=('Language Distribution by Segments', 'Language Distribution by Duration'),
                specs=[[{'type': 'domain'}, {'type': 'domain'}]]
            )

            fig.add_trace(
                go.Pie(
                    labels=list(language_counts.keys()),
                    values=list(language_counts.values()),
                    name="Segments",
                    hovertemplate='%{label}<br>%{value} segments<br>%{percent}<extra></extra>'
                ),
                row=1, col=1
            )

            fig.add_trace(
                go.Pie(
                    labels=list(language_durations.keys()),
                    values=list(language_durations.values()),
                    name="Duration",
                    hovertemplate='%{label}<br>%{value:.1f}s<br>%{percent}<extra></extra>'
                ),
                row=1, col=2
            )

            fig.update_layout(
                title_text="🌍 Language Analysis",
                height=400,
                showlegend=True
            )

            return fig

        except Exception as e:
            logger.error(f"Error creating language distribution chart: {e}")
            return None
|
|
|
|
class SubtitleRenderer:
    """Advanced subtitle rendering with synchronization.

    Produces a styled, scrollable HTML transcript: one card per segment with
    a speaker label, time range, original text (plus language tag), optional
    translation, and a confidence bar. All user-derived text is HTML-escaped
    before being embedded in markup.
    """

    def __init__(self):
        # Inline CSS emitted ahead of the rendered segment cards.
        self.subtitle_style = """
        <style>
        .subtitle-container {
            max-height: 400px;
            overflow-y: auto;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            border-radius: 15px;
            padding: 20px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            margin: 10px 0;
        }
        .subtitle-segment {
            background: rgba(255,255,255,0.95);
            margin: 10px 0;
            padding: 15px;
            border-radius: 10px;
            border-left: 4px solid #4ECDC4;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            transition: all 0.3s ease;
        }
        .subtitle-segment:hover {
            transform: translateY(-2px);
            box-shadow: 0 5px 20px rgba(0,0,0,0.15);
        }
        .subtitle-header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 10px;
            font-weight: bold;
        }
        .speaker-label {
            background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
            color: white;
            padding: 5px 12px;
            border-radius: 20px;
            font-size: 12px;
            font-weight: bold;
        }
        .timestamp {
            color: #666;
            font-size: 12px;
            font-family: 'Courier New', monospace;
        }
        .language-tag {
            background: #45B7D1;
            color: white;
            padding: 2px 8px;
            border-radius: 10px;
            font-size: 10px;
            margin-left: 5px;
        }
        .original-text {
            margin: 8px 0;
            font-size: 16px;
            color: #2C3E50;
            line-height: 1.4;
        }
        .translated-text {
            margin: 8px 0;
            font-size: 14px;
            color: #7F8C8D;
            font-style: italic;
            line-height: 1.4;
            border-top: 1px solid #ECF0F1;
            padding-top: 8px;
        }
        .confidence-bar {
            width: 100%;
            height: 4px;
            background: #ECF0F1;
            border-radius: 2px;
            overflow: hidden;
            margin-top: 5px;
        }
        .confidence-fill {
            height: 100%;
            background: linear-gradient(90deg, #FF6B6B, #4ECDC4, #45B7D1);
            transition: width 0.3s ease;
        }
        </style>
        """

    def render_subtitles(self, segments: List[Dict], show_translations: bool = True) -> str:
        """
        Render beautiful HTML subtitles with speaker attribution.

        Args:
            segments: Processed segment dicts; recognized keys are
                'speaker_id', 'start_time', 'end_time', 'original_text',
                'translated_text', 'original_language',
                'confidence_transcription'. Missing keys fall back to
                sensible defaults.
            show_translations: Whether to show translations (only shown when
                they differ from the original text).

        Returns:
            str: Self-contained HTML fragment (inline CSS + segment cards),
            or a red error <div> if rendering fails.
        """
        try:
            html_parts = [self.subtitle_style]
            html_parts.append('<div class="subtitle-container">')

            for i, segment in enumerate(segments):
                speaker_id = segment.get('speaker_id', f'Speaker_{i}')
                start_time = segment.get('start_time', 0)
                end_time = segment.get('end_time', 0)
                original_text = segment.get('original_text', '')
                translated_text = segment.get('translated_text', '')
                original_language = segment.get('original_language', 'unknown')
                confidence = segment.get('confidence_transcription', 0.0)

                start_str = self._format_timestamp(start_time)
                end_str = self._format_timestamp(end_time)

                # Escape all user-derived text before embedding it in HTML so
                # transcripts containing markup cannot inject tags/scripts.
                speaker_label = html.escape(speaker_id.replace("SPEAKER_", "Speaker "))
                safe_original = html.escape(original_text)
                safe_translation = html.escape(translated_text)
                safe_language = html.escape(original_language.upper())

                html_parts.append('<div class="subtitle-segment">')

                # Card header: speaker label and time range.
                html_parts.append('<div class="subtitle-header">')
                html_parts.append(f'<span class="speaker-label">{speaker_label}</span>')
                html_parts.append(f'<span class="timestamp">{start_str} - {end_str}</span>')
                html_parts.append('</div>')

                if original_text:
                    html_parts.append('<div class="original-text">')
                    html_parts.append(f'🗣️ {safe_original}')
                    html_parts.append(f'<span class="language-tag">{safe_language}</span>')
                    html_parts.append('</div>')

                # Show the translation only when it adds information.
                if show_translations and translated_text and translated_text != original_text:
                    html_parts.append('<div class="translated-text">')
                    html_parts.append(f'🌐 {safe_translation}')
                    html_parts.append('</div>')

                # Confidence bar; clamp so malformed scores (<0 or >1) cannot
                # produce an out-of-range CSS width.
                confidence_percent = max(0.0, min(100.0, confidence * 100))
                html_parts.append('<div class="confidence-bar">')
                html_parts.append(f'<div class="confidence-fill" style="width: {confidence_percent}%"></div>')
                html_parts.append('</div>')

                html_parts.append('</div>')

            html_parts.append('</div>')
            return ''.join(html_parts)

        except Exception as e:
            logger.error(f"Error rendering subtitles: {e}")
            return f'<div style="color: red; padding: 20px;">Error rendering subtitles: {html.escape(str(e))}</div>'

    def _format_timestamp(self, seconds: float) -> str:
        """Format a time offset in seconds as MM:SS.ss."""
        try:
            minutes = int(seconds // 60)
            secs = seconds % 60
            return f"{minutes:02d}:{secs:05.2f}"
        except (TypeError, ValueError):
            # Bad input (None, non-numeric string): fall back to a zero
            # timestamp instead of breaking the whole transcript.
            return "00:00.00"
|
|
|
|
class PerformanceMonitor:
    """Real-time performance monitoring component."""

    def create_performance_dashboard(self, processing_stats: Dict) -> str:
        """Build the HTML performance dashboard: summary cards plus a bar
        chart of per-component processing times.

        Args:
            processing_stats: Dict with optional keys 'component_times'
                (name -> seconds), 'total_time', 'num_speakers',
                'num_segments', 'languages_detected'.

        Returns:
            str: HTML fragment, or a red error <div> on failure.
        """
        try:
            timings = processing_stats.get('component_times', {})
            elapsed = processing_stats.get('total_time', 0)

            if not (PLOTLY_AVAILABLE and timings):
                # No plotting backend or no timing data: show a placeholder.
                chart_html = '<div style="text-align: center; padding: 40px;">Performance chart temporarily unavailable</div>'
            else:
                labels = list(timings.keys())
                durations = list(timings.values())
                palette = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7'][:len(labels)]

                bar = go.Bar(
                    x=labels,
                    y=durations,
                    marker_color=palette,
                    text=[f'{t:.2f}s' for t in durations],
                    textposition='auto',
                )
                chart = go.Figure(data=[bar])
                chart.update_layout(
                    title='⚡ Processing Performance Breakdown',
                    xaxis_title='Pipeline Components',
                    yaxis_title='Processing Time (seconds)',
                    height=400,
                    plot_bgcolor='white',
                    paper_bgcolor='#F8F9FA'
                )
                chart_html = chart.to_html(include_plotlyjs='cdn', div_id='performance-chart')

            # Headline metric cards rendered above the chart.
            summary_html = f"""
            <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
                        color: white; padding: 20px; border-radius: 15px; margin: 10px 0;
                        box-shadow: 0 10px 30px rgba(0,0,0,0.2);">
                <h3 style="margin: 0 0 15px 0;">📊 Processing Summary</h3>
                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px;">
                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;">
                        <div style="font-size: 24px; font-weight: bold;">{elapsed:.2f}s</div>
                        <div style="opacity: 0.8;">Total Processing Time</div>
                    </div>
                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;">
                        <div style="font-size: 24px; font-weight: bold;">{processing_stats.get('num_speakers', 0)}</div>
                        <div style="opacity: 0.8;">Speakers Detected</div>
                    </div>
                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;">
                        <div style="font-size: 24px; font-weight: bold;">{processing_stats.get('num_segments', 0)}</div>
                        <div style="opacity: 0.8;">Speech Segments</div>
                    </div>
                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;">
                        <div style="font-size: 24px; font-weight: bold;">{len(processing_stats.get('languages_detected', []))}</div>
                        <div style="opacity: 0.8;">Languages Found</div>
                    </div>
                </div>
            </div>
            """

            return summary_html + chart_html

        except Exception as e:
            logger.error(f"Error creating performance dashboard: {e}")
            return f'<div style="color: red;">Performance Dashboard Error: {str(e)}</div>'
|
|
|
|
class FileDownloader:
    """Enhanced file download component with preview.

    Renders, for each recognized output format, a truncated (and
    HTML-escaped) preview plus a base64 data-URI download link.
    """

    # Formats that get a preview/download card; anything else is skipped.
    _KNOWN_FORMATS = ('json', 'srt_original', 'srt_translated', 'text', 'csv', 'summary')
    # Maximum number of characters shown in a preview.
    _PREVIEW_CHARS = 500

    def create_download_section(self, outputs: Dict[str, str], filename_base: str) -> str:
        """
        Create download section with file previews.

        Args:
            outputs: Mapping of format name -> full file content (text).
            filename_base: Base file name (without extension) used for the
                downloaded files.

        Returns:
            str: HTML fragment with one preview + download card per
            recognized format.
        """
        download_html = """
        <div style="margin-top: 20px;">
            <h3 style="margin-bottom: 10px;">📥 Download Results</h3>
            <div style="display: flex; flex-direction: column; gap: 10px;">
        """

        for format_name, content in outputs.items():
            if format_name not in self._KNOWN_FORMATS:
                continue

            extension = self._get_file_extension(format_name)
            # Escape the preview so file content cannot inject markup into
            # the page, and only append an ellipsis when the content was
            # actually truncated.
            preview = html.escape(content[:self._PREVIEW_CHARS])
            if len(content) > self._PREVIEW_CHARS:
                preview += '...'
            encoded = base64.b64encode(content.encode()).decode()

            download_html += f"""
            <div style="background: #f0f0f0; padding: 15px; border-radius: 10px; border: 1px solid #ccc;">
                <h4 style="margin: 0 0 5px 0;">{format_name.upper()} Preview</h4>
                <pre style="font-size: 14px; white-space: pre-wrap; word-wrap: break-word; background: #fff; padding: 10px; border-radius: 5px; border: 1px solid #eee; overflow-x: auto;">
{preview}
                </pre>
                <a href="data:text/{extension};base64,{encoded}" 
                   download="{filename_base}.{extension}"
                   style="background: linear-gradient(45deg, #FF6B6B, #4ECDC4); color: white; padding: 10px 20px; border-radius: 8px; text-decoration: none; display: inline-block; margin-top: 10px;">
                    Download {format_name.upper()}
                </a>
            </div>
            """

        download_html += """
            </div>
        </div>
        """
        return download_html

    def _get_file_extension(self, format_name: str) -> str:
        """Map an output format name to its download file extension."""
        extensions = {
            'json': 'json',
            'srt_original': 'srt',
            # Compound suffixes keep the "<base>.en.srt" / "<base>.summary.txt"
            # naming convention for translated/summary outputs.
            'srt_translated': 'en.srt',
            'text': 'txt',
            'csv': 'csv',
            'summary': 'summary.txt'
        }
        return extensions.get(format_name, 'txt')
|
|
|
|
def create_custom_css() -> str:
    """Create custom CSS for the entire application.

    Returns a single stylesheet string covering: the Gradio container
    background, the gradient page header (`.main-header` / `.main-title` /
    `.main-subtitle`), the dashed upload area, gradient `.primary-button`
    styling, hoverable `.info-card`s, a `pulse` keyframe used by the
    `.processing` class, and a <=768px mobile breakpoint. Intended to be
    injected once when the UI is constructed.
    """
    return """
    /* Global Styles */
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        min-height: 100vh;
    }
    
    /* Header Styles */
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        text-align: center;
        padding: 30px;
        border-radius: 0 0 20px 20px;
        margin-bottom: 20px;
        box-shadow: 0 5px 15px rgba(0,0,0,0.1);
    }
    
    .main-title {
        font-size: 2.5em;
        font-weight: bold;
        margin: 0;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }
    
    .main-subtitle {
        font-size: 1.2em;
        opacity: 0.9;
        margin-top: 10px;
    }
    
    /* Upload Area */
    .upload-area {
        border: 3px dashed #4ECDC4;
        border-radius: 15px;
        padding: 40px;
        text-align: center;
        background: rgba(78, 205, 196, 0.1);
        transition: all 0.3s ease;
    }
    
    .upload-area:hover {
        border-color: #45B7D1;
        background: rgba(69, 183, 209, 0.15);
        transform: translateY(-2px);
    }
    
    /* Button Styles */
    .primary-button {
        background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
        border: none;
        color: white;
        padding: 15px 30px;
        border-radius: 25px;
        font-weight: bold;
        transition: all 0.3s ease;
        box-shadow: 0 4px 15px rgba(0,0,0,0.2);
    }
    
    .primary-button:hover {
        transform: translateY(-3px);
        box-shadow: 0 6px 20px rgba(0,0,0,0.3);
    }
    
    /* Card Styles */
    .info-card {
        background: white;
        border-radius: 15px;
        padding: 20px;
        margin: 10px;
        box-shadow: 0 5px 15px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
    }
    
    .info-card:hover {
        transform: translateY(-3px);
        box-shadow: 0 8px 25px rgba(0,0,0,0.15);
    }
    
    /* Progress Animations */
    @keyframes pulse {
        0% { opacity: 1; }
        50% { opacity: 0.5; }
        100% { opacity: 1; }
    }
    
    .processing {
        animation: pulse 1.5s infinite;
    }
    
    /* Responsive Design */
    @media (max-width: 768px) {
        .main-title {
            font-size: 2em;
        }
        .main-subtitle {
            font-size: 1em;
        }
    }
    """
|
|
|
|
def create_loading_animation() -> str:
    """Create loading animation HTML.

    Returns a self-contained fragment: a CSS spinner plus two lines of
    status text, with the `spin` keyframe defined inline so the animation
    works wherever the fragment is injected. Shown while the processing
    pipeline is running.
    """
    return """
    <div style="text-align: center; padding: 40px;">
        <div style="display: inline-block; width: 50px; height: 50px; border: 3px solid #f3f3f3; 
                    border-top: 3px solid #4ECDC4; border-radius: 50%; animation: spin 1s linear infinite;"></div>
        <div style="margin-top: 20px; font-size: 18px; color: #666;">
            🎵 Processing your audio with AI magic...
        </div>
        <div style="margin-top: 10px; font-size: 14px; color: #999;">
            This may take a few moments depending on audio length
        </div>
    </div>
    <style>
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
    </style>
    """
|
|
|
|
| |
# Explicit public API: the names exported by `from <module> import *`.
__all__ = [
    'WaveformVisualizer',
    'SubtitleRenderer',
    'PerformanceMonitor',
    'FileDownloader',
    'create_custom_css',
    'create_loading_animation'
]