Spaces:
Sleeping
Sleeping
| import matplotlib.pyplot as plt | |
| import matplotlib.patches as mpatches | |
| import pandas as pd | |
| import numpy as np | |
| from collections import Counter | |
| from typing import List, Dict | |
| import re | |
def generate_dashboard(data):
    """Build a dashboard for *data* through the legacy entry point.

    Retained for backwards compatibility: the call is forwarded to
    ``generate_comprehensive_dashboard`` with the interviewee type fixed
    to ``"Other"``.
    """
    legacy_interviewee_type = "Other"
    return generate_comprehensive_dashboard(data, legacy_interviewee_type)
def extract_items_from_field(data: List[Dict], field_name: str) -> List[str]:
    """Collect the individual entries of a semicolon-separated field.

    For every row in ``data`` the value stored under ``field_name`` is
    looked up; non-empty string values are split on ``';'``, each piece is
    stripped of surrounding whitespace, and blank pieces are discarded.
    Missing keys, empty values and non-string values contribute nothing.

    Returns a flat list of entries in row order (duplicates are kept).
    """
    collected: List[str] = []
    for record in data:
        raw = record.get(field_name, "")
        # Skip anything that is empty or is not text (e.g. None or numbers).
        if not raw or not isinstance(raw, str):
            continue
        stripped_pieces = (piece.strip() for piece in raw.split(';'))
        collected.extend(piece for piece in stripped_pieces if piece)
    return collected
def generate_comprehensive_dashboard(
    data: List[Dict],
    interviewee_type: str
) -> plt.Figure:
    """Generate a comprehensive dashboard with multiple visualizations.

    Args:
        data: Rows to visualize, one dict per transcript. The rows are
            loaded into a ``pandas.DataFrame`` before plotting.
        interviewee_type: ``"HCP"`` selects the HCP dashboard, ``"Patient"``
            the patient dashboard; any other value selects the general
            dashboard.

    Returns:
        The matplotlib figure holding the dashboard. When ``data`` is empty
        a single-axes figure carrying a "no data" message is returned
        instead.
    """
    if not data:
        # Nothing to plot: return a placeholder figure with a message.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.text(0.5, 0.5, 'No data available for visualization',
                ha='center', va='center', fontsize=14)
        ax.axis('off')
        return fig

    df = pd.DataFrame(data)

    # Pick the dashboard layout that matches the interviewee type.
    if interviewee_type == "HCP":
        fig = create_hcp_dashboard(df)
    elif interviewee_type == "Patient":
        fig = create_patient_dashboard(df)
    else:
        fig = create_general_dashboard(df)

    # Lay out the figure we are returning rather than pyplot's global
    # "current figure", which may be a different figure if another one was
    # created or activated in the meantime.
    fig.tight_layout()
    return fig
def create_hcp_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the 2x2 dashboard for HCP interviews.

    Panels (each is drawn only when its source column(s) exist in ``df``):
        * top-left: histogram of 'Quality Score' with a mean marker
        * top-right: the 8 most frequent entries of 'Diagnoses'
        * bottom-left: the 8 most frequent entries of 'Prescriptions'
        * bottom-right: 'Word Count' per transcript, labelled with
          'Transcript ID', with an average line

    Args:
        df: One row per transcript.

    Returns:
        The matplotlib figure containing the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Healthcare Professional Interview Analysis', fontsize=16, fontweight='bold')

    records = df.to_dict('records')

    def _plot_top_items(ax, column, title, color, limit):
        """Draw a horizontal bar chart of the most frequent items in ``column``."""
        if column not in df.columns:
            return
        top_items = Counter(extract_items_from_field(records, column)).most_common(limit)
        if not top_items:
            return
        # Truncate long labels so they fit next to the axis.
        labels = [name[:30] + '...' if len(name) > 30 else name for name, _ in top_items]
        values = [count for _, count in top_items]
        # Plot at numeric positions: with string categories, two labels that
        # become identical after truncation would be merged into one bar.
        positions = list(range(len(labels)))
        bars = ax.barh(positions, values, color=color, edgecolor='black')
        ax.set_yticks(positions)
        ax.set_yticklabels(labels)
        ax.set_xlabel('Frequency')
        ax.set_title(title)
        ax.invert_yaxis()  # most frequent item at the top
        # Add value labels at the end of each bar.
        for bar in bars:
            width = bar.get_width()
            ax.text(width, bar.get_y() + bar.get_height() / 2,
                    f' {int(width)}', ha='left', va='center', fontsize=9)

    # 1. Quality Score Distribution
    ax1 = axes[0, 0]
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
        if len(quality_scores) > 0:
            mean_quality = quality_scores.mean()
            ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
            ax1.axvline(mean_quality, color='red', linestyle='--',
                        label=f'Mean: {mean_quality:.2f}')
            # Only request a legend when the labelled mean line exists.
            ax1.legend()
        ax1.set_xlabel('Quality Score')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Transcript Quality Distribution')
        ax1.grid(axis='y', alpha=0.3)

    # 2. Top Diagnoses
    _plot_top_items(axes[0, 1], 'Diagnoses', 'Most Common Diagnoses', '#2ecc71', 8)

    # 3. Prescription Analysis
    _plot_top_items(axes[1, 0], 'Prescriptions', 'Most Mentioned Prescriptions', '#e74c3c', 8)

    # 4. Word Count by Transcript
    ax4 = axes[1, 1]
    if 'Word Count' in df.columns and 'Transcript ID' in df.columns:
        numeric_counts = pd.to_numeric(df['Word Count'], errors='coerce')
        valid_rows = numeric_counts.notna()
        word_counts = numeric_counts[valid_rows]
        # Take the IDs of exactly the rows whose word count survived the
        # numeric coercion, so every bar is labelled with its own transcript.
        transcript_ids = df.loc[valid_rows, 'Transcript ID']
        if len(word_counts) > 0:
            positions = list(range(len(word_counts)))
            mean_words = word_counts.mean()
            ax4.bar(positions, word_counts, color='#9b59b6',
                    edgecolor='black', alpha=0.7)
            ax4.set_xlabel('Transcript')
            ax4.set_ylabel('Word Count')
            ax4.set_title('Interview Length by Transcript')
            ax4.set_xticks(positions)
            ax4.set_xticklabels(transcript_ids, rotation=45, ha='right')
            ax4.grid(axis='y', alpha=0.3)
            # Add mean line
            ax4.axhline(mean_words, color='red', linestyle='--',
                        label=f'Average: {int(mean_words)}')
            ax4.legend()

    return fig
def create_patient_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the 2x2 dashboard for Patient interviews.

    Panels:
        * top-left: histogram of 'Quality Score' with a mean marker
        * top-right: the 8 most frequent entries of 'Primary Symptoms'
        * bottom-left: pie chart of the 6 most frequent 'Main Concerns'
        * bottom-right: the 6 most frequent entries of 'Side Effects', or a
          "No side effects reported" message when there is nothing to plot

    Panels other than the side-effects one are left empty when their source
    column is missing from ``df`` or yields no items.

    Args:
        df: One row per transcript.

    Returns:
        The matplotlib figure containing the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Patient Interview Analysis', fontsize=16, fontweight='bold')

    records = df.to_dict('records')

    def _plot_top_items(ax, column, title, color, limit):
        """Draw the most frequent items of ``column``; return True if drawn."""
        if column not in df.columns:
            return False
        top_items = Counter(extract_items_from_field(records, column)).most_common(limit)
        if not top_items:
            return False
        # Truncate long labels so they fit next to the axis.
        labels = [name[:30] + '...' if len(name) > 30 else name for name, _ in top_items]
        values = [count for _, count in top_items]
        # Plot at numeric positions: with string categories, two labels that
        # become identical after truncation would be merged into one bar.
        positions = list(range(len(labels)))
        bars = ax.barh(positions, values, color=color, edgecolor='black')
        ax.set_yticks(positions)
        ax.set_yticklabels(labels)
        ax.set_xlabel('Frequency')
        ax.set_title(title)
        ax.invert_yaxis()  # most frequent item at the top
        # Add value labels at the end of each bar.
        for bar in bars:
            width = bar.get_width()
            ax.text(width, bar.get_y() + bar.get_height() / 2,
                    f' {int(width)}', ha='left', va='center', fontsize=9)
        return True

    # 1. Quality Score Distribution
    ax1 = axes[0, 0]
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
        if len(quality_scores) > 0:
            mean_quality = quality_scores.mean()
            ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
            ax1.axvline(mean_quality, color='red', linestyle='--',
                        label=f'Mean: {mean_quality:.2f}')
            # Only request a legend when the labelled mean line exists.
            ax1.legend()
        ax1.set_xlabel('Quality Score')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Transcript Quality Distribution')
        ax1.grid(axis='y', alpha=0.3)

    # 2. Top Symptoms
    _plot_top_items(axes[0, 1], 'Primary Symptoms', 'Most Common Symptoms', '#e67e22', 8)

    # 3. Patient Concerns
    ax3 = axes[1, 0]
    if 'Main Concerns' in df.columns:
        top_concerns = Counter(
            extract_items_from_field(records, 'Main Concerns')
        ).most_common(6)
        if top_concerns:
            labels = [name[:25] + '...' if len(name) > 25 else name for name, _ in top_concerns]
            sizes = [count for _, count in top_concerns]
            colors_list = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#f9ca24', '#6c5ce7', '#a29bfe']
            ax3.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90,
                    colors=colors_list[:len(sizes)])
            ax3.set_title('Distribution of Patient Concerns')

    # 4. Side Effects
    ax4 = axes[1, 1]
    drew_side_effects = _plot_top_items(
        ax4, 'Side Effects', 'Reported Side Effects', '#e74c3c', 6
    )
    if not drew_side_effects:
        # Column missing or no entries: show a message instead of empty axes.
        ax4.text(0.5, 0.5, 'No side effects reported',
                 ha='center', va='center', transform=ax4.transAxes, fontsize=12)
        ax4.axis('off')

    return fig
def create_general_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the general 2x2 dashboard.

    Panels:
        * top-left: histogram of 'Quality Score' with a mean marker
        * top-right: histogram of 'Word Count'
        * bottom-left: number of transcripts per quality category
          (>0.8, 0.6-0.8, 0.4-0.6, <0.4)
        * bottom-right: table of summary statistics

    A panel is only populated when the column it needs exists in ``df``.
    Values that cannot be parsed as numbers are ignored.

    Args:
        df: One row per transcript.

    Returns:
        The matplotlib figure containing the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('General Interview Analysis', fontsize=16, fontweight='bold')

    # Coerce each numeric column once and reuse the result in every panel.
    quality_scores = None
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
    word_counts = None
    if 'Word Count' in df.columns:
        word_counts = pd.to_numeric(df['Word Count'], errors='coerce').dropna()

    # 1. Quality Score Distribution
    ax1 = axes[0, 0]
    if quality_scores is not None:
        if len(quality_scores) > 0:
            mean_quality = quality_scores.mean()
            ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
            ax1.axvline(mean_quality, color='red', linestyle='--',
                        label=f'Mean: {mean_quality:.2f}')
            # Only request a legend when the labelled mean line exists.
            ax1.legend()
        ax1.set_xlabel('Quality Score')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Transcript Quality Distribution')
        ax1.grid(axis='y', alpha=0.3)

    # 2. Word Count Distribution
    ax2 = axes[0, 1]
    if word_counts is not None:
        if len(word_counts) > 0:
            ax2.hist(word_counts, bins=15, color='#2ecc71', edgecolor='black', alpha=0.7)
        ax2.set_xlabel('Word Count')
        ax2.set_ylabel('Frequency')
        ax2.set_title('Interview Length Distribution')
        ax2.grid(axis='y', alpha=0.3)

    # 3. Processing Summary
    ax3 = axes[1, 0]
    if quality_scores is not None:
        categories = ['Excellent\n(>0.8)', 'Good\n(0.6-0.8)', 'Fair\n(0.4-0.6)', 'Poor\n(<0.4)']
        # Vectorised counts; the four conditions partition the score range.
        counts = [
            int((quality_scores > 0.8).sum()),
            int(((quality_scores >= 0.6) & (quality_scores <= 0.8)).sum()),
            int(((quality_scores >= 0.4) & (quality_scores < 0.6)).sum()),
            int((quality_scores < 0.4).sum()),
        ]
        colors_list = ['#2ecc71', '#f39c12', '#e67e22', '#e74c3c']
        bars = ax3.bar(categories, counts, color=colors_list, edgecolor='black', alpha=0.7)
        ax3.set_ylabel('Number of Transcripts')
        ax3.set_title('Quality Score Categories')
        ax3.grid(axis='y', alpha=0.3)
        # Add value labels above non-empty bars.
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                ax3.text(bar.get_x() + bar.get_width() / 2., height,
                         f'{int(height)}', ha='center', va='bottom', fontsize=10)

    # 4. Summary Statistics Table
    ax4 = axes[1, 1]
    ax4.axis('off')
    stats_data = []
    if 'Transcript ID' in df.columns:
        stats_data.append(['Total Transcripts', str(len(df))])
    if quality_scores is not None and len(quality_scores) > 0:
        stats_data.append(['Avg Quality Score', f"{quality_scores.mean():.2f}"])
        stats_data.append(['Min Quality Score', f"{quality_scores.min():.2f}"])
        stats_data.append(['Max Quality Score', f"{quality_scores.max():.2f}"])
    if word_counts is not None and len(word_counts) > 0:
        stats_data.append(['Avg Word Count', f"{int(word_counts.mean()):,}"])
        stats_data.append(['Total Words', f"{int(word_counts.sum()):,}"])

    if stats_data:
        table = ax4.table(cellText=stats_data, cellLoc='left',
                          colWidths=[0.5, 0.3], loc='center',
                          colLabels=['Metric', 'Value'])
        table.auto_set_font_size(False)
        table.set_fontsize(11)
        table.scale(1, 2)
        # Style the table: row 0 is the header row created by colLabels.
        for col in (0, 1):
            header_cell = table[(0, col)]
            header_cell.set_facecolor('#34495e')
            header_cell.set_text_props(weight='bold', color='white')
        # Shade every even-numbered body row for readability.
        for row in range(2, len(stats_data) + 1, 2):
            for col in (0, 1):
                table[(row, col)].set_facecolor('#ecf0f1')

    ax4.set_title('Summary Statistics', fontsize=12, fontweight='bold', pad=20)
    return fig