| import matplotlib.pyplot as plt
|
| import matplotlib.patches as mpatches
|
| import pandas as pd
|
| import numpy as np
|
| from collections import Counter
|
| from typing import List, Dict
|
| import re
|
|
|
def generate_dashboard(data):
    """Build the default dashboard for *data*.

    Legacy entry point retained for backwards compatibility: it forwards
    to ``generate_comprehensive_dashboard`` with the interviewee type
    fixed to ``"Other"`` and returns whatever that call returns.
    """
    legacy_figure = generate_comprehensive_dashboard(data, "Other")
    return legacy_figure
|
|
|
|
|
def extract_items_from_field(
    data: List[Dict],
    field_name: str,
    separator: str = ';',
) -> List[str]:
    """Collect the individual items stored in a delimited text field.

    Args:
        data: Row dictionaries to scan.
        field_name: Key whose value holds the delimited items.
        separator: Delimiter between items. Defaults to ``';'``, which is
            the behaviour existing callers rely on.

    Returns:
        Every non-empty, whitespace-stripped item, in row order and then
        in order of appearance within the row. Rows where the field is
        missing, empty, or not a string contribute nothing.

    Raises:
        ValueError: If ``separator`` is the empty string (raised by
            ``str.split``).
    """
    items: List[str] = []
    for row in data:
        value = row.get(field_name, "")
        # Only strings can be split; anything else (None, numbers, NaN
        # placeholders for missing cells, ...) is skipped silently.
        if not isinstance(value, str):
            continue
        stripped_parts = (part.strip() for part in value.split(separator))
        items.extend(part for part in stripped_parts if part)
    return items
|
|
|
|
|
def generate_comprehensive_dashboard(
    data: List[Dict],
    interviewee_type: str
) -> plt.Figure:
    """Generate a comprehensive dashboard with multiple visualizations.

    Args:
        data: Row dictionaries that are loaded into a ``pandas.DataFrame``.
        interviewee_type: ``"HCP"`` selects ``create_hcp_dashboard``,
            ``"Patient"`` selects ``create_patient_dashboard``; any other
            value falls back to ``create_general_dashboard``.

    Returns:
        The dashboard figure. When ``data`` is empty (or ``None``) a
        single-axes placeholder figure carrying a "no data" message is
        returned instead.
    """
    # Truthiness already covers both None and an empty list.
    if not data:
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.text(0.5, 0.5, 'No data available for visualization',
                ha='center', va='center', fontsize=14)
        ax.axis('off')
        return fig

    df = pd.DataFrame(data)

    if interviewee_type == "HCP":
        fig = create_hcp_dashboard(df)
    elif interviewee_type == "Patient":
        fig = create_patient_dashboard(df)
    else:
        fig = create_general_dashboard(df)

    # Lay out the figure we are about to return rather than whichever
    # figure pyplot currently considers "current".
    fig.tight_layout()
    return fig
|
|
|
|
|
def _hcp_top_items_barh(ax, items, title, color, limit=8, max_label_len=30):
    """Draw the ``limit`` most frequent ``items`` as annotated horizontal bars on ``ax``."""
    top_items = Counter(items).most_common(limit)
    # Long labels are truncated so they do not squeeze the plotting area.
    labels = [
        label[:max_label_len] + '...' if len(label) > max_label_len else label
        for label, _ in top_items
    ]
    values = [count for _, count in top_items]

    bars = ax.barh(labels, values, color=color, edgecolor='black')
    ax.set_xlabel('Frequency')
    ax.set_title(title)
    # most_common() is sorted descending; inverting puts the top item first.
    ax.invert_yaxis()

    # Print each count just past the end of its bar.
    for bar in bars:
        width = bar.get_width()
        ax.text(width, bar.get_y() + bar.get_height()/2,
                f' {int(width)}', ha='left', va='center', fontsize=9)


def create_hcp_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the 2x2 dashboard for HCP interviews.

    Panels (each is left as empty axes when its column is absent or holds
    no usable values):
        * top-left: histogram of 'Quality Score' with a mean marker;
        * top-right: the 8 most frequent 'Diagnoses' items;
        * bottom-left: the 8 most frequent 'Prescriptions' items;
        * bottom-right: 'Word Count' per 'Transcript ID' with an average
          marker.

    Args:
        df: One row per transcript.

    Returns:
        The populated matplotlib figure.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Healthcare Professional Interview Analysis', fontsize=16, fontweight='bold')

    # Quality score distribution.
    ax1 = axes[0, 0]
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
        if len(quality_scores) > 0:
            ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
            ax1.axvline(quality_scores.mean(), color='red', linestyle='--',
                        label=f'Mean: {quality_scores.mean():.2f}')
            ax1.set_xlabel('Quality Score')
            ax1.set_ylabel('Frequency')
            ax1.set_title('Transcript Quality Distribution')
            ax1.legend()
            ax1.grid(axis='y', alpha=0.3)

    # Most common diagnoses.
    if 'Diagnoses' in df.columns:
        diagnoses = extract_items_from_field(df.to_dict('records'), 'Diagnoses')
        if diagnoses:
            _hcp_top_items_barh(axes[0, 1], diagnoses,
                                'Most Common Diagnoses', '#2ecc71')

    # Most mentioned prescriptions.
    if 'Prescriptions' in df.columns:
        prescriptions = extract_items_from_field(df.to_dict('records'), 'Prescriptions')
        if prescriptions:
            _hcp_top_items_barh(axes[1, 0], prescriptions,
                                'Most Mentioned Prescriptions', '#e74c3c')

    # Interview length per transcript.
    ax4 = axes[1, 1]
    if 'Word Count' in df.columns and 'Transcript ID' in df.columns:
        numeric_counts = pd.to_numeric(df['Word Count'], errors='coerce')
        # Select IDs with the same mask as the counts. Slicing the first
        # len(word_counts) IDs would mislabel bars whenever a non-numeric
        # count sits anywhere but in the last rows.
        valid_rows = numeric_counts.notna().to_numpy()
        word_counts = numeric_counts[valid_rows]
        transcript_ids = df['Transcript ID'][valid_rows]

        if len(word_counts) > 0:
            positions = range(len(word_counts))
            ax4.bar(positions, word_counts, color='#9b59b6',
                    edgecolor='black', alpha=0.7)
            ax4.set_xlabel('Transcript')
            ax4.set_ylabel('Word Count')
            ax4.set_title('Interview Length by Transcript')
            ax4.set_xticks(positions)
            ax4.set_xticklabels(transcript_ids, rotation=45, ha='right')
            ax4.grid(axis='y', alpha=0.3)

            ax4.axhline(word_counts.mean(), color='red', linestyle='--',
                        label=f'Average: {int(word_counts.mean())}')
            ax4.legend()

    return fig
|
|
|
|
|
def _patient_top_items_barh(ax, items, title, color, limit, max_label_len=30):
    """Draw the ``limit`` most frequent ``items`` as annotated horizontal bars on ``ax``."""
    top_items = Counter(items).most_common(limit)
    # Long labels are truncated so they do not squeeze the plotting area.
    labels = [
        label[:max_label_len] + '...' if len(label) > max_label_len else label
        for label, _ in top_items
    ]
    values = [count for _, count in top_items]

    bars = ax.barh(labels, values, color=color, edgecolor='black')
    ax.set_xlabel('Frequency')
    ax.set_title(title)
    # most_common() is sorted descending; inverting puts the top item first.
    ax.invert_yaxis()

    # Print each count just past the end of its bar.
    for bar in bars:
        width = bar.get_width()
        ax.text(width, bar.get_y() + bar.get_height()/2,
                f' {int(width)}', ha='left', va='center', fontsize=9)


def create_patient_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the 2x2 dashboard for Patient interviews.

    Panels:
        * top-left: histogram of 'Quality Score' with a mean marker;
        * top-right: the 8 most frequent 'Primary Symptoms' items;
        * bottom-left: pie chart of the 6 most frequent 'Main Concerns';
        * bottom-right: the 6 most frequent 'Side Effects' items, or a
          text placeholder when the column exists but holds no items.

    A panel whose column is absent is left as empty axes.

    Args:
        df: One row per transcript.

    Returns:
        The populated matplotlib figure.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Patient Interview Analysis', fontsize=16, fontweight='bold')

    # Converted once and shared by the three item-based panels below.
    records = df.to_dict('records')

    # Quality score distribution.
    ax1 = axes[0, 0]
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
        if len(quality_scores) > 0:
            ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
            ax1.axvline(quality_scores.mean(), color='red', linestyle='--',
                        label=f'Mean: {quality_scores.mean():.2f}')
            ax1.set_xlabel('Quality Score')
            ax1.set_ylabel('Frequency')
            ax1.set_title('Transcript Quality Distribution')
            ax1.legend()
            ax1.grid(axis='y', alpha=0.3)

    # Most common symptoms.
    if 'Primary Symptoms' in df.columns:
        symptoms = extract_items_from_field(records, 'Primary Symptoms')
        if symptoms:
            _patient_top_items_barh(axes[0, 1], symptoms,
                                    'Most Common Symptoms', '#e67e22', limit=8)

    # Distribution of patient concerns.
    ax3 = axes[1, 0]
    if 'Main Concerns' in df.columns:
        concerns = extract_items_from_field(records, 'Main Concerns')
        if concerns:
            top_concerns = Counter(concerns).most_common(6)
            labels = [
                label[:25] + '...' if len(label) > 25 else label
                for label, _ in top_concerns
            ]
            sizes = [count for _, count in top_concerns]
            colors_list = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#f9ca24', '#6c5ce7', '#a29bfe']

            ax3.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90,
                    colors=colors_list[:len(sizes)])
            ax3.set_title('Distribution of Patient Concerns')

    # Reported side effects.
    ax4 = axes[1, 1]
    if 'Side Effects' in df.columns:
        side_effects = extract_items_from_field(records, 'Side Effects')
        if side_effects:
            _patient_top_items_barh(ax4, side_effects,
                                    'Reported Side Effects', '#e74c3c', limit=6)
        else:
            # NOTE(review): this fallback is assumed to belong to the
            # "column present but no items" case - confirm that a missing
            # column should keep showing empty axes instead.
            ax4.text(0.5, 0.5, 'No side effects reported',
                     ha='center', va='center', transform=ax4.transAxes, fontsize=12)
            ax4.axis('off')

    return fig
|
|
|
|
|
def create_general_dashboard(df: pd.DataFrame) -> plt.Figure:
    """Create the general-purpose 2x2 dashboard.

    Panels:
        * top-left: histogram of 'Quality Score' with a mean marker;
        * top-right: histogram of 'Word Count';
        * bottom-left: number of transcripts per quality band
          (>0.8, 0.6-0.8, 0.4-0.6, <0.4);
        * bottom-right: table of summary statistics.

    A panel whose column is absent is left empty. Values that cannot be
    parsed as numbers are ignored.

    Args:
        df: One row per transcript.

    Returns:
        The populated matplotlib figure.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('General Interview Analysis', fontsize=16, fontweight='bold')

    # Parse each numeric column once; None marks an absent column.
    quality_scores = None
    if 'Quality Score' in df.columns:
        quality_scores = pd.to_numeric(df['Quality Score'], errors='coerce').dropna()
    word_counts = None
    if 'Word Count' in df.columns:
        word_counts = pd.to_numeric(df['Word Count'], errors='coerce').dropna()

    # Quality score distribution.
    ax1 = axes[0, 0]
    if quality_scores is not None and len(quality_scores) > 0:
        ax1.hist(quality_scores, bins=10, color='#3498db', edgecolor='black', alpha=0.7)
        ax1.axvline(quality_scores.mean(), color='red', linestyle='--',
                    label=f'Mean: {quality_scores.mean():.2f}')
        ax1.set_xlabel('Quality Score')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Transcript Quality Distribution')
        ax1.legend()
        ax1.grid(axis='y', alpha=0.3)

    # Interview length distribution.
    ax2 = axes[0, 1]
    if word_counts is not None and len(word_counts) > 0:
        ax2.hist(word_counts, bins=15, color='#2ecc71', edgecolor='black', alpha=0.7)
        ax2.set_xlabel('Word Count')
        ax2.set_ylabel('Frequency')
        ax2.set_title('Interview Length Distribution')
        ax2.grid(axis='y', alpha=0.3)

    # Quality bands. Drawn whenever the column exists, even if every
    # value was unparseable (all bars are then zero).
    ax3 = axes[1, 0]
    if quality_scores is not None:
        categories = ['Excellent\n(>0.8)', 'Good\n(0.6-0.8)', 'Fair\n(0.4-0.6)', 'Poor\n(<0.4)']
        counts = [
            int((quality_scores > 0.8).sum()),
            int(((quality_scores >= 0.6) & (quality_scores <= 0.8)).sum()),
            int(((quality_scores >= 0.4) & (quality_scores < 0.6)).sum()),
            int((quality_scores < 0.4).sum()),
        ]

        colors_list = ['#2ecc71', '#f39c12', '#e67e22', '#e74c3c']
        bars = ax3.bar(categories, counts, color=colors_list, edgecolor='black', alpha=0.7)
        ax3.set_ylabel('Number of Transcripts')
        ax3.set_title('Quality Score Categories')
        ax3.grid(axis='y', alpha=0.3)

        # Label only the non-empty bands.
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                ax3.text(bar.get_x() + bar.get_width()/2., height,
                         f'{int(height)}', ha='center', va='bottom', fontsize=10)

    # Summary statistics table.
    ax4 = axes[1, 1]
    ax4.axis('off')

    stats_data = []
    if 'Transcript ID' in df.columns:
        stats_data.append(['Total Transcripts', str(len(df))])

    if quality_scores is not None and len(quality_scores) > 0:
        stats_data.append(['Avg Quality Score', f"{quality_scores.mean():.2f}"])
        stats_data.append(['Min Quality Score', f"{quality_scores.min():.2f}"])
        stats_data.append(['Max Quality Score', f"{quality_scores.max():.2f}"])

    if word_counts is not None and len(word_counts) > 0:
        stats_data.append(['Avg Word Count', f"{int(word_counts.mean()):,}"])
        stats_data.append(['Total Words', f"{int(word_counts.sum()):,}"])

    if stats_data:
        table = ax4.table(cellText=stats_data, cellLoc='left',
                          colWidths=[0.5, 0.3], loc='center',
                          colLabels=['Metric', 'Value'])
        table.auto_set_font_size(False)
        table.set_fontsize(11)
        table.scale(1, 2)

        # Row 0 is the header produced by colLabels.
        for col in (0, 1):
            header_cell = table[(0, col)]
            header_cell.set_facecolor('#34495e')
            header_cell.set_text_props(weight='bold', color='white')

        # Shade every even-numbered data row for readability.
        for row in range(2, len(stats_data) + 1, 2):
            for col in (0, 1):
                table[(row, col)].set_facecolor('#ecf0f1')

    # NOTE(review): the title is applied even when no statistics were
    # available - confirm it was not meant to be conditional on the table.
    ax4.set_title('Summary Statistics', fontsize=12, fontweight='bold', pad=20)

    return fig