"""Builders that turn parsed transcript data into pandas summary tables."""

from collections import Counter
from typing import Dict

import pandas as pd


def build_participant_profile_table(metadata: Dict) -> pd.DataFrame:
    """Return a two-column Metric/Value table summarising the participants.

    Args:
        metadata: Mapping that may contain ``total_transcripts``,
            ``avg_quality_score`` and ``avg_word_count``. Any missing key
            falls back to ``0``.

    Returns:
        A DataFrame with three rows. The participant total is stored as-is;
        the two averages are stored as pre-formatted strings (two decimals,
        and thousands-separated with no decimals, respectively).
    """
    total = metadata.get("total_transcripts", 0)
    avg_quality = metadata.get("avg_quality_score", 0)
    avg_words = metadata.get("avg_word_count", 0)
    return pd.DataFrame(
        {
            "Metric": ["Total Participants", "Avg Quality Score", "Avg Words"],
            "Value": [total, f"{avg_quality:.2f}", f"{avg_words:,.0f}"],
        }
    )


def build_quality_distribution_table(stats: Dict) -> pd.DataFrame:
    """Return per-tier counts and percentages of the quality distribution.

    Args:
        stats: Mapping whose optional ``"quality"`` entry holds the keys
            ``excellent_count``, ``good_count``, ``fair_count`` and
            ``poor_count``. Missing counts default to ``0``.

    Returns:
        An empty DataFrame when ``stats`` has no ``"quality"`` entry.
        Otherwise a four-row DataFrame with the columns ``Quality Tier``,
        ``Count`` and ``Percentage`` (rounded to one decimal place).
    """
    if "quality" not in stats:
        return pd.DataFrame()

    quality = stats["quality"]
    tier_keys = {
        "Excellent (>0.8)": "excellent_count",
        "Good (0.6-0.8)": "good_count",
        "Fair (0.4-0.6)": "fair_count",
        "Poor (<0.4)": "poor_count",
    }
    table = pd.DataFrame(
        {
            "Quality Tier": list(tier_keys),
            "Count": [quality.get(key, 0) for key in tier_keys.values()],
        }
    )

    total = table["Count"].sum()
    if total:
        table["Percentage"] = (table["Count"] / total * 100).round(1)
    else:
        # With no counted items, 0/0 would fill the column with NaN;
        # report 0.0 for every tier instead.
        table["Percentage"] = 0.0
    return table


def build_frequency_table(themes: Dict) -> pd.DataFrame:
    """Return a flat Category/Item/Frequency table from ``themes``.

    Args:
        themes: Mapping of theme name to a sequence of dicts, each providing
            an ``"item"`` and a ``"count"`` key.

    Returns:
        A DataFrame with one row per item, using at most the first ten
        entries of each theme, or an empty DataFrame when there are no rows.
    """
    rows = [
        {"Category": theme_name, "Item": entry["item"], "Frequency": entry["count"]}
        for theme_name, entries in themes.items()
        for entry in entries[:10]
    ]
    return pd.DataFrame(rows) if rows else pd.DataFrame()


def build_all_tables(parsed_data: Dict) -> Dict[str, pd.DataFrame]:
    """Build every summary table available for ``parsed_data``.

    Args:
        parsed_data: Mapping that must contain the keys ``"metadata"``,
            ``"themes"`` and ``"statistics"``. A ``"dataframe"`` entry is
            not consulted here.

    Returns:
        A dict keyed by table name. ``"participant_profile"`` is always
        present; ``"quality_distribution"`` and ``"theme_frequency"`` are
        included only when their tables are non-empty.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    tables = {
        "participant_profile": build_participant_profile_table(
            parsed_data["metadata"]
        ),
    }

    optional_tables = (
        ("quality_distribution", build_quality_distribution_table(parsed_data["statistics"])),
        ("theme_frequency", build_frequency_table(parsed_data["themes"])),
    )
    for table_name, table in optional_tables:
        if not table.empty:
            tables[table_name] = table
    return tables