TranscriptWriting / table_builder.py
jmisak's picture
Upload 23 files
54c99ad verified
raw
history blame
1.93 kB
import pandas as pd
from typing import Dict
from collections import Counter
def build_participant_profile_table(metadata: Dict) -> pd.DataFrame:
    """Build a two-column summary table of headline participant metrics.

    Args:
        metadata: Mapping read for the keys ``total_transcripts``,
            ``avg_quality_score`` and ``avg_word_count``. A key that is
            missing or explicitly ``None`` is reported as 0.

    Returns:
        A DataFrame with columns ``Metric`` and ``Value``. The participant
        total is passed through as-is; the quality score is formatted with
        two decimals and the word count as a thousands-separated integer.
    """

    def _value_or_zero(key: str):
        # ``dict.get(key, 0)`` only covers a missing key; a key present with
        # value None would otherwise crash the numeric format specs below.
        raw = metadata.get(key)
        return 0 if raw is None else raw

    return pd.DataFrame({
        "Metric": ["Total Participants", "Avg Quality Score", "Avg Words"],
        "Value": [
            _value_or_zero("total_transcripts"),
            f"{_value_or_zero('avg_quality_score'):.2f}",
            f"{_value_or_zero('avg_word_count'):,.0f}",
        ],
    })
def build_quality_distribution_table(stats: Dict) -> pd.DataFrame:
    """Build a table of counts and percentage shares per quality tier.

    Args:
        stats: Statistics mapping. Its ``"quality"`` entry is read for the
            keys ``excellent_count``, ``good_count``, ``fair_count`` and
            ``poor_count``; a missing key counts as 0.

    Returns:
        A DataFrame with columns ``Quality Tier``, ``Count`` and
        ``Percentage`` (each tier's share of the total count, rounded to one
        decimal). Returns an empty DataFrame when ``stats`` has no
        ``"quality"`` entry.
    """
    if "quality" not in stats:
        return pd.DataFrame()

    q = stats["quality"]
    df = pd.DataFrame({
        "Quality Tier": [
            "Excellent (>0.8)",
            "Good (0.6-0.8)",
            "Fair (0.4-0.6)",
            "Poor (<0.4)",
        ],
        "Count": [
            q.get("excellent_count", 0),
            q.get("good_count", 0),
            q.get("fair_count", 0),
            q.get("poor_count", 0),
        ],
    })

    total = df["Count"].sum()
    if total:
        df["Percentage"] = (df["Count"] / total * 100).round(1)
    else:
        # With a zero total the division is 0/0 and pandas fills the column
        # with NaN; report a 0.0 share for every tier instead.
        df["Percentage"] = 0.0
    return df
def build_frequency_table(themes: Dict, max_items: int = 10) -> pd.DataFrame:
    """Flatten per-theme item lists into one long frequency table.

    Args:
        themes: Mapping of theme name to a sequence of dicts, each providing
            an ``"item"`` and a ``"count"`` entry. A theme whose value is
            ``None`` or empty contributes no rows.
        max_items: Maximum number of items taken from the front of each
            theme's sequence. Defaults to 10.

    Returns:
        A DataFrame with columns ``Category``, ``Item`` and ``Frequency``,
        or an empty DataFrame when no rows were produced.
    """
    rows = []
    for theme_name, items in themes.items():
        if not items:
            # Nothing recorded for this theme (None or empty sequence).
            continue
        # Items are taken in the order given, not re-sorted here.
        # NOTE(review): presumably the caller pre-sorts by count - confirm.
        rows.extend(
            {"Category": theme_name, "Item": item["item"], "Frequency": item["count"]}
            for item in items[:max_items]
        )
    return pd.DataFrame(rows) if rows else pd.DataFrame()
def build_all_tables(parsed_data: Dict) -> Dict[str, pd.DataFrame]:
    """Build every summary table that can be derived from the parsed data.

    Args:
        parsed_data: Mapping that must contain the keys ``"metadata"``,
            ``"themes"`` and ``"statistics"``. Other keys are ignored.

    Returns:
        A dict of table name to DataFrame. ``"participant_profile"`` is
        always present; ``"quality_distribution"`` and ``"theme_frequency"``
        are included only when their builders return a non-empty table.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Read all required inputs up front so a missing key fails before any
    # table is built. The "dataframe" entry is not read here because no
    # table in this function uses it.
    metadata = parsed_data["metadata"]
    themes = parsed_data["themes"]
    stats = parsed_data["statistics"]

    tables = {"participant_profile": build_participant_profile_table(metadata)}

    quality_table = build_quality_distribution_table(stats)
    if not quality_table.empty:
        tables["quality_distribution"] = quality_table

    freq_table = build_frequency_table(themes)
    if not freq_table.empty:
        tables["theme_frequency"] = freq_table

    return tables