File size: 1,928 Bytes
54c99ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd
from typing import Dict
from collections import Counter

def build_participant_profile_table(metadata: Dict) -> pd.DataFrame:
    """Summarise headline participant metrics as a two-column table.

    Args:
        metadata: Mapping read for the keys "total_transcripts",
            "avg_quality_score" and "avg_word_count". Any key that is
            absent is treated as 0.

    Returns:
        A DataFrame with a "Metric" column of labels and a "Value" column
        holding the raw transcript total, the quality score formatted to
        two decimals, and the word count formatted with thousands
        separators and no decimals (both as strings).
    """
    participant_total = metadata.get("total_transcripts", 0)
    quality_text = format(metadata.get("avg_quality_score", 0), ".2f")
    words_text = format(metadata.get("avg_word_count", 0), ",.0f")

    labels = ["Total Participants", "Avg Quality Score", "Avg Words"]
    values = [participant_total, quality_text, words_text]
    return pd.DataFrame({"Metric": labels, "Value": values})

def build_quality_distribution_table(stats: Dict) -> pd.DataFrame:
    """Build a table of counts and percentage share per quality tier.

    Args:
        stats: Mapping that may contain a "quality" entry. That entry is
            read for the keys "excellent_count", "good_count",
            "fair_count" and "poor_count"; a missing count defaults to 0.

    Returns:
        A DataFrame with the columns "Quality Tier", "Count" and
        "Percentage" (each tier's share of the total count, rounded to one
        decimal place). When the counts sum to zero every percentage is
        0.0. An empty DataFrame is returned when ``stats`` has no
        "quality" key.
    """
    if "quality" not in stats:
        return pd.DataFrame()

    quality = stats["quality"]
    # Display label paired with the key that holds its count, in row order.
    tiers = (
        ("Excellent (>0.8)", "excellent_count"),
        ("Good (0.6-0.8)", "good_count"),
        ("Fair (0.4-0.6)", "fair_count"),
        ("Poor (<0.4)", "poor_count"),
    )
    df = pd.DataFrame({
        "Quality Tier": [label for label, _ in tiers],
        "Count": [quality.get(key, 0) for _, key in tiers],
    })

    total = df["Count"].sum()
    if total:
        df["Percentage"] = (df["Count"] / total * 100).round(1)
    else:
        # Dividing by a zero total would fill the column with NaN; report
        # an explicit 0.0 share for every tier instead.
        df["Percentage"] = 0.0
    return df

def build_frequency_table(themes: Dict) -> pd.DataFrame:
    """Flatten per-theme item counts into one long-format table.

    Args:
        themes: Mapping of theme name to a sequence of entries, where each
            entry is indexed by "item" and "count". Only the first ten
            entries of each theme are used.

    Returns:
        A DataFrame with one row per retained entry and the columns
        "Category", "Item" and "Frequency", or an empty DataFrame when no
        entries were found.
    """
    records = [
        {"Category": category, "Item": entry["item"], "Frequency": entry["count"]}
        for category, entries in themes.items()
        for entry in entries[:10]
    ]
    if not records:
        return pd.DataFrame()
    return pd.DataFrame(records)

def build_all_tables(parsed_data: Dict) -> Dict[str, pd.DataFrame]:
    """Assemble every report table that can be derived from parsed data.

    Args:
        parsed_data: Mapping that must contain the keys "dataframe",
            "metadata", "themes" and "statistics".

    Returns:
        A dict keyed by table name. "participant_profile" is always
        present; "quality_distribution" and "theme_frequency" are included
        only when their tables are non-empty.

    Raises:
        KeyError: If any of the four required keys is missing.
    """
    # The "dataframe" entry is not used below, but it is still looked up
    # (first, as before) so a missing key keeps raising KeyError.
    _unused_frame, profile_meta, theme_map, statistics = (
        parsed_data[key]
        for key in ("dataframe", "metadata", "themes", "statistics")
    )

    result = {"participant_profile": build_participant_profile_table(profile_meta)}

    optional_tables = (
        ("quality_distribution", build_quality_distribution_table(statistics)),
        ("theme_frequency", build_frequency_table(theme_map)),
    )
    for table_name, table in optional_tables:
        if not table.empty:
            result[table_name] = table

    return result