Spaces:

hubnemo
/

peft-issues

Running

App Files Files Community

nemo commited on 17 days ago

Commit

c03ae2c

1 Parent(s): c978915

Initial

Browse files

Files changed (2) hide show

app.py +480 -0
peft_issues_analyzed_500.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,480 @@

+#!/usr/bin/env python3
+"""Gradio dashboard for visualizing analyzed peft issues with time range filtering."""
+import json
+from datetime import datetime
+from pathlib import Path
+import gradio as gr
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+ANALYZED_FILE = Path("peft_issues_merged_500.json")
+def parse_date(date_str):
+    """Parse ISO date string to year-month string."""
+    try:
+        dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+        return dt.strftime('%Y-%m')
+    except:
+        return "unknown"
+def parse_date_full(date_str):
+    """Parse ISO date string to datetime object."""
+    try:
+        return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+    except:
+        return datetime.min
+def load_data():
+    """Load analyzed issues data with date parsing."""
+    with open(ANALYZED_FILE, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    # Add parsed dates
+    for item in data:
+        item['year_month'] = parse_date(item.get('created_at', ''))
+        item['date_obj'] = parse_date_full(item.get('created_at', ''))
+    return data
+def create_dataframe(data):
+    """Create a pandas DataFrame from analyzed data."""
+    df_data = []
+    for item in data:
+        df_data.append({
+            "Issue #": item["number"],
+            "Title": item["title"][:100] + "..." if len(item["title"]) > 100 else item["title"],
+            "State": item["state"],
+            "Date": item.get("year_month", "unknown"),
+            "Model": item["model"],
+            "Trainer": item["trainer"],
+            "PEFT Method": item["peft_method"],
+            "Training Type": item["training_type"],
+            "Experience": item["experience_score"],
+            "Specialties": ", ".join(item["specialties"]) if item["specialties"] != ["none"] else "-",
+            "URL": item["html_url"],
+        })
+    return pd.DataFrame(df_data)
+def filter_data(df, model_filter, trainer_filter, peft_filter, training_filter, min_score, max_score, min_month, max_month):
+    """Filter dataframe based on user selections including date range."""
+    if model_filter != "All":
+        df = df[df["Model"] == model_filter]
+    if trainer_filter != "All":
+        df = df[df["Trainer"] == trainer_filter]
+    if peft_filter != "All":
+        df = df[df["PEFT Method"] == peft_filter]
+    if training_filter != "All":
+        df = df[df["Training Type"] == training_filter]
+    df = df[(df["Experience"] >= min_score) & (df["Experience"] <= max_score)]
+    # Date range filtering
+    df = df[(df["Date"] >= min_month) & (df["Date"] <= max_month)]
+    return df
+def get_unique_values(data, key):
+    """Get unique values for a filter dropdown."""
+    values = sorted(set(item[key] for item in data))
+    return ["All"] + values
+def get_month_range(data):
+    """Get min and max month from data."""
+    months = sorted(set(item.get("year_month", "unknown") for item in data if item.get("year_month") != "unknown"))
+    if not months:
+        return ["2023-01", "2026-12"]
+    return [months[0], months[-1]]
+def get_all_months(data):
+    """Get all unique months in chronological order."""
+    months = sorted(set(item.get("year_month", "unknown") for item in data if item.get("year_month") != "unknown"))
+    return months
+def create_peft_method_chart(data):
+    """Create PEFT method distribution chart."""
+    if not data:
+        return go.Figure()
+    methods = {}
+    for item in data:
+        m = item["peft_method"]
+        methods[m] = methods.get(m, 0) + 1
+    df = pd.DataFrame(list(methods.items()), columns=["PEFT Method", "Count"])
+    fig = px.bar(df, x="PEFT Method", y="Count", title="PEFT Method Distribution",
+                 color="PEFT Method", template="plotly_white")
+    fig.update_layout(showlegend=False)
+    return fig
+def create_trainer_chart(data):
+    """Create trainer framework distribution chart."""
+    if not data:
+        return go.Figure()
+    trainers = {}
+    for item in data:
+        t = item["trainer"]
+        trainers[t] = trainers.get(t, 0) + 1
+    df = pd.DataFrame(list(trainers.items()), columns=["Trainer", "Count"])
+    fig = px.pie(df, values="Count", names="Trainer", title="Trainer Framework Distribution",
+                 template="plotly_white")
+    return fig
+def create_training_type_chart(data):
+    """Create training type distribution chart."""
+    if not data:
+        return go.Figure()
+    types = {}
+    for item in data:
+        t = item["training_type"]
+        types[t] = types.get(t, 0) + 1
+    df = pd.DataFrame(list(types.items()), columns=["Training Type", "Count"])
+    fig = px.bar(df, x="Training Type", y="Count", title="Training Type Distribution",
+                 color="Training Type", template="plotly_white")
+    fig.update_layout(showlegend=False)
+    return fig
+def create_experience_chart(data):
+    """Create experience score histogram."""
+    if not data:
+        return go.Figure()
+    scores = [item["experience_score"] for item in data]
+    fig = px.histogram(x=scores, nbins=10, title="Experience Score Distribution",
+                       labels={"x": "Experience Score", "y": "Count"},
+                       template="plotly_white")
+    fig.update_traces(marker_color="steelblue")
+    return fig
+def create_experience_by_method_chart(data):
+    """Create average experience score by PEFT method."""
+    if not data:
+        return go.Figure()
+    method_scores = {}
+    method_counts = {}
+    for item in data:
+        m = item["peft_method"]
+        method_scores[m] = method_scores.get(m, 0) + item["experience_score"]
+        method_counts[m] = method_counts.get(m, 0) + 1
+    avg_scores = {m: method_scores[m] / method_counts[m] for m in method_scores}
+    df = pd.DataFrame(list(avg_scores.items()), columns=["PEFT Method", "Avg Score"])
+    fig = px.bar(df, x="PEFT Method", y="Avg Score", title="Average Experience Score by PEFT Method",
+                 color="PEFT Method", template="plotly_white")
+    fig.update_layout(showlegend=False, yaxis_range=[0, 10])
+    return fig
+def create_specialties_chart(data):
+    """Create specialties distribution chart."""
+    if not data:
+        return go.Figure()
+    specialties = {}
+    for item in data:
+        for s in item["specialties"]:
+            if s != "none":
+                specialties[s] = specialties.get(s, 0) + 1
+    if not specialties:
+        return go.Figure()
+    df = pd.DataFrame(list(specialties.items()), columns=["Specialty", "Count"])
+    fig = px.bar(df, x="Specialty", y="Count", title="Special Technologies Distribution",
+                 color="Specialty", template="plotly_white")
+    fig.update_layout(showlegend=False)
+    return fig
+def create_model_chart(data):
+    """Create model distribution chart."""
+    if not data:
+        return go.Figure()
+    models = {}
+    for item in data:
+        m = item["model"]
+        if m != "unknown":
+            models[m] = models.get(m, 0) + 1
+    if not models:
+        return go.Figure()
+    df = pd.DataFrame(list(models.items()), columns=["Model", "Count"])
+    fig = px.bar(df, x="Model", y="Count", title="Model Distribution",
+                 color="Model", template="plotly_white")
+    fig.update_layout(showlegend=False)
+    return fig
+def create_temporal_chart(data):
+    """Create issues over time chart."""
+    if not data:
+        return go.Figure()
+    months = {}
+    for item in data:
+        m = item.get("year_month", "unknown")
+        if m != "unknown":
+            months[m] = months.get(m, 0) + 1
+    if not months:
+        return go.Figure()
+    sorted_months = sorted(months.items())
+    df = pd.DataFrame(sorted_months, columns=["Month", "Issues"])
+    fig = px.line(df, x="Month", y="Issues", title="Issues Over Time",
+                  markers=True, template="plotly_white")
+    fig.update_layout(xaxis_tickangle=-45)
+    return fig
+def show_issue_details(issue_number, data):
+    """Show detailed information for a specific issue."""
+    for item in data:
+        if item["number"] == issue_number:
+            return (
+                f"**Issue #{item['number']}**: [{item['title']}]({item['html_url']})\n\n"
+                f"**State**: {item['state']}\n"
+                f"**Author**: {item['author']}\n"
+                f"**Created**: {item['created_at'][:10] if item.get('created_at') else 'unknown'}\n"
+                f"**Labels**: {', '.join(item['labels']) or 'None'}\n\n"
+                f"**Model**: {item['model']}\n"
+                f"**Trainer**: {item['trainer']}\n"
+                f"**PEFT Method**: {item['peft_method']}\n"
+                f"**Training Type**: {item['training_type']}\n"
+                f"**Specialties**: {', '.join(item['specialties'])}\n\n"
+                f"**Experience Score**: {item['experience_score']}/10\n"
+                f"**Rationale**: {item['experience_rationale']}\n\n"
+                f"**Confidence**:\n"
+                f"- Model: {item['confidence'].get('model', 'N/A')}\n"
+                f"- Trainer: {item['confidence'].get('trainer_framework', item['confidence'].get('trainer', 'N/A'))}\n"
+                f"- PEFT Method: {item['confidence'].get('peft_method', 'N/A')}\n"
+                f"- Training Type: {item['confidence'].get('training_type', 'N/A')}\n"
+                f"- Experience: {item['confidence'].get('experience_score', 'N/A')}\n"
+            )
+    return "Issue not found"
+def filter_data_by_months(data, min_month, max_month):
+    """Filter raw data by month range."""
+    return [item for item in data if min_month <= item.get("year_month", "unknown") <= max_month]
+def build_app():
+    """Build the Gradio application."""
+    data = load_data()
+    df = create_dataframe(data)
+    # Get month range
+    month_range = get_month_range(data)
+    all_months = get_all_months(data)
+    with gr.Blocks(title="PEFT Issues Analysis Dashboard") as app:
+        gr.Markdown("# 🔍 PEFT Issues Analysis Dashboard")
+        gr.Markdown("Analysis of 345 most recent issues from [huggingface/peft](https://github.com/huggingface/peft) — classified by LLM")
+        # Global date range filter at the top
+        with gr.Row():
+            with gr.Column(scale=2):
+                gr.Markdown("### 📅 Global Time Range Filter")
+            with gr.Column(scale=8):
+                # Use dropdowns for month selection since Gradio slider doesn't support strings well
+                month_options = all_months
+                min_month = gr.Dropdown(
+                    choices=month_options,
+                    value=month_range[0],
+                    label="From Month",
+                    allow_custom_value=False
+                )
+                max_month = gr.Dropdown(
+                    choices=month_options,
+                    value=month_range[-1],
+                    label="To Month",
+                    allow_custom_value=False
+                )
+        with gr.Tabs():
+            with gr.Tab("📊 Data Table"):
+                with gr.Row():
+                    model_filter = gr.Dropdown(
+                        choices=get_unique_values(data, "model"),
+                        value="All",
+                        label="Model"
+                    )
+                    trainer_filter = gr.Dropdown(
+                        choices=get_unique_values(data, "trainer"),
+                        value="All",
+                        label="Trainer"
+                    )
+                    peft_filter = gr.Dropdown(
+                        choices=get_unique_values(data, "peft_method"),
+                        value="All",
+                        label="PEFT Method"
+                    )
+                    training_filter = gr.Dropdown(
+                        choices=get_unique_values(data, "training_type"),
+                        value="All",
+                        label="Training Type"
+                    )
+                with gr.Row():
+                    min_score = gr.Slider(0, 10, value=0, step=1, label="Min Experience Score")
+                    max_score = gr.Slider(0, 10, value=10, step=1, label="Max Experience Score")
+                table = gr.DataFrame(
+                    value=df,
+                    headers=["Issue #", "Title", "State", "Date", "Model", "Trainer", "PEFT Method",
+                            "Training Type", "Experience", "Specialties", "URL"],
+                    interactive=False,
+                    wrap=True
+                )
+                def update_table(m, t, p, tr, min_s, max_s, min_m, max_m):
+                    filtered = filter_data(df.copy(), m, t, p, tr, min_s, max_s, min_m, max_m)
+                    return filtered
+                all_filters = [model_filter, trainer_filter, peft_filter, training_filter,
+                              min_score, max_score, min_month, max_month]
+                for component in all_filters:
+                    component.change(
+                        fn=update_table,
+                        inputs=all_filters,
+                        outputs=table
+                    )
+            with gr.Tab("🔎 Issue Details"):
+                issue_number = gr.Number(label="Issue Number", value=data[0]["number"], precision=0)
+                details = gr.Markdown()
+                def update_details(num):
+                    return show_issue_details(int(num), data)
+                issue_number.change(fn=update_details, inputs=issue_number, outputs=details)
+                details.value = show_issue_details(data[0]["number"], data)
+            with gr.Tab("📈 Analytics"):
+                with gr.Row():
+                    temporal_chart = gr.Plot(value=create_temporal_chart(data))
+                with gr.Row():
+                    peft_chart = gr.Plot(value=create_peft_method_chart(data))
+                    trainer_chart = gr.Plot(value=create_trainer_chart(data))
+                with gr.Row():
+                    training_chart = gr.Plot(value=create_training_type_chart(data))
+                    experience_chart = gr.Plot(value=create_experience_chart(data))
+                with gr.Row():
+                    exp_method_chart = gr.Plot(value=create_experience_by_method_chart(data))
+                    specialties_chart = gr.Plot(value=create_specialties_chart(data))
+                with gr.Row():
+                    model_chart = gr.Plot(value=create_model_chart(data))
+                def update_charts(min_m, max_m):
+                    filtered_data = filter_data_by_months(data, min_m, max_m)
+                    return (
+                        create_temporal_chart(filtered_data),
+                        create_peft_method_chart(filtered_data),
+                        create_trainer_chart(filtered_data),
+                        create_training_type_chart(filtered_data),
+                        create_experience_chart(filtered_data),
+                        create_experience_by_method_chart(filtered_data),
+                        create_specialties_chart(filtered_data),
+                        create_model_chart(filtered_data)
+                    )
+                for component in [min_month, max_month]:
+                    component.change(
+                        fn=update_charts,
+                        inputs=[min_month, max_month],
+                        outputs=[
+                            temporal_chart, peft_chart, trainer_chart,
+                            training_chart, experience_chart,
+                            exp_method_chart, specialties_chart, model_chart
+                        ]
+                    )
+            with gr.Tab("ℹ️ About"):
+                gr.Markdown(f"""
+                ## About This Dashboard
+                This dashboard analyzes 345 recent issues from the [huggingface/peft](https://github.com/huggingface/peft) repository.
+                **Time Range**: {month_range[0]} to {month_range[1]}
+                **Total Issues**: {len(data)}
+                ### Data Collection Method
+                **LLM Classification** (current view):
+                - All 345 issues classified by a language model reading the full title + body
+                - More accurate than regex-based extraction, especially for nuanced classifications
+                - Experience scores and training types are LLM-inferred from context
+                **Validation Process**:
+                1. **Static analysis** (rule-based): Extracted via regex patterns
+                2. **LLM classification**: Language model read all 345 issues in 4 chunks
+                3. **Comparison**: Identified systematic biases in the static analyzer
+                4. **Merged results**: This dashboard uses the LLM classification (more accurate)
+                ### Why LLM Classification?
+                LLM outperforms static analysis on nuanced tasks:
+                - **Experience score**: LLM understands issue quality, tone, and depth (44.3% agreement with static)
+                - **Training type**: LLM distinguishes actual training from feature requests (61.2% agreement)
+                - **PEFT method**: LLM detects context (73.9% agreement)
+                ### Metrics Explained
+                **Experience Score (0-10)**:
+                - **Code reproduction**: +2 if runnable code snippet or clear numbered steps
+                - **Error details**: +2 if actual traceback or error block
+                - **Environment info**: +2 if actual version numbers or system info table
+                - **Clarity**: +2 if clear title (4+ words) and substantial body (50+ words)
+                - **Technical depth**: +2 if 2+ technical terms used in proper context
+                **Confidence Levels**:
+                - **High**: Strong evidence in the issue text
+                - **Medium**: Some evidence or inference
+                - **Low**: Limited or no evidence
+                ### Known Limitations
+                - Model detection: Many PEFT issues are framework-level bugs without model mentions
+                - Trainer detection: Most users don't specify their training framework
+                - Training type: "unsure" means the issue lacks clear training context (often infrastructure/bug reports)
+                - LLM may occasionally hallucinate or misread complex technical details
+                ### Data Sources
+                - Issues fetched via GitHub API on 2026-05-12 (345 issues, most recently updated)
+                - LLM classification performed on all 345 issues in 4 batches
+                - Raw data preserved for transparency and re-analysis
+                """)
+    return app
+# Build and serve the dashboard only when this file is executed as a script;
+# importing the module just defines the functions above.
+if __name__ == "__main__":
+    app = build_app()
+    # No public share link is requested; the server binds to 0.0.0.0 on port 7860.
+    # NOTE(review): `theme` is passed to launch() here -- confirm that the
+    # installed Gradio version accepts it there rather than on gr.Blocks().
+    app.launch(share=False, server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft())

peft_issues_analyzed_500.json ADDED Viewed

The diff for this file is too large to render. See raw diff