Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Gradio dashboard for visualizing analyzed peft issues with time range filtering.""" | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
# Path of the JSON file that load_data() opens; it is relative, so it is
# resolved against the process's current working directory.
ANALYZED_FILE = Path("peft_issues_merged_500.json")
def parse_date(date_str):
    """Convert an ISO-8601 timestamp into a ``YYYY-MM`` bucket label.

    Args:
        date_str: Timestamp string such as ``"2024-03-15T10:00:00Z"``.

    Returns:
        The year-month string, or ``"unknown"`` when ``date_str`` is not a
        string or cannot be parsed as an ISO timestamp.
    """
    try:
        # A trailing "Z" is rewritten as an explicit UTC offset because
        # datetime.fromisoformat() rejects it before Python 3.11.
        dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
        return dt.strftime('%Y-%m')
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: non-string input such as None.
        # ValueError: empty or malformed timestamp.
        return "unknown"
def parse_date_full(date_str):
    """Parse an ISO-8601 timestamp into a ``datetime`` object.

    Args:
        date_str: Timestamp string such as ``"2024-03-15T10:00:00Z"``.

    Returns:
        The parsed ``datetime`` (timezone-aware when the input carries an
        offset or a trailing ``Z``), or ``datetime.min`` when ``date_str`` is
        not a string or cannot be parsed.
    """
    try:
        # A trailing "Z" is rewritten as an explicit UTC offset because
        # datetime.fromisoformat() rejects it before Python 3.11.
        return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        # NOTE(review): datetime.min is naive, so ordering it against an
        # aware result raises TypeError; kept as-is for callers that test
        # for this sentinel.
        return datetime.min
def load_data():
    """Read the analyzed issues from ``ANALYZED_FILE`` and add date fields.

    Every loaded item gains two keys derived from its ``created_at`` value
    (an empty string is used when that key is absent): ``year_month`` from
    ``parse_date`` and ``date_obj`` from ``parse_date_full``.

    Returns:
        The decoded JSON content with the extra keys set on each item.
    """
    with open(ANALYZED_FILE, "r", encoding="utf-8") as handle:
        issues = json.load(handle)

    for issue in issues:
        created = issue.get('created_at', '')
        issue['year_month'] = parse_date(created)
        issue['date_obj'] = parse_date_full(created)

    return issues
def create_dataframe(data):
    """Build the table shown in the dashboard from the analyzed issues.

    Args:
        data: Iterable of issue dicts providing ``number``, ``title``,
            ``state``, ``model``, ``trainer``, ``peft_method``,
            ``training_type``, ``experience_score``, ``specialties`` and
            ``html_url``; ``year_month`` is optional.

    Returns:
        A ``pandas.DataFrame`` with one row per issue. The column set is
        fixed, so an empty ``data`` still yields every column and later
        column lookups do not fail.
    """
    columns = [
        "Issue #", "Title", "State", "Date", "Model", "Trainer",
        "PEFT Method", "Training Type", "Experience", "Specialties", "URL",
    ]

    rows = []
    for item in data:
        title = item["title"]
        if len(title) > 100:
            # Keep the table readable: long titles are cut to 100 characters.
            title = title[:100] + "..."

        specialties = item["specialties"]
        # The placeholder list ["none"] is displayed as a dash.
        specialties_text = "-" if specialties == ["none"] else ", ".join(specialties)

        rows.append({
            "Issue #": item["number"],
            "Title": title,
            "State": item["state"],
            "Date": item.get("year_month", "unknown"),
            "Model": item["model"],
            "Trainer": item["trainer"],
            "PEFT Method": item["peft_method"],
            "Training Type": item["training_type"],
            "Experience": item["experience_score"],
            "Specialties": specialties_text,
            "URL": item["html_url"],
        })

    return pd.DataFrame(rows, columns=columns)
def filter_data(df, model_filter, trainer_filter, peft_filter, training_filter, min_score, max_score, min_month, max_month):
    """Filter the issues table by category, experience score and month range.

    Args:
        df: DataFrame with the columns ``Model``, ``Trainer``,
            ``PEFT Method``, ``Training Type``, ``Experience`` and ``Date``.
        model_filter, trainer_filter, peft_filter, training_filter: Value the
            matching column must equal; ``"All"`` or ``None`` applies no
            restriction.
        min_score, max_score: Inclusive bounds on ``Experience``.
        min_month, max_month: Inclusive ``YYYY-MM`` bounds on ``Date``; a
            ``None`` or empty bound leaves that side of the range open.

    Returns:
        The rows of ``df`` that satisfy every active filter.
    """
    category_filters = (
        ("Model", model_filter),
        ("Trainer", trainer_filter),
        ("PEFT Method", peft_filter),
        ("Training Type", training_filter),
    )
    for column, selected in category_filters:
        # None (no selection) is handled like "All" instead of matching nothing.
        if selected is None or selected == "All":
            continue
        df = df[df[column] == selected]

    df = df[(df["Experience"] >= min_score) & (df["Experience"] <= max_score)]

    # Months are compared as strings, which orders "YYYY-MM" chronologically.
    # The "unknown" label sorts after every digit-led month, so such rows are
    # dropped whenever an upper bound is given.
    if min_month:
        df = df[df["Date"] >= min_month]
    if max_month:
        df = df[df["Date"] <= max_month]
    return df
def get_unique_values(data, key):
    """Return dropdown choices for ``key``: ``"All"`` plus the sorted distinct values.

    Args:
        data: Iterable of dicts that all contain ``key``.
        key: Name of the field whose distinct values are collected.

    Returns:
        A list starting with ``"All"`` followed by each distinct value in
        ascending order.
    """
    distinct = {entry[key] for entry in data}
    choices = ["All"]
    choices.extend(sorted(distinct))
    return choices
def get_month_range(data):
    """Return the earliest and latest known month in ``data``.

    Items whose ``year_month`` is missing, empty or ``"unknown"`` are
    ignored, so the placeholder label can never become a range bound.

    Args:
        data: Iterable of issue dicts that may carry a ``year_month`` string.

    Returns:
        ``[first_month, last_month]`` as ``YYYY-MM`` strings, or the default
        ``["2023-01", "2026-12"]`` when no item has a known month.
    """
    known = set()
    for item in data:
        month = item.get("year_month")
        if month and month != "unknown":
            known.add(month)

    if not known:
        return ["2023-01", "2026-12"]
    # "YYYY-MM" strings order chronologically under plain string comparison.
    return [min(known), max(known)]
def get_all_months(data):
    """Return every distinct known month in ``data`` in chronological order.

    Items whose ``year_month`` is missing, empty or ``"unknown"`` are
    ignored, so the placeholder label never appears in the result.

    Args:
        data: Iterable of issue dicts that may carry a ``year_month`` string.

    Returns:
        A sorted list of unique ``YYYY-MM`` strings (empty when none exist).
    """
    known = set()
    for item in data:
        month = item.get("year_month")
        if month and month != "unknown":
            known.add(month)
    # "YYYY-MM" strings order chronologically under plain string comparison.
    return sorted(known)
def create_peft_method_chart(data):
    """Build a bar chart of how many issues use each PEFT method.

    Args:
        data: Sequence of issue dicts with a ``peft_method`` key.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty.
    """
    if not data:
        return go.Figure()

    labels = [issue["peft_method"] for issue in data]
    # Seeding from the label list keeps categories in first-seen order.
    tally = dict.fromkeys(labels, 0)
    for label in labels:
        tally[label] += 1

    frame = pd.DataFrame({
        "PEFT Method": list(tally.keys()),
        "Count": list(tally.values()),
    })
    chart = px.bar(
        frame,
        x="PEFT Method",
        y="Count",
        title="PEFT Method Distribution",
        color="PEFT Method",
        template="plotly_white",
    )
    chart.update_layout(showlegend=False)
    return chart
def create_trainer_chart(data):
    """Build a pie chart of how many issues use each trainer framework.

    Args:
        data: Sequence of issue dicts with a ``trainer`` key.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty.
    """
    if not data:
        return go.Figure()

    labels = [issue["trainer"] for issue in data]
    # Seeding from the label list keeps categories in first-seen order.
    tally = dict.fromkeys(labels, 0)
    for label in labels:
        tally[label] += 1

    frame = pd.DataFrame({
        "Trainer": list(tally.keys()),
        "Count": list(tally.values()),
    })
    return px.pie(
        frame,
        values="Count",
        names="Trainer",
        title="Trainer Framework Distribution",
        template="plotly_white",
    )
def create_training_type_chart(data):
    """Build a bar chart of how many issues fall under each training type.

    Args:
        data: Sequence of issue dicts with a ``training_type`` key.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty.
    """
    if not data:
        return go.Figure()

    labels = [issue["training_type"] for issue in data]
    # Seeding from the label list keeps categories in first-seen order.
    tally = dict.fromkeys(labels, 0)
    for label in labels:
        tally[label] += 1

    frame = pd.DataFrame({
        "Training Type": list(tally.keys()),
        "Count": list(tally.values()),
    })
    chart = px.bar(
        frame,
        x="Training Type",
        y="Count",
        title="Training Type Distribution",
        color="Training Type",
        template="plotly_white",
    )
    chart.update_layout(showlegend=False)
    return chart
def create_experience_chart(data):
    """Build a histogram of the issues' experience scores.

    Args:
        data: Sequence of issue dicts with an ``experience_score`` key.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty.
    """
    if not data:
        return go.Figure()

    score_values = []
    for issue in data:
        score_values.append(issue["experience_score"])

    axis_labels = {"x": "Experience Score", "y": "Count"}
    chart = px.histogram(
        x=score_values,
        nbins=10,
        title="Experience Score Distribution",
        labels=axis_labels,
        template="plotly_white",
    )
    chart.update_traces(marker_color="steelblue")
    return chart
def create_experience_by_method_chart(data):
    """Build a bar chart of the mean experience score per PEFT method.

    Args:
        data: Sequence of issue dicts with ``peft_method`` and
            ``experience_score`` keys.

    Returns:
        A Plotly figure whose y axis is fixed to 0-10; an empty
        ``go.Figure`` when ``data`` is empty.
    """
    if not data:
        return go.Figure()

    # Collect each method's scores, keeping methods in first-seen order.
    scores_by_method = {}
    for issue in data:
        scores_by_method.setdefault(issue["peft_method"], []).append(issue["experience_score"])

    averages = {
        method: sum(scores) / len(scores)
        for method, scores in scores_by_method.items()
    }

    frame = pd.DataFrame({
        "PEFT Method": list(averages.keys()),
        "Avg Score": list(averages.values()),
    })
    chart = px.bar(
        frame,
        x="PEFT Method",
        y="Avg Score",
        title="Average Experience Score by PEFT Method",
        color="PEFT Method",
        template="plotly_white",
    )
    chart.update_layout(showlegend=False, yaxis_range=[0, 10])
    return chart
def create_specialties_chart(data):
    """Build a bar chart of how often each specialty is mentioned.

    The placeholder entry ``"none"`` is not counted.

    Args:
        data: Sequence of issue dicts with a ``specialties`` list.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty or no
        real specialty is present.
    """
    if not data:
        return go.Figure()

    mentions = [
        name
        for issue in data
        for name in issue["specialties"]
        if name != "none"
    ]
    if not mentions:
        return go.Figure()

    # Seeding from the mention list keeps categories in first-seen order.
    tally = dict.fromkeys(mentions, 0)
    for name in mentions:
        tally[name] += 1

    frame = pd.DataFrame({
        "Specialty": list(tally.keys()),
        "Count": list(tally.values()),
    })
    chart = px.bar(
        frame,
        x="Specialty",
        y="Count",
        title="Special Technologies Distribution",
        color="Specialty",
        template="plotly_white",
    )
    chart.update_layout(showlegend=False)
    return chart
def create_model_chart(data):
    """Build a bar chart of how many issues mention each model.

    Issues whose model is ``"unknown"`` are not counted.

    Args:
        data: Sequence of issue dicts with a ``model`` key.

    Returns:
        A Plotly figure; an empty ``go.Figure`` when ``data`` is empty or
        every model is ``"unknown"``.
    """
    if not data:
        return go.Figure()

    named = [issue["model"] for issue in data if issue["model"] != "unknown"]
    if not named:
        return go.Figure()

    # Seeding from the name list keeps categories in first-seen order.
    tally = dict.fromkeys(named, 0)
    for name in named:
        tally[name] += 1

    frame = pd.DataFrame({
        "Model": list(tally.keys()),
        "Count": list(tally.values()),
    })
    chart = px.bar(
        frame,
        x="Model",
        y="Count",
        title="Model Distribution",
        color="Model",
        template="plotly_white",
    )
    chart.update_layout(showlegend=False)
    return chart
def create_temporal_chart(data):
    """Build a line chart of the number of issues per month.

    Items whose ``year_month`` is absent or ``"unknown"`` are not counted.

    Args:
        data: Sequence of issue dicts that may carry a ``year_month`` string.

    Returns:
        A Plotly figure with months in ascending order; an empty
        ``go.Figure`` when ``data`` is empty or no month is known.
    """
    if not data:
        return go.Figure()

    all_labels = (issue.get("year_month", "unknown") for issue in data)
    dated = [label for label in all_labels if label != "unknown"]
    if not dated:
        return go.Figure()

    per_month = dict.fromkeys(dated, 0)
    for label in dated:
        per_month[label] += 1

    timeline = sorted(per_month.items())
    frame = pd.DataFrame(timeline, columns=["Month", "Issues"])
    chart = px.line(
        frame,
        x="Month",
        y="Issues",
        title="Issues Over Time",
        markers=True,
        template="plotly_white",
    )
    chart.update_layout(xaxis_tickangle=-45)
    return chart
def show_issue_details(issue_number, data):
    """Render the Markdown summary of the issue with the given number.

    Args:
        issue_number: Value compared for equality with each item's ``number``.
        data: Iterable of issue dicts.

    Returns:
        A Markdown string describing the first matching issue, or
        ``"Issue not found"`` when no item matches.
    """
    issue = next((entry for entry in data if entry["number"] == issue_number), None)
    if issue is None:
        return "Issue not found"

    created = issue['created_at'][:10] if issue.get('created_at') else 'unknown'
    labels = ', '.join(issue['labels']) or 'None'
    confidence = issue['confidence']
    # Older records may store the trainer confidence under "trainer".
    trainer_confidence = confidence.get('trainer_framework', confidence.get('trainer', 'N/A'))

    header = f"**Issue #{issue['number']}**: [{issue['title']}]({issue['html_url']})"
    metadata = [
        f"**State**: {issue['state']}",
        f"**Author**: {issue['author']}",
        f"**Created**: {created}",
        f"**Labels**: {labels}",
    ]
    classification = [
        f"**Model**: {issue['model']}",
        f"**Trainer**: {issue['trainer']}",
        f"**PEFT Method**: {issue['peft_method']}",
        f"**Training Type**: {issue['training_type']}",
        f"**Specialties**: {', '.join(issue['specialties'])}",
    ]
    experience = [
        f"**Experience Score**: {issue['experience_score']}/10",
        f"**Rationale**: {issue['experience_rationale']}",
    ]
    confidence_lines = [
        "**Confidence**:",
        f"- Model: {confidence.get('model', 'N/A')}",
        f"- Trainer: {trainer_confidence}",
        f"- PEFT Method: {confidence.get('peft_method', 'N/A')}",
        f"- Training Type: {confidence.get('training_type', 'N/A')}",
        f"- Experience: {confidence.get('experience_score', 'N/A')}",
    ]

    # Sections are separated by a blank line; the text ends with a newline.
    sections = [header]
    for group in (metadata, classification, experience, confidence_lines):
        sections.append("\n".join(group))
    return "\n\n".join(sections) + "\n"
def filter_data_by_months(data, min_month, max_month):
    """Keep the items whose ``year_month`` lies within an inclusive range.

    Args:
        data: Iterable of issue dicts that may carry a ``year_month`` string;
            a missing or empty value is treated as ``"unknown"``.
        min_month: Lower ``YYYY-MM`` bound; ``None`` or empty leaves it open.
        max_month: Upper ``YYYY-MM`` bound; ``None`` or empty leaves it open.

    Returns:
        A new list with the matching items in their original order.
    """
    def in_range(month):
        # Plain string comparison orders "YYYY-MM" chronologically. The
        # "unknown" label sorts after every digit-led month, so it is
        # excluded whenever an upper bound is given.
        if min_month and month < min_month:
            return False
        if max_month and month > max_month:
            return False
        return True

    return [item for item in data if in_range(item.get("year_month") or "unknown")]
def _about_markdown(month_range, total_issues):
    """Return the Markdown text displayed on the About tab."""
    # Built line by line so the text does not depend on source indentation.
    # Blank entries separate Markdown blocks so that lists do not absorb the
    # paragraph that follows them.
    lines = [
        "## About This Dashboard",
        "",
        "This dashboard analyzes 345 recent issues from the [huggingface/peft](https://github.com/huggingface/peft) repository.",
        "",
        f"**Time Range**: {month_range[0]} to {month_range[1]}",
        "",
        f"**Total Issues**: {total_issues}",
        "",
        "### Data Collection Method",
        "",
        "**LLM Classification** (current view):",
        "",
        "- All 345 issues classified by a language model reading the full title + body",
        "- More accurate than regex-based extraction, especially for nuanced classifications",
        "- Experience scores and training types are LLM-inferred from context",
        "",
        "**Validation Process**:",
        "",
        "1. **Static analysis** (rule-based): Extracted via regex patterns",
        "2. **LLM classification**: Language model read all 345 issues in 4 chunks",
        "3. **Comparison**: Identified systematic biases in the static analyzer",
        "4. **Merged results**: This dashboard uses the LLM classification (more accurate)",
        "",
        "### Why LLM Classification?",
        "",
        "LLM outperforms static analysis on nuanced tasks:",
        "",
        "- **Experience score**: LLM understands issue quality, tone, and depth (44.3% agreement with static)",
        "- **Training type**: LLM distinguishes actual training from feature requests (61.2% agreement)",
        "- **PEFT method**: LLM detects context (73.9% agreement)",
        "",
        "### Metrics Explained",
        "",
        "**Experience Score (0-10)**:",
        "",
        "- **Code reproduction**: +2 if runnable code snippet or clear numbered steps",
        "- **Error details**: +2 if actual traceback or error block",
        "- **Environment info**: +2 if actual version numbers or system info table",
        "- **Clarity**: +2 if clear title (4+ words) and substantial body (50+ words)",
        "- **Technical depth**: +2 if 2+ technical terms used in proper context",
        "",
        "**Confidence Levels**:",
        "",
        "- **High**: Strong evidence in the issue text",
        "- **Medium**: Some evidence or inference",
        "- **Low**: Limited or no evidence",
        "",
        "### Known Limitations",
        "",
        "- Model detection: Many PEFT issues are framework-level bugs without model mentions",
        "- Trainer detection: Most users don't specify their training framework",
        "- Training type: \"unsure\" means the issue lacks clear training context (often infrastructure/bug reports)",
        "- LLM may occasionally hallucinate or misread complex technical details",
        "",
        "### Data Sources",
        "",
        "- Issues fetched via GitHub API on 2026-05-12 (345 issues, most recently updated)",
        "- LLM classification performed on all 345 issues in 4 batches",
        "- Raw data preserved for transparency and re-analysis",
    ]
    return "\n".join(lines)


def build_app():
    """Build the Gradio application.

    Loads the analyzed issues once and wires four tabs: a filterable data
    table, a per-issue detail view, analytics charts and an About page. The
    two month dropdowns at the top drive both the table and the charts.

    Returns:
        The ``gr.Blocks`` application, not yet launched.
    """
    data = load_data()
    df = create_dataframe(data)

    month_range = get_month_range(data)
    # When no issue has a known month, fall back to the default range so the
    # dropdown defaults below are always among the offered choices.
    month_options = get_all_months(data) or list(month_range)

    def resolve_months(min_m, max_m):
        # A dropdown without a selection yields None; use the full range then.
        return (min_m or month_range[0], max_m or month_range[-1])

    with gr.Blocks(title="PEFT Issues Analysis Dashboard") as app:
        gr.Markdown("# 🔍 PEFT Issues Analysis Dashboard")
        # NOTE(review): the issue count in the texts is hard-coded while the
        # About tab also reports len(data); confirm they still agree.
        gr.Markdown("Analysis of 345 most recent issues from [huggingface/peft](https://github.com/huggingface/peft) — classified by LLM")

        # Global date range filter at the top
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### 📅 Global Time Range Filter")
            with gr.Column(scale=8):
                # Dropdowns are used for month selection because the values
                # are "YYYY-MM" strings rather than numbers.
                min_month = gr.Dropdown(
                    choices=month_options,
                    value=month_range[0],
                    label="From Month",
                    allow_custom_value=False
                )
                max_month = gr.Dropdown(
                    choices=month_options,
                    value=month_range[-1],
                    label="To Month",
                    allow_custom_value=False
                )

        with gr.Tabs():
            with gr.Tab("📊 Data Table"):
                with gr.Row():
                    model_filter = gr.Dropdown(
                        choices=get_unique_values(data, "model"),
                        value="All",
                        label="Model"
                    )
                    trainer_filter = gr.Dropdown(
                        choices=get_unique_values(data, "trainer"),
                        value="All",
                        label="Trainer"
                    )
                    peft_filter = gr.Dropdown(
                        choices=get_unique_values(data, "peft_method"),
                        value="All",
                        label="PEFT Method"
                    )
                    training_filter = gr.Dropdown(
                        choices=get_unique_values(data, "training_type"),
                        value="All",
                        label="Training Type"
                    )
                with gr.Row():
                    min_score = gr.Slider(0, 10, value=0, step=1, label="Min Experience Score")
                    max_score = gr.Slider(0, 10, value=10, step=1, label="Max Experience Score")

                table = gr.DataFrame(
                    value=df,
                    headers=["Issue #", "Title", "State", "Date", "Model", "Trainer", "PEFT Method",
                             "Training Type", "Experience", "Specialties", "URL"],
                    interactive=False,
                    wrap=True
                )

                def update_table(m, t, p, tr, min_s, max_s, min_m, max_m):
                    min_m, max_m = resolve_months(min_m, max_m)
                    return filter_data(df.copy(), m, t, p, tr, min_s, max_s, min_m, max_m)

                all_filters = [model_filter, trainer_filter, peft_filter, training_filter,
                               min_score, max_score, min_month, max_month]
                # Any filter change recomputes the table from all filter values.
                for component in all_filters:
                    component.change(
                        fn=update_table,
                        inputs=all_filters,
                        outputs=table
                    )

            with gr.Tab("🔎 Issue Details"):
                # Guard against an empty dataset instead of indexing data[0].
                first_number = data[0]["number"] if data else None
                issue_number = gr.Number(label="Issue Number", value=first_number, precision=0)
                # The initial text goes through the constructor rather than
                # being assigned to the component afterwards.
                details = gr.Markdown(value=show_issue_details(first_number, data))

                def update_details(num):
                    # A cleared number box yields None, which int() rejects.
                    if num is None:
                        return show_issue_details(None, data)
                    return show_issue_details(int(num), data)

                issue_number.change(fn=update_details, inputs=issue_number, outputs=details)

            with gr.Tab("📈 Analytics"):
                with gr.Row():
                    temporal_chart = gr.Plot(value=create_temporal_chart(data))
                with gr.Row():
                    peft_chart = gr.Plot(value=create_peft_method_chart(data))
                    trainer_chart = gr.Plot(value=create_trainer_chart(data))
                with gr.Row():
                    training_chart = gr.Plot(value=create_training_type_chart(data))
                    experience_chart = gr.Plot(value=create_experience_chart(data))
                with gr.Row():
                    exp_method_chart = gr.Plot(value=create_experience_by_method_chart(data))
                    specialties_chart = gr.Plot(value=create_specialties_chart(data))
                with gr.Row():
                    model_chart = gr.Plot(value=create_model_chart(data))

                def update_charts(min_m, max_m):
                    min_m, max_m = resolve_months(min_m, max_m)
                    filtered_data = filter_data_by_months(data, min_m, max_m)
                    # The order must match the outputs list registered below.
                    return (
                        create_temporal_chart(filtered_data),
                        create_peft_method_chart(filtered_data),
                        create_trainer_chart(filtered_data),
                        create_training_type_chart(filtered_data),
                        create_experience_chart(filtered_data),
                        create_experience_by_method_chart(filtered_data),
                        create_specialties_chart(filtered_data),
                        create_model_chart(filtered_data)
                    )

                for component in [min_month, max_month]:
                    component.change(
                        fn=update_charts,
                        inputs=[min_month, max_month],
                        outputs=[
                            temporal_chart, peft_chart, trainer_chart,
                            training_chart, experience_chart,
                            exp_method_chart, specialties_chart, model_chart
                        ]
                    )

            with gr.Tab("ℹ️ About"):
                gr.Markdown(_about_markdown(month_range, len(data)))

    return app
if __name__ == "__main__":
    # Serve on every network interface at port 7860, with the Soft theme and
    # without creating a public share link.
    app = build_app()
    app.launch(
        theme=gr.themes.Soft(),
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )