Upload 2 files
Browse files- app.py +156 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
\
|
| 2 |
+
import os
|
| 3 |
+
import io
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import gradio as gr
|
| 7 |
+
|
| 8 |
+
# Matplotlib only (no seaborn), single chart per plot, no explicit colors
|
| 9 |
+
import matplotlib
|
| 10 |
+
matplotlib.use("Agg")
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
|
| 13 |
+
DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv" # Place at repo root
|
| 14 |
+
|
| 15 |
+
def load_dataframe(file):
    """Read the CSV to plot, preferring an upload over the bundled default.

    Args:
        file: ``None``, a path accepted by ``pd.read_csv``, or an object
            exposing the path through a ``name`` attribute.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        gr.Error: If ``file`` is ``None`` and ``DEFAULT_CSV`` does not exist.
    """
    if file is None:
        # No upload: fall back to the CSV shipped next to the app, if any.
        if not os.path.exists(DEFAULT_CSV):
            raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")
        return pd.read_csv(DEFAULT_CSV)

    # Use the object's ``name`` when it has one, otherwise the value itself.
    source = getattr(file, "name", file)
    return pd.read_csv(source)
|
| 22 |
+
|
| 23 |
+
def prepare_dataframe(df):
    """Return a date-sorted copy of ``df`` with numeric skill columns.

    Expected columns: ``YearMonth`` followed by one column per skill.
    ``YearMonth`` is normally ``YYYY-MM``; it is parsed into a helper
    ``_dt`` datetime column used for sorting, while the original
    ``YearMonth`` text is kept for tick labels.

    Args:
        df: Raw DataFrame as read from the CSV. It is not modified.

    Returns:
        A new DataFrame sorted by ``_dt`` in which every column other than
        ``YearMonth`` and ``_dt`` has been coerced to numeric (values that
        cannot be converted become NaN). Rows whose date cannot be parsed
        keep ``NaT`` in ``_dt``.

    Raises:
        gr.Error: If the ``YearMonth`` column is missing.
    """
    if "YearMonth" not in df.columns:
        raise gr.Error("CSV must have a 'YearMonth' column.")

    # Strict pass: anything that is not YYYY-MM becomes NaT.
    dt = pd.to_datetime(df["YearMonth"], format="%Y-%m", errors="coerce")
    if dt.isna().any():
        # General pass for the leftovers. Only the gaps are filled: replacing
        # the whole column could discard values the strict pass already
        # parsed, because the general parser may settle on one inferred
        # format and coerce everything else to NaT.
        fallback = pd.to_datetime(df["YearMonth"], errors="coerce")
        mergeable = (
            fallback.dtype.kind == "M"
            and getattr(fallback.dtype, "tz", None) is None
        )
        # If the fallback is not a plain naive datetime column, use it as-is.
        dt = dt.fillna(fallback) if mergeable else fallback

    df = df.copy()
    df["_dt"] = dt
    df = df.sort_values("_dt")

    # Everything except the two bookkeeping columns is treated as a skill.
    for c in df.columns:
        if c not in ["YearMonth", "_dt"]:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    return df
|
| 43 |
+
|
| 44 |
+
def list_skill_columns(df):
    """Return the column names of ``df`` that hold skill values.

    Every column is a skill except the bookkeeping columns ``YearMonth``
    and ``_dt``. Column order is preserved.
    """
    reserved = ("YearMonth", "_dt")
    skills = []
    for name in df.columns:
        if name in reserved:
            continue
        skills.append(name)
    return skills
|
| 46 |
+
|
| 47 |
+
def apply_smoothing(series, window):
    """Smooth ``series`` with a trailing moving average.

    Args:
        series: ``pandas.Series`` of values to smooth.
        window: Window length in rows. ``None``, ``0`` or anything that
            truncates to 1 or less disables smoothing.

    Returns:
        ``series`` itself when smoothing is disabled, otherwise a new Series
        of rolling means. ``min_periods=1`` means the first rows are averaged
        over however many points are available rather than becoming NaN.
    """
    if not window:
        return series

    # ``Series.rolling`` rejects non-integer windows, and a UI slider may
    # deliver the value as a float (e.g. 3.0), so coerce before use.
    size = int(window)
    if size <= 1:
        return series
    return series.rolling(window=size, min_periods=1, center=False).mean()
|
| 51 |
+
|
| 52 |
+
def normalize_series(series, mode):
    """Rescale one skill's series according to ``mode``.

    Args:
        series: ``pandas.Series`` of numeric values.
        mode: ``"none"``, ``"min-max (per skill)"`` or
            ``"z-score (per skill)"``. Any other value is treated like
            ``"none"``.

    Returns:
        * ``"none"`` / unknown mode: ``series`` unchanged.
        * min-max: values mapped linearly so the minimum is 0 and the
          maximum is 1.
        * z-score: values centred on the mean and divided by the population
          standard deviation (``ddof=0``).

        When the series has no spread (constant, empty or all-NaN) there is
        nothing to scale by, so a Series of zeros with the same index is
        returned. This keeps a flat series on the same scale as the other
        normalised lines instead of leaking its raw magnitude.
    """
    if mode == "none":
        return series

    if mode == "min-max (per skill)":
        mn, mx = series.min(), series.max()
        # Comparisons against NaN are False, so all-NaN input lands in the
        # degenerate branch as well.
        if mx > mn:
            return (series - mn) / (mx - mn)
        return pd.Series(0.0, index=series.index, name=series.name)

    if mode == "z-score (per skill)":
        mu, sd = series.mean(), series.std(ddof=0)
        if sd > 0:
            return (series - mu) / sd
        return pd.Series(0.0, index=series.index, name=series.name)

    return series
|
| 69 |
+
|
| 70 |
+
def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Draw the selected skills as one line chart and return it as PNG bytes.

    Args:
        df: DataFrame carrying a ``_dt`` datetime column (x positions), a
            ``YearMonth`` column (tick labels) and one column per skill.
        selected_skills: Names of the skill columns to draw. Names that are
            not columns of ``df`` are skipped.
        smoothing_window: Moving-average window passed to ``apply_smoothing``.
        normalize_mode: Mode passed to ``normalize_series``.
        show_markers: When truthy, draw a circular marker on every point.
        y_label: Custom y-axis label. Blank or ``None`` selects a default:
            "Normalized value" when normalising, otherwise "Frequency".

    Returns:
        An ``io.BytesIO`` rewound to position 0 that holds the PNG image.
        Gradio's ``Image`` output documents numpy arrays, PIL images and file
        paths as accepted values, so decode the buffer before handing it to
        such a component.

    Raises:
        gr.Error: If ``selected_skills`` is empty.
    """
    if not selected_skills:
        raise gr.Error("Please select at least one skill.")

    x_dt = df["_dt"]
    x_labels = df["YearMonth"].astype(str)
    # Rows whose date is NaT get no tick: NaT is not a usable tick position.
    has_date = x_dt.notna()

    # A missing label (None) is treated the same as a blank one.
    if (y_label or "").strip():
        axis_label = y_label
    else:
        axis_label = "Normalized value" if normalize_mode != "none" else "Frequency"

    line_style = {"marker": "o"} if show_markers else {}

    fig = plt.figure(figsize=(10, 5.5))
    try:
        ax = fig.add_subplot(111)

        plotted = 0
        for skill in selected_skills:
            if skill not in df.columns:
                continue
            # Smooth first, then normalise the smoothed values.
            y = apply_smoothing(df[skill], smoothing_window)
            y = normalize_series(y, normalize_mode)
            ax.plot(x_dt, y, label=skill, **line_style)
            plotted += 1

        ax.set_xlabel("Year-Month")
        ax.set_ylabel(axis_label)
        ax.set_title("Trend of Selected Hard Skills")
        if plotted:
            # Skip the legend when nothing was drawn; there are no labels.
            ax.legend(loc="best")
        ax.grid(True, which="both", axis="both", alpha=0.35)

        # One tick per dated row, labelled with the original YearMonth text.
        ax.set_xticks(x_dt[has_date])
        ax.set_xticklabels(x_labels[has_date].tolist(), rotation=45, ha="right")

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
    finally:
        # pyplot keeps a reference to every figure it creates; always release
        # it, including when plotting fails part-way.
        plt.close(fig)

    buf.seek(0)
    return buf
|
| 110 |
+
|
| 111 |
+
def run(
    csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label
):
    """Button callback: load the CSV, draw the chart and refresh the UI.

    Args:
        csv_file: Uploaded file (or ``None`` to use the default CSV).
        selected_skills: Skill names currently ticked in the checkbox group.
        smoothing_window: Moving-average window for the plot.
        normalize_mode: Normalisation mode for the plot.
        show_markers: Whether to draw point markers.
        y_label: Optional custom y-axis label.

    Returns:
        A 3-tuple matching the click handler's outputs:
        the chart as a ``uint8`` numpy array, a ``gr.update`` that sets the
        checkbox choices and value, and a preview DataFrame holding
        ``YearMonth`` plus the plotted columns.

    Raises:
        gr.Error: Propagated from loading, preparing or plotting.
    """
    df = prepare_dataframe(load_dataframe(csv_file))
    available = list_skill_columns(df)

    # Keep only selections that exist in this CSV. When nothing usable is
    # left (first click, or the selection belongs to a previously loaded
    # file) fall back to every skill. Otherwise the plot would be refused
    # and the checkbox choices would never be refreshed.
    chosen = [s for s in (selected_skills or []) if s in available] or available

    img_buf = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)

    # gr.Image takes a numpy array, a PIL image or a file path, not a
    # BytesIO, so decode the PNG into a pixel array first.
    pixels = plt.imread(img_buf, format="png")
    if np.issubdtype(pixels.dtype, np.floating):
        # PNGs are decoded as floats in 0-1; convert to plain 8-bit pixels.
        pixels = np.round(pixels * 255).astype(np.uint8)

    # Preview table limited to the columns that were actually plotted.
    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
    return pixels, gr.update(choices=available, value=chosen), preview
|
| 130 |
+
|
| 131 |
+
# UI definition: controls in the left column, results in the right column.
with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
    gr.Markdown(
        "# Hard Skills Trend — Line Chart\nUpload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root."
    )

    with gr.Row():
        # Inputs
        with gr.Column(scale=1):
            csv_file = gr.File(
                label="Upload CSV (optional)",
                file_count="single",
                file_types=[".csv"],
            )
            # Choices start empty; the click handler's outputs fill them in.
            selected_skills = gr.CheckboxGroup(
                choices=[],
                label="Select skills to plot",
            )
            smoothing_window = gr.Slider(
                minimum=1,
                maximum=6,
                value=1,
                step=1,
                label="Smoothing (moving average window in months)",
            )
            normalize_mode = gr.Dropdown(
                choices=["none", "min-max (per skill)", "z-score (per skill)"],
                value="none",
                label="Normalize",
            )
            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
            btn = gr.Button("Plot", variant="primary")

        # Outputs
        with gr.Column(scale=1):
            out_img = gr.Image(label="Line Chart")
            out_table = gr.Dataframe(label="Data preview")

    # Clicking "Plot" returns the image, the refreshed skill choices and the
    # preview table, in that order.
    plot_inputs = [
        csv_file,
        selected_skills,
        smoothing_window,
        normalize_mode,
        show_markers,
        y_label,
    ]
    plot_outputs = [out_img, selected_skills, out_table]
    btn.click(fn=run, inputs=plot_inputs, outputs=plot_outputs)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.26.0
|
| 2 |
+
pandas>=2.0.0
|
| 3 |
+
matplotlib>=3.8
|
| 4 |
+
numpy
|