|
|
\ |
|
|
import os |
|
|
import io |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
import matplotlib |
|
|
matplotlib.use("Agg") |
|
|
import matplotlib.pyplot as plt |
|
|
from PIL import Image |
|
|
|
|
|
# Relative path of the fallback CSV; load_dataframe reads it when no file is uploaded.
DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv"
|
|
|
|
|
def load_dataframe(file):
    """Read the uploaded CSV, or the default CSV when nothing was uploaded.

    Args:
        file: ``None``, a path / file-like object accepted by
            ``pandas.read_csv``, or an object exposing the path as ``.name``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        gr.Error: No file was given and ``DEFAULT_CSV`` does not exist.
    """
    if file is None:
        # Nothing uploaded: fall back to the bundled default, if present.
        if not os.path.exists(DEFAULT_CSV):
            raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")
        return pd.read_csv(DEFAULT_CSV)

    # Objects carrying a ``name`` attribute are read through that attribute;
    # anything else is handed to pandas unchanged.
    source = getattr(file, "name", file)
    return pd.read_csv(source)
|
|
|
|
|
def prepare_dataframe(df):
    """Return a date-sorted copy of ``df`` with every value column made numeric.

    Expected layout: a ``YearMonth`` column followed by one column per skill.
    ``YearMonth`` is parsed as ``YYYY-MM`` first; if any value fails that
    format, the whole column is re-parsed with pandas' default inference.
    The parsed dates are stored in a helper column ``_dt`` used for sorting,
    while ``YearMonth`` itself is left untouched.

    Args:
        df: Raw ``pandas.DataFrame``; it is not modified.

    Returns:
        A new DataFrame sorted by ``_dt``; every column other than
        ``YearMonth`` and ``_dt`` is coerced with ``pd.to_numeric`` (values
        that cannot be converted become NaN).

    Raises:
        gr.Error: ``df`` has no ``YearMonth`` column.
    """
    if "YearMonth" not in df.columns:
        raise gr.Error("CSV must have a 'YearMonth' column.")

    raw_months = df["YearMonth"]
    parsed = pd.to_datetime(raw_months, format="%Y-%m", errors="coerce")
    if parsed.isna().any():
        # At least one value is not strictly YYYY-MM: retry with inference.
        parsed = pd.to_datetime(raw_months, errors="coerce")

    ordered = df.copy()
    ordered["_dt"] = parsed
    ordered = ordered.sort_values("_dt")

    reserved = ("YearMonth", "_dt")
    for column in ordered.columns:
        if column in reserved:
            continue
        ordered[column] = pd.to_numeric(ordered[column], errors="coerce")

    return ordered
|
|
|
|
|
def list_skill_columns(df):
    """Return the column labels of ``df`` except ``YearMonth`` and ``_dt``.

    The original column order is preserved.
    """
    skills = []
    for label in df.columns:
        if label in ("YearMonth", "_dt"):
            continue
        skills.append(label)
    return skills
|
|
|
|
|
def apply_smoothing(series, window):
    """Smooth ``series`` with a trailing moving average.

    Args:
        series: ``pandas.Series`` to smooth.
        window: Window length in rows. Falsy values and values not greater
            than 1 disable smoothing. Whole-number floats such as ``3.0``
            are accepted.

    Returns:
        ``series`` itself when smoothing is disabled, otherwise a new Series
        holding the rolling mean. ``min_periods=1`` means the leading rows
        are averaged over however many values are available, so no NaN
        padding is introduced.
    """
    if not window or not window > 1:
        return series
    # ``Series.rolling`` rejects non-integer windows with a ValueError, so a
    # float coming from a UI widget is coerced to int first.
    return series.rolling(window=int(window), min_periods=1).mean()
|
|
|
|
|
def normalize_series(series, mode):
    """Normalize one skill's series according to ``mode``.

    Args:
        series: Numeric ``pandas.Series``.
        mode: ``"none"``, ``"min-max (per skill)"`` or
            ``"z-score (per skill)"``. Any other value behaves like
            ``"none"``.

    Returns:
        ``series`` itself for ``"none"`` / unknown modes. Otherwise a new
        Series: min-max scales to ``[0, 1]``; z-score subtracts the mean and
        divides by the population standard deviation (``ddof=0``). A series
        with no spread (constant or all-NaN) normalizes to all zeros.
    """
    if mode not in ("min-max (per skill)", "z-score (per skill)"):
        return series

    lowest, highest = series.min(), series.max()
    if not highest > lowest:
        # Constant or all-NaN input: there is nothing to scale. Return zeros
        # rather than the raw values. The spread test is used for z-score as
        # well because rounding can give a constant float series a tiny
        # non-zero standard deviation.
        return pd.Series(0.0, index=series.index, name=series.name)

    if mode == "min-max (per skill)":
        return (series - lowest) / (highest - lowest)

    mean, std = series.mean(), series.std(ddof=0)
    if not std > 0:
        # Guard against a standard deviation that underflowed to zero.
        return pd.Series(0.0, index=series.index, name=series.name)
    return (series - mean) / std
|
|
|
|
|
def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Render the selected skills as a line chart and return it as an image.

    Args:
        df: DataFrame holding a ``_dt`` column (x values), a ``YearMonth``
            column (tick labels) and one column per skill.
        selected_skills: Column names to plot; names missing from ``df`` are
            skipped.
        smoothing_window: Passed to ``apply_smoothing`` for every skill.
        normalize_mode: Passed to ``normalize_series`` for every skill.
        show_markers: When truthy, draw a circle marker on every data point.
        y_label: Custom y-axis label; ``None`` or a blank string selects a
            default label based on ``normalize_mode``.

    Returns:
        A ``PIL.Image`` opened from the in-memory PNG rendering of the chart.

    Raises:
        gr.Error: ``selected_skills`` is empty.
    """
    if not selected_skills:
        raise gr.Error("Please select at least one skill.")

    x_dt = df["_dt"]
    x_labels = df["YearMonth"].astype(str).tolist()
    marker = "o" if show_markers else None

    fig = plt.figure(figsize=(10, 5.5))
    try:
        ax = fig.add_subplot(111)

        for skill in selected_skills:
            if skill not in df.columns:
                continue
            # Smooth first, then normalize the smoothed values.
            y = apply_smoothing(df[skill], smoothing_window)
            y = normalize_series(y, normalize_mode)
            ax.plot(x_dt, y, marker=marker, label=skill)

        if y_label and y_label.strip():
            axis_label = y_label
        elif normalize_mode != "none":
            axis_label = "Normalized value"
        else:
            axis_label = "Frequency"

        ax.set_xlabel("Year-Month")
        ax.set_ylabel(axis_label)
        ax.set_title("Trend of Selected Hard Skills")
        if ax.lines:
            # Skip the legend when every requested skill was missing, so no
            # empty legend box is drawn.
            ax.legend(loc="best")
        ax.grid(True, which="both", axis="both", alpha=0.35)

        # One tick per row, labelled with the original YearMonth text.
        ax.set_xticks(x_dt)
        ax.set_xticklabels(x_labels, rotation=45, ha="right")

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving failed;
        # pyplot keeps every open figure alive until it is closed.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
|
|
|
|
|
def run(csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Button callback: load the CSV, plot the chosen skills, build the preview.

    Args:
        csv_file: Uploaded file, or ``None`` to use the default CSV.
        selected_skills: Skill names ticked in the UI (may be empty or ``None``).
        smoothing_window: Moving-average window forwarded to ``plot_lines``.
        normalize_mode: Normalization mode forwarded to ``plot_lines``.
        show_markers: Marker toggle forwarded to ``plot_lines``.
        y_label: Optional y-axis label forwarded to ``plot_lines``.

    Returns:
        A 3-tuple ``(image, checkbox_update, preview)``: the rendered chart,
        a ``gr.update`` carrying the available skill choices and the skills
        actually plotted, and a DataFrame with ``YearMonth`` plus those
        skills.
    """
    df = prepare_dataframe(load_dataframe(csv_file))
    available = list_skill_columns(df)

    # Drop selections that do not exist in this CSV *before* deciding whether
    # to fall back, so a selection left over from a previously loaded file
    # plots everything instead of failing with "select at least one skill".
    chosen = [skill for skill in (selected_skills or []) if skill in available]
    if not chosen:
        chosen = list(available)

    img = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)

    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
    return img, gr.update(choices=available, value=chosen), preview
|
|
|
|
|
# UI layout: controls in the left column, chart and data preview in the right.
with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
    # A real newline ends the heading so the instructions render as a
    # paragraph below it (an escaped "\\n" would be shown literally in the H1).
    gr.Markdown(
        "# Hard Skills Trend — Line Chart\n"
        "Upload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root."
    )

    with gr.Row():
        with gr.Column(scale=1):
            csv_file = gr.File(label="Upload CSV (optional)", file_count="single", file_types=[".csv"])
            # Choices start empty; `run` fills them in through its gr.update output.
            selected_skills = gr.CheckboxGroup(choices=[], label="Select skills to plot")
            smoothing_window = gr.Slider(1, 6, value=1, step=1, label="Smoothing (moving average window in months)")
            normalize_mode = gr.Dropdown(
                choices=["none", "min-max (per skill)", "z-score (per skill)"],
                value="none",
                label="Normalize",
            )
            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
            btn = gr.Button("Plot", variant="primary")

        with gr.Column(scale=1):
            out_img = gr.Image(label="Line Chart", type="pil")
            out_table = gr.Dataframe(label="Data preview")

    # `selected_skills` is both an input and an output, so the checkbox list
    # is refreshed with the columns of whichever CSV was just plotted.
    btn.click(
        fn=run,
        inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
        outputs=[out_img, selected_skills, out_table],
    )


if __name__ == "__main__":
    demo.launch()
|
|
|