File size: 5,247 Bytes
3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee 3607f17 e70aeee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
\
import os
import io
import pandas as pd
import numpy as np
import gradio as gr
# Matplotlib only (no seaborn), single chart per plot, no explicit colors
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from PIL import Image # <-- add PIL for returning PIL.Image
# Fallback CSV read by load_dataframe() when the user uploads nothing.
# The path is relative, so it is resolved against the process's working directory.
DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv" # Place at repo root
def load_dataframe(file):
    """Load the skills CSV from an upload, falling back to the default file.

    Args:
        file: ``None``, a filesystem path, or an object exposing its path as
            ``.name`` (presumably the upload wrapper ``gr.File`` hands over in
            some Gradio versions -- confirm against the installed version).
            Objects without ``.name`` are passed to ``pandas.read_csv``
            unchanged, so text buffers work too.

    Returns:
        pandas.DataFrame: The parsed CSV contents.

    Raises:
        gr.Error: If no file was given and ``DEFAULT_CSV`` is not an existing
            file, or if the selected CSV is empty, malformed or cannot be
            decoded as text.
    """
    if file is not None:
        source = getattr(file, "name", file)
    elif os.path.isfile(DEFAULT_CSV):
        # isfile (not exists): a directory with that name is not a usable CSV.
        source = DEFAULT_CSV
    else:
        raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")

    try:
        return pd.read_csv(source)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        # Surface read problems as gr.Error so they follow the same error
        # style as the "no CSV" case above instead of escaping as raw exceptions.
        raise gr.Error(f"Could not read CSV: {err}") from err
def prepare_dataframe(df):
    """Return a chronologically sorted, numeric copy of the raw skills table.

    Expected layout: a ``YearMonth`` column (ideally ``YYYY-MM``) followed by
    one column per skill. The input frame is not modified.

    Args:
        df: pandas.DataFrame as read from the CSV.

    Returns:
        pandas.DataFrame: A copy with an extra ``_dt`` datetime column used
        for ordering, rows sorted by ``_dt`` (unparseable dates sort last), and
        every column other than ``YearMonth``/``_dt`` coerced to numeric, with
        non-numeric cells becoming NaN.

    Raises:
        gr.Error: If the ``YearMonth`` column is missing.
    """
    if "YearMonth" not in df.columns:
        raise gr.Error("CSV must have a 'YearMonth' column.")

    # Try the strict YYYY-MM format first; if any value does not fit,
    # re-parse the whole column and let pandas infer the format instead.
    parsed = pd.to_datetime(df["YearMonth"], format="%Y-%m", errors="coerce")
    if parsed.isna().any():
        parsed = pd.to_datetime(df["YearMonth"], errors="coerce")

    out = df.copy()
    out["_dt"] = parsed
    # Stable sort: rows that share a month (or are all NaT) keep file order,
    # so the chart and the preview table are deterministic.
    out = out.sort_values("_dt", kind="stable")

    skill_columns = [col for col in out.columns if col not in ("YearMonth", "_dt")]
    for col in skill_columns:
        out[col] = pd.to_numeric(out[col], errors="coerce")
    return out
def list_skill_columns(df):
    """Return the skill column names of *df* in their original order.

    Every column except the ``YearMonth`` label column and the internal
    ``_dt`` sort key is treated as a skill.
    """
    reserved = ("YearMonth", "_dt")
    return [col for col in df.columns if col not in reserved]
def apply_smoothing(series, window):
    """Smooth *series* with a trailing moving average.

    Args:
        series: pandas.Series of values in chronological order.
        window: Number of rows to average over. ``None``, ``0`` and anything
            that truncates to 1 or less disables smoothing.

    Returns:
        pandas.Series: The rolling mean, or *series* itself (same object) when
        smoothing is disabled. ``min_periods=1`` means the first rows are
        averaged over however many values are available rather than being NaN.
    """
    if not window:
        return series
    # UI sliders may deliver a float such as 3.0; Series.rolling only accepts
    # an integer row count, so coerce before using it.
    size = int(window)
    if size <= 1:
        return series
    return series.rolling(window=size, min_periods=1).mean()
def normalize_series(series, mode):
    """Rescale one skill's series according to the selected normalization.

    Args:
        series: pandas.Series of numeric values (NaN allowed).
        mode: ``"none"``, ``"min-max (per skill)"`` or ``"z-score (per skill)"``.
            Any other value leaves the series unchanged.

    Returns:
        pandas.Series: For min-max, values scaled into [0, 1]. For z-score,
        values centred on the mean and divided by the population standard
        deviation (``ddof=0``). A series with no spread (constant or all-NaN)
        becomes all zeros, so it stays on the same scale as the other
        normalized lines instead of keeping its raw magnitude.
    """
    if mode == "min-max (per skill)":
        low, high = series.min(), series.max()
        if high > low:
            return (series - low) / (high - low)
        # No range to scale by (also reached when min/max are NaN).
        return pd.Series(0.0, index=series.index, name=series.name)

    if mode == "z-score (per skill)":
        mean, spread = series.mean(), series.std(ddof=0)
        if spread > 0:
            return (series - mean) / spread
        # Zero or undefined deviation: every point sits on the mean.
        return pd.Series(0.0, index=series.index, name=series.name)

    # "none" and unrecognised modes pass the data through untouched.
    return series
def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Render the selected skills as a line chart and return it as an image.

    Args:
        df: Prepared frame containing ``_dt`` (x positions), ``YearMonth``
            (tick labels) and one column per skill.
        selected_skills: Names of the columns to draw; names missing from
            *df* are skipped.
        smoothing_window: Moving-average window passed to ``apply_smoothing``.
        normalize_mode: Mode string passed to ``normalize_series``.
        show_markers: Draw a circle marker on every data point when truthy.
        y_label: Custom y-axis label; blank or ``None`` selects a default
            based on whether normalization is active.

    Returns:
        PIL.Image.Image: The chart rendered as PNG at 160 dpi.

    Raises:
        gr.Error: If *selected_skills* is empty.
    """
    if not selected_skills:
        raise gr.Error("Please select at least one skill.")

    x_dt = df["_dt"]
    x_labels = df["YearMonth"].astype(str).tolist()

    # Tolerate None as well as blank text for the optional label.
    if (y_label or "").strip():
        axis_label = y_label
    elif normalize_mode != "none":
        axis_label = "Normalized value"
    else:
        axis_label = "Frequency"

    marker = "o" if show_markers else None  # None keeps matplotlib's default (no marker)

    fig = plt.figure(figsize=(10, 5.5))
    try:
        ax = fig.add_subplot(111)

        drew_line = False
        for skill in selected_skills:
            if skill not in df.columns:
                continue
            # Smooth first, then normalize the smoothed values.
            y = apply_smoothing(df[skill], smoothing_window)
            y = normalize_series(y, normalize_mode)
            ax.plot(x_dt, y, marker=marker, label=skill)
            drew_line = True

        ax.set_xlabel("Year-Month")
        ax.set_ylabel(axis_label)
        ax.set_title("Trend of Selected Hard Skills")
        if drew_line:
            # Skip the legend when nothing was drawn to avoid an empty-legend warning.
            ax.legend(loc="best")
        ax.grid(True, which="both", axis="both", alpha=0.35)
        # One tick per row, labelled with the original YearMonth text.
        ax.set_xticks(x_dt)
        ax.set_xticklabels(x_labels, rotation=45, ha="right")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
    finally:
        # Always release the figure; pyplot keeps figures alive until they are
        # closed, so an exception above would otherwise leak one per request.
        plt.close(fig)

    buf.seek(0)
    # Return PIL Image instead of BytesIO to satisfy gr.Image postprocess
    return Image.open(buf)
def run(csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Button callback: load the data, draw the chart and refresh the UI.

    Args:
        csv_file: Uploaded file (or ``None`` to use the default CSV).
        selected_skills: Skill names currently ticked in the checkbox group.
        smoothing_window: Moving-average window from the slider.
        normalize_mode: Normalization mode from the dropdown.
        show_markers: Whether to draw point markers.
        y_label: Optional custom y-axis label.

    Returns:
        tuple: ``(image, checkbox_update, preview)`` -- the rendered chart, a
        ``gr.update`` that sets the checkbox choices to the columns of the
        loaded CSV and its value to the skills actually plotted, and a frame
        with ``YearMonth`` plus the plotted skill columns.

    Raises:
        gr.Error: Propagated from loading/preparing/plotting, or raised here
            when the CSV contains no skill columns.
    """
    df = prepare_dataframe(load_dataframe(csv_file))
    available = list_skill_columns(df)
    if not available:
        raise gr.Error("CSV has no skill columns besides 'YearMonth'.")

    # Keep only selections that exist in this CSV. If nothing valid remains
    # (first run, or a stale selection left over from a different CSV), plot
    # every skill so the checkbox choices can be refreshed instead of failing.
    chosen = [skill for skill in (selected_skills or []) if skill in available]
    if not chosen:
        chosen = available

    img = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)
    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
    return img, gr.update(choices=available, value=chosen), preview
# UI layout: input controls in the first column, chart and data preview in the second.
with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
    # A real newline ends the heading; an escaped "\\n" would be rendered
    # literally and pull the instructions into the heading line.
    gr.Markdown(
        "# Hard Skills Trend — Line Chart\n"
        "Upload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root."
    )
    with gr.Row():
        with gr.Column(scale=1):
            csv_file = gr.File(label="Upload CSV (optional)", file_count="single", file_types=[".csv"])
            # Choices start empty; run() fills them from the loaded CSV's columns.
            selected_skills = gr.CheckboxGroup(choices=[], label="Select skills to plot")
            smoothing_window = gr.Slider(1, 6, value=1, step=1, label="Smoothing (moving average window in months)")
            normalize_mode = gr.Dropdown(
                choices=["none", "min-max (per skill)", "z-score (per skill)"],
                value="none",
                label="Normalize",
            )
            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
            btn = gr.Button("Plot", variant="primary")
        with gr.Column(scale=1):
            out_img = gr.Image(label="Line Chart", type="pil")  # explicitly set type=pil
            out_table = gr.Dataframe(label="Data preview")

    # selected_skills is both an input and an output so run() can update its choices.
    btn.click(
        fn=run,
        inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
        outputs=[out_img, selected_skills, out_table],
    )

if __name__ == "__main__":
    demo.launch()
|