LineChart / app.py
Nucha's picture
Upload 2 files
3607f17 verified
\
import os
import io
import pandas as pd
import numpy as np
import gradio as gr
# Matplotlib only (no seaborn), single chart per plot, no explicit colors
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from PIL import Image # <-- add PIL for returning PIL.Image
# Fallback dataset used by load_dataframe when no file is uploaded.
# The path is relative, so it is resolved against the current working directory.
DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv"  # Place at repo root
def load_dataframe(file):
    """Read the CSV to chart into a DataFrame.

    An uploaded ``file`` wins over the bundled default. ``file`` may be an
    object exposing a ``name`` attribute (its value is handed to pandas) or
    anything ``pd.read_csv`` accepts directly, such as a path or a buffer.

    Raises:
        gr.Error: if nothing was uploaded and ``DEFAULT_CSV`` does not exist.
    """
    if file is None:
        if not os.path.exists(DEFAULT_CSV):
            raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")
        return pd.read_csv(DEFAULT_CSV)

    # Upload wrappers carry the on-disk location in ``name``; plain paths and
    # buffers have no such attribute and are passed through unchanged.
    source = getattr(file, "name", file)
    return pd.read_csv(source)
def prepare_dataframe(df):
    """
    Validate, date-sort and numerically coerce the raw CSV frame.

    Expected layout: a ``YearMonth`` column followed by one column per skill.
    The returned frame is a copy sorted chronologically, with a helper
    ``_dt`` datetime column added and every other column except
    ``YearMonth`` converted to numbers (unparsable cells become NaN).

    Raises:
        gr.Error: if the ``YearMonth`` column is missing.
    """
    if "YearMonth" not in df.columns:
        raise gr.Error("CSV must have a 'YearMonth' column.")

    raw_dates = df["YearMonth"]
    # Try the strict YYYY-MM layout first; if any entry fails, re-parse the
    # whole column and let pandas infer the format instead.
    parsed = pd.to_datetime(raw_dates, format="%Y-%m", errors="coerce")
    if parsed.isna().any():
        parsed = pd.to_datetime(raw_dates, errors="coerce")

    # Work on a copy so the caller's frame is left untouched.
    ordered = df.copy()
    ordered["_dt"] = parsed
    ordered = ordered.sort_values("_dt")

    reserved = ("YearMonth", "_dt")
    value_columns = [name for name in ordered.columns if name not in reserved]
    for name in value_columns:
        ordered[name] = pd.to_numeric(ordered[name], errors="coerce")
    return ordered
def list_skill_columns(df):
    """Return the frame's column names, in order, minus the date bookkeeping columns."""
    reserved = ("YearMonth", "_dt")
    return [name for name in df.columns if name not in reserved]
def apply_smoothing(series, window):
    """Smooth ``series`` with a trailing moving average of ``window`` points.

    Args:
        series: pandas Series of values to smooth.
        window: number of points to average over. ``None``, ``0``, ``1`` or
            anything not greater than 1 disables smoothing.

    Returns:
        The input series itself when smoothing is disabled, otherwise a new
        Series of rolling means. ``min_periods=1`` lets the first points
        average over however many values are available so far.
    """
    if not window or not window > 1:
        return series
    # A UI slider may deliver the value as a float (e.g. 3.0), and pandas
    # rejects non-integer fixed window sizes, so coerce before rolling.
    return series.rolling(window=int(window), min_periods=1).mean()
def normalize_series(series, mode):
    """Rescale one skill's series according to ``mode``.

    Args:
        series: pandas Series of numeric values.
        mode: ``"none"``, ``"min-max (per skill)"`` or
            ``"z-score (per skill)"``. Any other value leaves the series
            unchanged.

    Returns:
        The input series itself for ``"none"`` or an unknown mode; otherwise
        a new Series. Min-max maps the values onto [0, 1]; z-score centres
        them on the mean and divides by the population standard deviation
        (``ddof=0``).

    A series with no spread (constant, empty or all-NaN) cannot be scaled.
    It is returned as all zeros rather than as its raw values, so it stays on
    the same scale as the other normalized lines.
    """
    if mode == "min-max (per skill)":
        low, high = series.min(), series.max()
        if high > low:
            return (series - low) / (high - low)
        # Degenerate range: subtracting the minimum gives 0 for every real
        # value, and fillna covers missing or all-NaN input.
        return (series - low).fillna(0.0)
    if mode == "z-score (per skill)":
        mean, spread = series.mean(), series.std(ddof=0)
        if spread > 0:
            return (series - mean) / spread
        # Zero (or undefined) deviation: every value sits on the mean.
        return (series - mean).fillna(0.0)
    return series
def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Render the selected skills as a line chart and return it as an image.

    Args:
        df: frame holding a ``_dt`` datetime column, a ``YearMonth`` label
            column and one numeric column per skill.
        selected_skills: column names to draw; names missing from ``df`` are
            skipped.
        smoothing_window: moving-average window passed to ``apply_smoothing``.
        normalize_mode: mode string passed to ``normalize_series``.
        show_markers: draw a circular marker on every data point when truthy.
        y_label: custom y-axis label; blank or ``None`` selects a default
            that depends on whether normalization is active.

    Returns:
        A ``PIL.Image`` decoded from the PNG rendering of the figure.

    Raises:
        gr.Error: if ``selected_skills`` is empty.
    """
    if not selected_skills:
        raise gr.Error("Please select at least one skill.")

    x_dt = df["_dt"]
    x_labels = df["YearMonth"].astype(str).tolist()
    line_style = {"marker": "o"} if show_markers else {}

    # Tolerate a missing label (None) as well as a blank one.
    has_custom_label = bool((y_label or "").strip())
    default_label = "Normalized value" if normalize_mode != "none" else "Frequency"

    fig = plt.figure(figsize=(10, 5.5))
    try:
        ax = fig.add_subplot(111)

        drew_line = False
        for skill in selected_skills:
            if skill not in df.columns:
                continue
            y = apply_smoothing(df[skill], smoothing_window)
            y = normalize_series(y, normalize_mode)
            ax.plot(x_dt, y, label=skill, **line_style)
            drew_line = True

        ax.set_xlabel("Year-Month")
        ax.set_ylabel(y_label if has_custom_label else default_label)
        ax.set_title("Trend of Selected Hard Skills")
        # A legend without any labelled line only produces a warning.
        if drew_line:
            ax.legend(loc="best")
        ax.grid(True, which="both", axis="both", alpha=0.35)
        # One tick per row, labelled with the original YearMonth text.
        ax.set_xticks(x_dt)
        ax.set_xticklabels(x_labels, rotation=45, ha="right")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure registered; always release it so a
        # failed render does not accumulate figures in a long-running app.
        plt.close(fig)

    buf.seek(0)
    # Return PIL Image instead of BytesIO to satisfy gr.Image postprocess
    return Image.open(buf)
def run(csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Button callback: load the data, draw the chart and refresh the UI.

    Args:
        csv_file: uploaded file (or ``None`` to use the default CSV).
        selected_skills: skill names currently ticked in the checkbox group.
        smoothing_window, normalize_mode, show_markers, y_label: plotting
            options forwarded unchanged to ``plot_lines``.

    Returns:
        A 3-tuple of the chart image, a ``gr.update`` that sets the checkbox
        group's choices to the skills found in the CSV and its value to the
        skills actually plotted, and a preview frame containing
        ``YearMonth`` plus the plotted skill columns.
    """
    df = prepare_dataframe(load_dataframe(csv_file))
    available = list_skill_columns(df)

    # Drop selections that do not exist in this CSV *before* deciding whether
    # to fall back. Otherwise a selection left over from a previously loaded
    # file would filter down to nothing and fail instead of plotting everything.
    chosen = [skill for skill in (selected_skills or []) if skill in available]
    if not chosen:
        chosen = list(available)

    img = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)
    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
    return img, gr.update(choices=available, value=chosen), preview
# UI layout: controls in the left column, chart and data preview in the right.
with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
    # A real newline ends the heading; an escaped backslash would be rendered
    # literally as "\n" inside the title.
    gr.Markdown("# Hard Skills Trend — Line Chart\nUpload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root.")
    with gr.Row():
        with gr.Column(scale=1):
            csv_file = gr.File(label="Upload CSV (optional)", file_count="single", file_types=[".csv"])
            # Choices start empty; run() fills them in after the first plot.
            selected_skills = gr.CheckboxGroup(choices=[], label="Select skills to plot")
            smoothing_window = gr.Slider(1, 6, value=1, step=1, label="Smoothing (moving average window in months)")
            normalize_mode = gr.Dropdown(choices=["none", "min-max (per skill)", "z-score (per skill)"], value="none", label="Normalize")
            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
            btn = gr.Button("Plot", variant="primary")
        with gr.Column(scale=1):
            out_img = gr.Image(label="Line Chart", type="pil")  # explicitly set type=pil
            out_table = gr.Dataframe(label="Data preview")
    # selected_skills is both an input and an output so run() can update the
    # available choices and the effective selection.
    btn.click(
        fn=run,
        inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
        outputs=[out_img, selected_skills, out_table],
    )

if __name__ == "__main__":
    demo.launch()