Spaces:

Nucha
/

LineChart

Sleeping

App Files Files Community

LineChart / app.py

Nucha

Upload 2 files

3607f17 verified 5 months ago

raw

history blame contribute delete

5.25 kB

	\
	import os
	import io
	import pandas as pd
	import numpy as np
	import gradio as gr

	# Matplotlib only (no seaborn), single chart per plot, no explicit colors
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	from PIL import Image # <-- add PIL for returning PIL.Image

	# Fallback dataset that load_dataframe() reads when no file is uploaded.
	# The path is relative, so it is resolved against the working directory.
	DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv" # Place at repo root

	def load_dataframe(file):
	    """Load the skills CSV from an upload, or fall back to the default CSV.

	    Args:
	        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or an
	            object that exposes its path through a ``name`` attribute. Any
	            other object is handed to ``pandas.read_csv`` unchanged.

	    Returns:
	        The DataFrame produced by ``pandas.read_csv``.

	    Raises:
	        gr.Error: If ``file`` is ``None`` and ``DEFAULT_CSV`` does not exist.
	    """
	    if file is not None:
	        if isinstance(file, (str, os.PathLike)):
	            # Use real paths as-is: ``Path.name`` is only the final path
	            # component, so reading it would look in the wrong directory.
	            source = file
	        else:
	            source = getattr(file, "name", file)
	        return pd.read_csv(source)
	    if os.path.exists(DEFAULT_CSV):
	        return pd.read_csv(DEFAULT_CSV)
	    raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")

	def prepare_dataframe(df):
	    """Return a date-sorted copy of ``df`` with numeric skill columns.

	    The frame must contain a ``YearMonth`` column. It is parsed as ``YYYY-MM``
	    first; if any value fails, the whole column is re-parsed without an
	    explicit format. The parsed dates are stored in a helper ``_dt`` column
	    used for sorting, and every other column except ``YearMonth`` is coerced
	    to numbers (unparseable cells become NaN).

	    Raises:
	        gr.Error: If the ``YearMonth`` column is missing.
	    """
	    if "YearMonth" not in df.columns:
	        raise gr.Error("CSV must have a 'YearMonth' column.")

	    raw_dates = df["YearMonth"]
	    parsed = pd.to_datetime(raw_dates, format="%Y-%m", errors="coerce")
	    if parsed.isna().any():
	        # At least one value is not strict YYYY-MM: let pandas infer the format.
	        parsed = pd.to_datetime(raw_dates, errors="coerce")

	    # Work on a copy so the caller's frame is left untouched.
	    result = df.copy()
	    result["_dt"] = parsed
	    result = result.sort_values("_dt")

	    reserved = ["YearMonth", "_dt"]
	    for name in result.columns:
	        if name in reserved:
	            continue
	        result[name] = pd.to_numeric(result[name], errors="coerce")
	    return result

	def list_skill_columns(df):
	    """Return the column names of ``df`` other than ``YearMonth`` and ``_dt``."""
	    bookkeeping = ("YearMonth", "_dt")
	    skills = []
	    for column in df.columns:
	        if column in bookkeeping:
	            continue
	        skills.append(column)
	    return skills

	def apply_smoothing(series, window):
	    """Smooth ``series`` with a trailing moving average.

	    Args:
	        series: The pandas Series to smooth.
	        window: Size of the rolling window. Falsy values and values not
	            greater than 1 disable smoothing.

	    Returns:
	        The rolling mean (``min_periods=1``, so the first points are averaged
	        over the values available so far), or ``series`` itself when
	        smoothing is disabled.
	    """
	    # ``Series.rolling`` rejects float windows, even integral ones such as
	    # 3.0, so normalise those to int before use.
	    if isinstance(window, float) and window.is_integer():
	        window = int(window)
	    if window and window > 1:
	        return series.rolling(window=window, min_periods=1).mean()
	    return series

	def normalize_series(series, mode):
	    """Rescale one skill's series according to ``mode``.

	    Args:
	        series: The pandas Series to normalise.
	        mode: ``"min-max (per skill)"`` maps the values onto [0, 1];
	            ``"z-score (per skill)"`` centres on the mean and divides by the
	            population standard deviation (``ddof=0``). ``"none"`` and any
	            unrecognised mode return ``series`` unchanged.

	    Returns:
	        The normalised Series. When the series has no spread (constant,
	        all-NaN or empty) the result is all zeros, so the line stays on the
	        normalised scale instead of keeping its raw values.
	    """
	    if mode == "min-max (per skill)":
	        low, high = series.min(), series.max()
	        if high > low:
	            return (series - low) / (high - low)
	        # No range to scale by (the comparison is also False for NaN).
	        return pd.Series(0.0, index=series.index, name=series.name)
	    if mode == "z-score (per skill)":
	        mean, spread = series.mean(), series.std(ddof=0)
	        if spread > 0:
	            return (series - mean) / spread
	        # Zero or undefined deviation: every point sits at the mean.
	        return pd.Series(0.0, index=series.index, name=series.name)
	    return series

	def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
	    """Render the selected skills as a line chart and return it as an image.

	    Args:
	        df: Frame holding ``_dt`` (x positions), ``YearMonth`` (tick labels)
	            and one column per skill.
	        selected_skills: Column names to draw; names missing from ``df`` are
	            skipped.
	        smoothing_window: Moving-average window passed to ``apply_smoothing``.
	        normalize_mode: Mode passed to ``normalize_series``.
	        show_markers: Draw a circle marker on every data point when truthy.
	        y_label: Custom y-axis label. When blank or ``None`` a default is
	            chosen from ``normalize_mode``.

	    Returns:
	        A ``PIL.Image`` opened from the PNG rendering of the figure.

	    Raises:
	        gr.Error: If ``selected_skills`` is empty.
	    """
	    if not selected_skills:
	        raise gr.Error("Please select at least one skill.")

	    x_dt = df["_dt"]
	    x_labels = df["YearMonth"].astype(str).tolist()

	    # Tolerate a None label so a cleared textbox cannot crash the plot.
	    if (y_label or "").strip():
	        axis_label = y_label
	    elif normalize_mode != "none":
	        axis_label = "Normalized value"
	    else:
	        axis_label = "Frequency"

	    line_style = {"marker": "o"} if show_markers else {}

	    fig = plt.figure(figsize=(10, 5.5))
	    try:
	        ax = fig.add_subplot(111)

	        for skill in selected_skills:
	            if skill not in df.columns:
	                continue
	            # Smooth first, then normalise the smoothed curve.
	            y = apply_smoothing(df[skill], smoothing_window)
	            y = normalize_series(y, normalize_mode)
	            ax.plot(x_dt, y, label=skill, **line_style)

	        ax.set_xlabel("Year-Month")
	        ax.set_ylabel(axis_label)
	        ax.set_title("Trend of Selected Hard Skills")
	        ax.legend(loc="best")
	        ax.grid(True, which="both", axis="both", alpha=0.35)

	        # One tick per row, labelled with the original YearMonth text.
	        ax.set_xticks(x_dt)
	        ax.set_xticklabels(x_labels, rotation=45, ha="right")

	        fig.tight_layout()
	        buf = io.BytesIO()
	        fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
	    finally:
	        # pyplot keeps every figure registered until it is closed, so close
	        # it even when plotting fails to avoid leaking figures.
	        plt.close(fig)

	    buf.seek(0)
	    # Return PIL Image instead of BytesIO to satisfy gr.Image postprocess
	    return Image.open(buf)

	def run(csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
	    """Handle a click on "Plot": load the data, draw the chart, build the preview.

	    Args:
	        csv_file: Uploaded file, or ``None`` to use the default CSV.
	        selected_skills: Skill names ticked in the checkbox group.
	        smoothing_window: Moving-average window size.
	        normalize_mode: Normalisation mode name.
	        show_markers: Whether to draw point markers.
	        y_label: Optional y-axis label.

	    Returns:
	        A tuple ``(image, checkbox_update, preview)``: the rendered chart, a
	        ``gr.update`` carrying the available skill choices and the selection
	        actually plotted, and a frame with ``YearMonth`` plus the plotted
	        skill columns.
	    """
	    df = prepare_dataframe(load_dataframe(csv_file))
	    available = list_skill_columns(df)

	    # Drop names that are not columns of this CSV (e.g. a selection left over
	    # from a previously loaded file). If nothing usable remains, plot every
	    # skill rather than failing with "Please select at least one skill."
	    chosen = [s for s in (selected_skills or []) if s in available]
	    if not chosen:
	        chosen = available

	    img = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)
	    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
	    return img, gr.update(choices=available, value=chosen), preview

	# UI layout: controls in the left column, chart and data preview in the right.
	with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
	    # "\n" must be a real line break so that only the first line is rendered
	    # as the heading; an escaped backslash would show up literally in the title.
	    gr.Markdown("# Hard Skills Trend — Line Chart\nUpload a CSV or place Trend_of_Top_10_Hard_Skills.csv in the repo root.")

	    with gr.Row():
	        with gr.Column(scale=1):
	            csv_file = gr.File(label="Upload CSV (optional)", file_count="single", file_types=[".csv"])
	            # Choices start empty; run() fills them in through gr.update.
	            selected_skills = gr.CheckboxGroup(choices=[], label="Select skills to plot")
	            smoothing_window = gr.Slider(1, 6, value=1, step=1, label="Smoothing (moving average window in months)")
	            normalize_mode = gr.Dropdown(choices=["none", "min-max (per skill)", "z-score (per skill)"], value="none", label="Normalize")
	            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
	            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
	            btn = gr.Button("Plot", variant="primary")

	        with gr.Column(scale=1):
	            out_img = gr.Image(label="Line Chart", type="pil") # explicitly set type=pil
	            out_table = gr.Dataframe(label="Data preview")

	    # selected_skills is both an input and an output so run() can refresh
	    # its choices and value after each plot.
	    btn.click(
	        fn=run,
	        inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
	        outputs=[out_img, selected_skills, out_table]
	    )

	if __name__ == "__main__":
	    demo.launch()