Nucha committed on
Commit
e70aeee
·
verified ·
1 Parent(s): c7a3425

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +156 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \
2
+ import os
3
+ import io
4
+ import pandas as pd
5
+ import numpy as np
6
+ import gradio as gr
7
+
8
+ # Matplotlib only (no seaborn), single chart per plot, no explicit colors
9
+ import matplotlib
10
+ matplotlib.use("Agg")
11
+ import matplotlib.pyplot as plt
12
+
13
# Fallback dataset used when the user uploads nothing; place it at the repo root.
DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv"
14
+
15
def load_dataframe(file):
    """Load the skills table from an upload, falling back to the default CSV.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), an
            upload wrapper exposing the file path as ``.name``, or any other
            object ``pandas.read_csv`` can read directly.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        gr.Error: If nothing was uploaded and ``DEFAULT_CSV`` does not exist,
            or if the chosen file cannot be parsed as CSV.
    """
    if file is None:
        if not os.path.exists(DEFAULT_CSV):
            raise gr.Error("No CSV provided and default file not found. Please upload a CSV.")
        source = DEFAULT_CSV
    elif isinstance(file, (str, os.PathLike)):
        # Real paths must be checked first: pathlib.Path also has a ``.name``
        # attribute, but it is only the basename and would point at the wrong
        # file (or at nothing at all).
        source = file
    else:
        # Upload wrappers carry the on-disk path in ``.name``; anything else
        # (e.g. an in-memory buffer) is handed to pandas unchanged.
        source = getattr(file, "name", file)

    try:
        return pd.read_csv(source)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        # Surface parse problems in the UI instead of as a raw traceback.
        raise gr.Error(f"Could not read the CSV file: {err}") from err
22
+
23
def prepare_dataframe(df):
    """Sort the table chronologically and coerce the skill columns to numbers.

    Expected columns: ``YearMonth`` (ideally ``YYYY-MM``) followed by one
    column per skill. The input frame is not modified.

    Args:
        df: Raw table as loaded from the CSV.

    Returns:
        A copy of ``df`` sorted by date, with an extra ``_dt`` column holding
        the parsed ``YearMonth`` (``NaT`` where parsing failed) and every
        other column converted to numeric (``NaN`` where conversion failed).

    Raises:
        gr.Error: If the ``YearMonth`` column is missing.
    """
    if "YearMonth" not in df.columns:
        raise gr.Error("CSV must have a 'YearMonth' column.")

    raw = df["YearMonth"]
    # Strict pass first: the documented format is YYYY-MM.
    parsed = pd.to_datetime(raw, format="%Y-%m", errors="coerce")
    unparsed = parsed.isna()
    if unparsed.any():
        # Retry only the rows that failed. Re-parsing the whole column lets
        # pandas infer a single format from the first value, which can turn
        # rows that already parsed correctly into NaT.
        retry = pd.to_datetime(raw[unparsed], errors="coerce")
        parsed = retry if unparsed.all() else parsed.fillna(retry)

    out = df.copy()
    out["_dt"] = parsed
    out = out.sort_values("_dt")

    # Everything except the date columns is treated as a numeric skill series.
    for col in out.columns:
        if col not in ("YearMonth", "_dt"):
            out[col] = pd.to_numeric(out[col], errors="coerce")
    return out
43
+
44
def list_skill_columns(df):
    """Return the skill column names, i.e. all columns except the date ones.

    Args:
        df: Table containing ``YearMonth`` and, optionally, the ``_dt`` column.

    Returns:
        A list of column names in their original order, excluding
        ``YearMonth`` and ``_dt``.
    """
    reserved = ("YearMonth", "_dt")
    return [name for name in df.columns if name not in reserved]
46
+
47
def apply_smoothing(series, window):
    """Smooth a series with a trailing moving average.

    Args:
        series: ``pandas.Series`` of values to smooth.
        window: Window length in rows. ``None``, ``0`` and ``1`` disable
            smoothing. Floats are truncated to an integer, because
            ``Series.rolling`` rejects a non-integer window.

    Returns:
        The input series itself when smoothing is disabled, otherwise a new
        series of rolling means. ``min_periods=1`` means the first rows are
        averaged over however many values are available so far.
    """
    if not window:
        return series
    span = int(window)
    if span <= 1:
        return series
    return series.rolling(window=span, min_periods=1, center=False).mean()
51
+
52
def normalize_series(series, mode):
    """Rescale one skill's series according to the chosen normalisation mode.

    Args:
        series: ``pandas.Series`` of numeric values.
        mode: ``"none"``, ``"min-max (per skill)"`` or
            ``"z-score (per skill)"``. Any other value behaves like ``"none"``.

    Returns:
        The input series itself for ``"none"`` or an unknown mode; otherwise a
        new series. Min-max maps values onto ``[0, 1]``; z-score subtracts the
        mean and divides by the population standard deviation (``ddof=0``).
        A series with no spread (constant or all missing) becomes ``0.0``
        wherever a value is present, with missing values left missing.
    """
    if mode == "min-max (per skill)":
        low, high = series.min(), series.max()
        if high > low:
            return (series - low) / (high - low)
        # No spread: return zeros rather than the raw values, which would sit
        # on a different scale from every other normalised line.
        return series.where(series.isna(), 0.0)

    if mode == "z-score (per skill)":
        mean, spread = series.mean(), series.std(ddof=0)
        if spread > 0:
            return (series - mean) / spread
        # Every present value equals the mean, so its z-score is 0.
        return series.where(series.isna(), 0.0)

    return series
69
+
70
def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
    """Draw the selected skills as one line chart and return it as an image.

    Args:
        df: Prepared table containing ``YearMonth``, ``_dt`` and skill columns.
        selected_skills: Names of the skill columns to draw; names that are
            not columns of ``df`` are skipped.
        smoothing_window: Moving-average window passed to ``apply_smoothing``.
        normalize_mode: Mode passed to ``normalize_series`` (applied after
            smoothing).
        show_markers: Whether to draw a circular marker at every data point.
        y_label: Custom y-axis label. Blank or ``None`` selects a default
            based on ``normalize_mode``.

    Returns:
        numpy.ndarray: The rendered chart as ``(height, width, 4)`` ``uint8``
        RGBA pixels. An array is returned rather than an in-memory PNG buffer
        because ``gr.Image`` accepts arrays, PIL images and file paths as
        output values, but not ``BytesIO`` objects.

    Raises:
        gr.Error: If no skill is selected, or if no row has a parsable date.
    """
    if not selected_skills:
        raise gr.Error("Please select at least one skill.")

    # Rows whose date failed to parse (NaT) have no position on the x-axis and
    # cannot serve as tick locations, so they are left out of the chart.
    dated = df[df["_dt"].notna()]
    if dated.empty:
        raise gr.Error("No rows with a valid 'YearMonth' value to plot.")

    x_dt = dated["_dt"]
    x_labels = dated["YearMonth"].astype(str).tolist()

    # Single chart; dpi matches the resolution of the former PNG export.
    fig = plt.figure(figsize=(10, 5.5), dpi=160)
    try:
        ax = fig.add_subplot(111)

        line_style = {"marker": "o"} if show_markers else {}
        drew_any = False
        for skill in selected_skills:
            if skill not in dated.columns:
                continue
            y = apply_smoothing(dated[skill], smoothing_window)
            y = normalize_series(y, normalize_mode)
            ax.plot(x_dt, y, label=skill, **line_style)
            drew_any = True

        custom_label = (y_label or "").strip()
        if custom_label:
            ax.set_ylabel(y_label)
        elif normalize_mode != "none":
            ax.set_ylabel("Normalized value")
        else:
            ax.set_ylabel("Frequency")

        ax.set_xlabel("Year-Month")
        ax.set_title("Trend of Selected Hard Skills")
        if drew_any:
            # A legend without any labelled line only produces a warning.
            ax.legend(loc="best")
        ax.grid(True, which="both", axis="both", alpha=0.35)

        # One tick per row, labelled with the original YearMonth text.
        ax.set_xticks(x_dt)
        ax.set_xticklabels(x_labels, rotation=45, ha="right")

        fig.tight_layout()
        fig.canvas.draw()
        # np.array copies the pixels, so the result outlives the closed figure.
        return np.array(fig.canvas.buffer_rgba())
    finally:
        # Always release the figure: pyplot keeps every open figure alive,
        # which would leak memory in a long-running app if an error occurred.
        plt.close(fig)
110
+
111
def run(
    csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label
):
    """Handle a click on "Plot": load the data, draw the chart, build the preview.

    Args:
        csv_file: Uploaded file from the UI, or ``None`` to use the default CSV.
        selected_skills: Skill names currently ticked in the checkbox group
            (may be empty or ``None``).
        smoothing_window: Moving-average window from the slider.
        normalize_mode: Normalisation mode from the dropdown.
        show_markers: Whether to draw point markers.
        y_label: Optional custom y-axis label.

    Returns:
        A 3-tuple: the chart image produced by ``plot_lines``, a ``gr.update``
        that refreshes the checkbox choices and selection, and a preview
        frame holding ``YearMonth`` plus the plotted skill columns.
    """
    df = prepare_dataframe(load_dataframe(csv_file))
    available = list_skill_columns(df)

    # Discard selections that do not exist in this CSV *before* deciding
    # whether to auto-select. Otherwise stale ticks left over from a previous
    # file filter down to nothing and the request fails before the checkbox
    # choices can be refreshed.
    chosen = [skill for skill in (selected_skills or []) if skill in available]
    if not chosen:
        chosen = available

    chart = plot_lines(df, chosen, smoothing_window, normalize_mode, show_markers, y_label)

    # Preview table limited to the columns that were actually plotted.
    preview = df[["YearMonth"] + chosen].reset_index(drop=True)
    return chart, gr.update(choices=available, value=chosen), preview
130
+
131
# UI layout: controls in the left column, chart and data preview in the right.
with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
    gr.Markdown("# Hard Skills Trend — Line Chart\nUpload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root.")

    with gr.Row():
        with gr.Column(scale=1):
            csv_file = gr.File(label="Upload CSV (optional)", file_count="single", file_types=[".csv"])
            # Choices start empty; they are filled in by the first "Plot" click.
            selected_skills = gr.CheckboxGroup(choices=[], label="Select skills to plot")
            smoothing_window = gr.Slider(1, 6, value=1, step=1, label="Smoothing (moving average window in months)")
            normalize_mode = gr.Dropdown(choices=["none", "min-max (per skill)", "z-score (per skill)"], value="none", label="Normalize")
            show_markers = gr.Checkbox(value=True, label="Show markers on lines")
            y_label = gr.Textbox(value="", label="Y-axis label (optional)")
            btn = gr.Button("Plot", variant="primary")

        with gr.Column(scale=1):
            out_img = gr.Image(label="Line Chart")
            out_table = gr.Dataframe(label="Data preview")

    # On click: return the image, refresh the skill choices, and fill the table.
    btn.click(
        fn=run,
        inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
        outputs=[out_img, selected_skills, out_table],
    )
154
+
155
# Start the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.26.0
2
+ pandas>=2.0.0
3
+ matplotlib>=3.8
4
+ numpy