SanthiSastra committed on
Commit
ea148ac
·
verified ·
1 Parent(s): 6e976d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +362 -0
app.py CHANGED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — Student Mark Analysis (HF Spaces + Gradio)
2
+ # Upload a CSV with columns: RegNo, Name, Subject1, Subject2, ...
3
+ # Outputs: per-student totals/average/rank/remark, top lists, subject stats, charts, and downloadable CSV.
4
+
5
+ import io
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import gradio as gr
10
+
11
+
12
def _safe_numeric(df: pd.DataFrame, cols):
    """Return a copy of ``df`` with the given columns coerced to numbers.

    Each column named in ``cols`` is passed through ``pd.to_numeric`` with
    ``errors="coerce"``, so any value that cannot be parsed becomes NaN.
    The input frame itself is not modified.
    """
    # Convert first, then write back into a fresh copy of the frame.
    coerced = {name: pd.to_numeric(df[name], errors="coerce") for name in cols}
    result = df.copy()
    for name, series in coerced.items():
        result[name] = series
    return result
18
+
19
+
20
def _compute_analysis(
    df: pd.DataFrame,
    pass_mark: int,
    top2_pct: float,
    top10_pct: float,
    sort_order: str,
    regno_search: str,
    name_search: str,
    fail_filter: str,
    selected_subject: str,
    topk_subject: int,
):
    """Build every report artefact for one uploaded mark sheet.

    Args:
        df: Raw sheet; must contain ``RegNo`` and ``Name`` columns. Every
            other column is treated as a subject and coerced to numeric.
        pass_mark: Marks strictly below this value count as a failed subject.
        top2_pct: Percentage of validly-totalled students shown in list 1.
        top10_pct: Percentage of validly-totalled students shown in list 2.
        sort_order: One of the sort labels offered by the UI; any other value
            leaves the row order unchanged.
        regno_search: Case-insensitive literal substring filter on ``RegNo``.
        name_search: Case-insensitive literal substring filter on ``Name``.
        fail_filter: ``"All"``, one of the two named filters, or a label
            ending in ``= k`` that selects rows with exactly ``k`` failures.
        selected_subject: Subject for the top-K table; falls back to the
            first subject column when it is not one of the sheet's subjects.
        topk_subject: Number of rows in the top-K table.

    Returns:
        A 10-tuple: summary text, filtered result table, top list 1, top
        list 2, subject-average table, top-K table, subject-average figure,
        fail-count figure, path of the CSV written for download, and a
        ``gr.Dropdown`` update carrying the subject choices.

    Raises:
        gr.Error: If the sheet is empty, lacks ``RegNo``/``Name``, has no
            subject columns, or has a subject column with no numeric value.
    """
    # Only this function needs these, so they are imported locally.
    import os
    import tempfile

    if df is None or df.empty:
        raise gr.Error("Uploaded file is empty. Please upload a valid CSV.")

    # Basic required columns
    base_cols = ["RegNo", "Name"]
    if not set(base_cols).issubset(df.columns):
        raise gr.Error("CSV must contain at least these columns: RegNo, Name")

    # Detect subject columns: everything except RegNo/Name
    subject_cols = [c for c in df.columns if c not in base_cols]
    if not subject_cols:
        raise gr.Error("No subject columns found. Add subject mark columns after RegNo and Name.")

    # Numeric conversion for subject marks
    df2 = df.copy()
    df2["RegNo"] = df2["RegNo"].astype(str).str.strip()
    df2["Name"] = df2["Name"].astype(str).str.strip()
    df2 = _safe_numeric(df2, subject_cols)

    # If any subject column is entirely NaN -> likely wrong format
    all_nan_cols = [c for c in subject_cols if df2[c].isna().all()]
    if all_nan_cols:
        raise gr.Error(f"These subject columns have no valid numeric marks: {all_nan_cols}")

    # Per-student metrics. skipna=False makes Total/Average NaN as soon as a
    # single mark is missing, which is what flags the row as "Invalid Marks".
    df2["Total"] = df2[subject_cols].sum(axis=1, skipna=False)
    df2["Average"] = df2[subject_cols].mean(axis=1, skipna=False)

    # Fail count (arrears). NOTE: NaN < pass_mark is False, so a missing mark
    # is not counted as a failure here.
    df2["Fail_Count"] = (df2[subject_cols] < pass_mark).sum(axis=1)

    def remark(row):
        """Label one student row: invalid, pass, or number of arrears."""
        if np.isnan(row["Total"]) or np.isnan(row["Average"]):
            return "Invalid Marks"
        fc = int(row["Fail_Count"])
        if fc == 0:
            return "Pass"
        return f"Arrear: {fc}"

    df2["Remark"] = df2.apply(remark, axis=1)

    # Rank (only for valid totals). Higher total => better rank (1 is best);
    # ties share the smallest rank of the group (method="min").
    valid_mask = df2["Total"].notna()
    df2.loc[valid_mask, "Rank"] = df2.loc[valid_mask, "Total"].rank(ascending=False, method="min").astype(int)
    df2.loc[~valid_mask, "Rank"] = np.nan

    # Class stats
    n_students = len(df2)
    pass_count = int((df2["Fail_Count"] == 0).sum())
    fail_any_count = int((df2["Fail_Count"] > 0).sum())

    # Top lists (based on Total)
    df_ranked = df2[valid_mask].sort_values("Total", ascending=False).copy()
    n_valid = len(df_ranked)

    def top_n_by_pct(pct: float) -> int:
        """Rows covered by ``pct`` percent of valid students (min 1 if any)."""
        if n_valid == 0:
            return 0
        return max(1, int(np.ceil((pct / 100.0) * n_valid)))

    top2_n = top_n_by_pct(top2_pct)
    top10_n = top_n_by_pct(top10_pct)

    top_cols = ["RegNo", "Name", "Total", "Average", "Rank", "Fail_Count", "Remark"]
    top2_df = df_ranked.head(top2_n)[top_cols]
    top10_df = df_ranked.head(top10_n)[top_cols]

    # Subject averages (missing marks ignored per subject)
    subject_avg = df2[subject_cols].mean(axis=0, skipna=True).sort_values(ascending=False)
    least_subject = subject_avg.idxmin()
    least_subject_avg = float(subject_avg.min())

    # Top-K per selected subject
    if selected_subject not in subject_cols:
        selected_subject = subject_cols[0]

    topk_sub_df = (
        df2[["RegNo", "Name", selected_subject]]
        .dropna(subset=[selected_subject])
        .sort_values(selected_subject, ascending=False)
        .head(int(topk_subject))
        .rename(columns={selected_subject: "Mark"})
    )

    # Filtering
    filtered = df2.copy()

    # regex=False: the search boxes promise a plain "contains" match, and
    # user text such as "(" or "+" would otherwise raise re.error.
    regno_key = (regno_search or "").strip()
    if regno_key:
        filtered = filtered[filtered["RegNo"].str.contains(regno_key, case=False, na=False, regex=False)]

    name_key = (name_search or "").strip()
    if name_key:
        filtered = filtered[filtered["Name"].str.contains(name_key, case=False, na=False, regex=False)]

    if fail_filter != "All":
        if fail_filter == "Pass only (Fail_Count = 0)":
            filtered = filtered[filtered["Fail_Count"] == 0]
        elif fail_filter == "Arrear only (Fail_Count >= 1)":
            filtered = filtered[filtered["Fail_Count"] >= 1]
        else:
            # "Fail_Count = k". An unparsable or missing label is deliberately
            # treated as "no filter" (best effort), but only for the two
            # errors the parse itself can raise.
            try:
                k = int(fail_filter.split("=")[-1].strip())
            except (ValueError, AttributeError):
                k = None
            if k is not None:
                filtered = filtered[filtered["Fail_Count"] == k]

    # Sorting
    if sort_order == "Rank (Best first)":
        filtered = filtered.sort_values(["Rank", "Total"], ascending=[True, False], na_position="last")
    elif sort_order == "Total (High to Low)":
        filtered = filtered.sort_values("Total", ascending=False, na_position="last")
    elif sort_order == "Total (Low to High)":
        filtered = filtered.sort_values("Total", ascending=True, na_position="last")
    elif sort_order == "Name (A to Z)":
        filtered = filtered.sort_values("Name", ascending=True, na_position="last")

    # Output table columns
    out_cols = ["RegNo", "Name"] + subject_cols + ["Total", "Average", "Rank", "Fail_Count", "Remark"]
    filtered_out = filtered[out_cols].copy()

    # Summary text
    summary_lines = [
        f"Students: {n_students} (Valid totals: {n_valid})",
        f"Pass (Fail_Count=0): {pass_count}",
        f"Arrear (Fail_Count>=1): {fail_any_count}",
        f"Pass %: {((pass_count / n_students) * 100.0):.2f}%" if n_students else "Pass %: N/A",
        f"Top {top2_pct:.1f}% count: {top2_n}",
        f"Top {top10_pct:.1f}% count: {top10_n}",
        f"Least average subject: {least_subject} (Avg = {least_subject_avg:.2f})",
    ]
    summary = "\n".join(summary_lines)

    # Charts. The object-oriented API is used instead of pyplot's implicit
    # "current figure" so that overlapping requests cannot draw into each
    # other's figure. Each figure is closed after layout so pyplot drops its
    # reference; the Figure object stays usable by whoever holds it.

    # 1) Subject average bar chart
    fig1, ax1 = plt.subplots()
    ax1.bar(subject_avg.index.astype(str), subject_avg.values)
    plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
    ax1.set_ylabel("Average Mark")
    ax1.set_title("Subject-wise Average")
    fig1.tight_layout()
    plt.close(fig1)

    # 2) Fail count distribution
    fig2, ax2 = plt.subplots()
    vc = df2["Fail_Count"].value_counts().sort_index()
    ax2.bar(vc.index.astype(str), vc.values)
    ax2.set_xlabel("Fail_Count (No. of subjects below pass mark)")
    ax2.set_ylabel("Number of students")
    ax2.set_title("Arrear Distribution")
    fig2.tight_layout()
    plt.close(fig2)

    # Downloadable CSV (filtered output). The file output needs a path on
    # disk, so the report is written into a fresh temp directory; that keeps
    # a stable download name without clashing with other requests.
    report_dir = tempfile.mkdtemp(prefix="mark_analysis_")
    download_file = os.path.join(report_dir, "student_mark_analysis.csv")
    filtered_out.to_csv(download_file, index=False, encoding="utf-8")

    # Tables: keep concise for display
    subject_avg_table = pd.DataFrame({"Subject": subject_avg.index, "Average": subject_avg.values})
    subject_avg_table["Average"] = subject_avg_table["Average"].round(2)

    # Refresh the subject dropdown with the real choices and keep the subject
    # that was actually used; a bare list would be taken as the dropdown's
    # value rather than its choices.
    subject_dropdown = gr.Dropdown(choices=subject_cols, value=selected_subject)

    return (
        summary,
        filtered_out,
        top2_df,
        top10_df,
        subject_avg_table,
        topk_sub_df,
        fig1,
        fig2,
        download_file,
        subject_dropdown,
    )
208
+
209
+
210
def _read_uploaded_csv(file_obj) -> pd.DataFrame:
    """Load an uploaded CSV, accepting a path, a named temp file, or a stream.

    A plain string is used as the path; otherwise a string ``name`` attribute
    is tried as the path. If that read fails and the object is file-like, it
    is rewound and read directly.

    Raises:
        gr.Error: If the upload cannot be parsed as CSV by any route.
    """
    path = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", None)
    failure = None

    if isinstance(path, str):
        try:
            return pd.read_csv(path)
        except (OSError, ValueError) as err:
            # pandas' parser/empty-data/decoding errors are ValueError
            # subclasses; remember the cause and try the stream route below.
            failure = err

    if hasattr(file_obj, "seek") and hasattr(file_obj, "read"):
        try:
            file_obj.seek(0)
            return pd.read_csv(file_obj)
        except (OSError, ValueError) as err:
            failure = err

    detail = f": {failure}" if failure is not None else "."
    raise gr.Error(f"Could not read the uploaded file as CSV{detail}") from failure


def analyze(
    file_obj,
    pass_mark,
    top2_pct,
    top10_pct,
    sort_order,
    regno_search,
    name_search,
    fail_filter,
    selected_subject,
    topk_subject,
):
    """Click handler: read the uploaded CSV and run the full analysis.

    Returns whatever ``_compute_analysis`` returns for the parsed sheet.

    Raises:
        gr.Error: If no file was uploaded or the file cannot be read as CSV.
    """
    if file_obj is None:
        raise gr.Error("Please upload a CSV file.")

    df = _read_uploaded_csv(file_obj)

    return _compute_analysis(
        df=df,
        pass_mark=pass_mark,
        top2_pct=top2_pct,
        top10_pct=top10_pct,
        sort_order=sort_order,
        regno_search=regno_search,
        name_search=name_search,
        fail_filter=fail_filter,
        selected_subject=selected_subject,
        topk_subject=topk_subject,
    )


def update_subject_dropdown(file_obj):
    """Upload handler: offer the sheet's subject columns in the dropdown.

    Returns an empty dropdown when no file is present or the sheet lacks the
    ``RegNo``/``Name`` columns; otherwise every other column becomes a choice
    and the first one is preselected.

    Raises:
        gr.Error: If the uploaded file cannot be read as CSV.
    """
    if file_obj is None:
        return gr.Dropdown(choices=[], value=None)

    df = _read_uploaded_csv(file_obj)

    if not {"RegNo", "Name"}.issubset(df.columns):
        return gr.Dropdown(choices=[], value=None)

    subject_cols = [c for c in df.columns if c not in ("RegNo", "Name")]
    value = subject_cols[0] if subject_cols else None
    return gr.Dropdown(choices=subject_cols, value=value)
263
+
264
+
265
# UI definition. Components are created in display order inside the Blocks
# context; the event wiring at the bottom connects them to the handlers.
with gr.Blocks(title="Student Mark Analysis") as demo:
    gr.Markdown(
        """
        # 📊 Student Mark Analysis (CSV → Report)
        **CSV format:** `RegNo, Name, Subject1, Subject2, ...`
        Example: `RegNo,Name,Tamil,English,Maths,Science,Social`
        """
    )

    # Inputs: the upload plus the numeric thresholds used by the analysis.
    with gr.Row():
        file_in = gr.File(label="Upload CSV", file_types=[".csv"])
        with gr.Column():
            pass_mark = gr.Slider(0, 100, value=50, step=1, label="Pass mark (per subject)")
            top2_pct = gr.Slider(0.5, 20, value=2.0, step=0.5, label="Top % (List-1)")
            top10_pct = gr.Slider(1, 50, value=10.0, step=1, label="Top % (List-2)")

    # The choice strings below are compared verbatim (or parsed after the
    # "=") by _compute_analysis, so they must stay in sync with it.
    with gr.Row():
        sort_order = gr.Dropdown(
            choices=["Rank (Best first)", "Total (High to Low)", "Total (Low to High)", "Name (A to Z)"],
            value="Rank (Best first)",
            label="Sort result table",
        )
        fail_filter = gr.Dropdown(
            choices=[
                "All",
                "Pass only (Fail_Count = 0)",
                "Arrear only (Fail_Count >= 1)",
                "Fail_Count = 1",
                "Fail_Count = 2",
                "Fail_Count = 3",
                "Fail_Count = 4",
                "Fail_Count = 5",
            ],
            value="All",
            label="Filter by arrears",
        )

    with gr.Row():
        regno_search = gr.Textbox(label="Search by RegNo (contains)", placeholder="e.g., 2026")
        name_search = gr.Textbox(label="Search by Name (contains)", placeholder="e.g., Priya")

    # Starts with no choices; update_subject_dropdown fills them in once a
    # file has been uploaded.
    with gr.Row():
        selected_subject = gr.Dropdown(choices=[], label="Choose a subject (Top-K in subject)", value=None)
        topk_subject = gr.Slider(1, 20, value=3, step=1, label="Top-K students in selected subject")

    analyze_btn = gr.Button("Generate Analysis", variant="primary")

    # Outputs, in the same order as the tuple returned by analyze.
    summary = gr.Textbox(label="Class Summary", lines=8)

    gr.Markdown("## ✅ Student-wise Result Table")
    result_table = gr.Dataframe(interactive=False, wrap=True)

    with gr.Row():
        top2_table = gr.Dataframe(interactive=False, label="Top % (List-1)")
        top10_table = gr.Dataframe(interactive=False, label="Top % (List-2)")

    with gr.Row():
        subj_avg_table = gr.Dataframe(interactive=False, label="Subject Averages")
        topk_sub_table = gr.Dataframe(interactive=False, label="Top-K in Selected Subject")

    with gr.Row():
        chart1 = gr.Plot(label="Subject-wise Average (Bar Chart)")
        chart2 = gr.Plot(label="Arrear Distribution (Fail_Count)")

    download = gr.File(label="Download filtered report (CSV)")

    # Update subject dropdown when a file is uploaded
    file_in.change(fn=update_subject_dropdown, inputs=[file_in], outputs=[selected_subject])

    # The inputs list follows analyze's positional parameter order; the
    # outputs list must match the order of the values analyze returns.
    # selected_subject is both an input and an output of this event.
    analyze_btn.click(
        fn=analyze,
        inputs=[
            file_in,
            pass_mark,
            top2_pct,
            top10_pct,
            sort_order,
            regno_search,
            name_search,
            fail_filter,
            selected_subject,
            topk_subject,
        ],
        outputs=[
            summary,
            result_table,
            top2_table,
            top10_table,
            subj_avg_table,
            topk_sub_table,
            chart1,
            chart2,
            download,
            selected_subject,  # refresh list too
        ],
    )

# Runs unconditionally when this module is executed or imported (there is no
# __main__ guard).
demo.launch()