Chand11 committed on
Commit
bb58e23
·
verified ·
1 Parent(s): 49c377c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -32
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
 
5
- # ---------- UNIVERSAL FILE READER ----------
6
- def read_file(file):
7
  try:
8
  if file.name.endswith(".xlsx"):
9
  return pd.read_excel(file.name, header=None)
@@ -13,32 +13,38 @@ def read_file(file):
13
  return None
14
 
15
 
16
- # ---------- AI-LIKE STRUCTURE DETECTION ----------
17
def detect_table_start(df_raw):
    """Find where the tabular part of a raw, header-less sheet begins.

    Every row is flattened into one lowercase string and checked, in order,
    against two signatures:

    1. A row containing "attendee details": the table is taken to start on
       the row immediately after it.
    2. A row containing "course" together with "nps" or "completion": that
       row itself is taken as the header.

    Args:
        df_raw: DataFrame whose rows are scanned from top to bottom.

    Returns:
        The zero-based row position of the table start, or ``None`` when no
        row matches. A position is returned rather than the index label so
        the result is meaningful as a row count even when ``df_raw`` does
        not carry a default ``RangeIndex``.
    """
    # enumerate() supplies row positions; iterrows() alone yields index labels.
    for position, (_, row) in enumerate(df_raw.iterrows()):
        row_text = " ".join(row.astype(str)).lower()

        # Section title row: the real header sits on the next row.
        if "attendee details" in row_text:
            return position + 1

        if "course" in row_text and ("nps" in row_text or "completion" in row_text):
            return position

    return None
28
 
29
 
30
- # ---------- WEBINAR → COURSE CONVERSION ----------
31
  def convert_webinar(file):
32
  try:
33
- df_raw = read_file(file)
34
  if df_raw is None:
35
  return None
36
 
37
- header_index = detect_table_start(df_raw)
38
  if header_index is None:
39
  return None
40
 
41
- # re-read clean
42
  if file.name.endswith(".xlsx"):
43
  df = pd.read_excel(file.name, skiprows=header_index)
44
  else:
@@ -46,25 +52,29 @@ def convert_webinar(file):
46
 
47
  df.columns = df.columns.str.strip()
48
 
49
- if "Time in Session (minutes)" not in df.columns:
50
- return None
 
 
 
 
51
 
52
- df["Time in Session (minutes)"] = pd.to_numeric(
53
- df["Time in Session (minutes)"], errors="coerce"
54
- )
55
 
 
56
  df = df.dropna()
57
 
58
  if df.empty:
59
  return None
60
 
61
  total = len(df)
62
- completed = df[df["Time in Session (minutes)"] > 60]
63
 
64
  completion = (len(completed) / total) * 100
65
- avg_time = df["Time in Session (minutes)"].mean()
66
 
67
- # simulated metrics
68
  if avg_time > 150:
69
  satisfaction, nps = 4.6, 75
70
  elif avg_time > 100:
@@ -86,7 +96,7 @@ def convert_webinar(file):
86
 
87
 
88
  # ---------- CLEAN COURSE DATA ----------
89
- def clean_course_data(file):
90
  try:
91
  if file.name.endswith(".xlsx"):
92
  df = pd.read_excel(file.name)
@@ -109,6 +119,7 @@ def clean_course_data(file):
109
  df = df.rename(columns=rename_map)
110
 
111
  required = ["Course Name", "NPS Score", "Completion Rate (%)", "Satisfaction (1-5)"]
 
112
  for col in required:
113
  if col not in df.columns:
114
  df[col] = None
@@ -127,7 +138,7 @@ def clean_course_data(file):
127
 
128
 
129
  # ---------- CHARTS ----------
130
- def create_charts(df):
131
  fig1, ax1 = plt.subplots()
132
  ax1.bar(df["Course Name"], df["Health Score"])
133
  plt.xticks(rotation=45, ha="right")
@@ -141,18 +152,25 @@ def create_charts(df):
141
  return fig1, fig2
142
 
143
 
144
- # ---------- MAIN PROCESS ----------
145
  def process(file):
146
  try:
147
  if file is None:
148
- return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), None, None
 
 
 
 
 
 
 
149
 
150
- # try webinar conversion first
151
  df = convert_webinar(file)
152
 
153
- # fallback to course cleaning
154
  if df is None:
155
- df = clean_course_data(file)
156
 
157
  if df.empty:
158
  return (
@@ -177,7 +195,7 @@ def process(file):
177
  worst = df.sort_values(by="Health Score").head(3)
178
  attention = df[df["Needs Attention"] == True]
179
 
180
- fig1, fig2 = create_charts(df)
181
 
182
  return df, top, worst, attention, fig1, fig2
183
 
@@ -196,14 +214,14 @@ def process(file):
196
  with gr.Blocks() as app:
197
  gr.Markdown("# 📊 Smart Course Quality Tracker")
198
 
199
- gr.Markdown("Upload CSV or Excel. Handles messy reports automatically.")
200
 
201
- file_input = gr.File(label="Upload CSV / Excel")
202
 
203
  table = gr.Dataframe(label="Processed Data")
204
- top_table = gr.Dataframe(label="Top Courses")
205
- worst_table = gr.Dataframe(label="Worst Courses")
206
- attention_table = gr.Dataframe(label="Needs Attention")
207
 
208
  chart1 = gr.Plot()
209
  chart2 = gr.Plot()
@@ -211,7 +229,7 @@ with gr.Blocks() as app:
211
  file_input.change(
212
  fn=process,
213
  inputs=file_input,
214
- outputs=[table, top_table, worst_table, attention_table, chart1, chart2]
215
  )
216
 
217
  app.launch()
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
 
5
+ # ---------- READ FILE ----------
6
+ def read_raw(file):
7
  try:
8
  if file.name.endswith(".xlsx"):
9
  return pd.read_excel(file.name, header=None)
 
13
  return None
14
 
15
 
16
+ # ---------- DETECT HEADER ----------
17
def find_header(df_raw):
    """Locate the row of a raw, header-less sheet that holds the column titles.

    Every row is flattened into one lowercase string and checked, in order,
    against two signatures:

    1. Webinar report: the row mentions "time in session", "join time" and
       "leave time".
    2. Course dataset: the row mentions "course" together with "nps" or
       "completion".

    Args:
        df_raw: DataFrame whose rows are scanned from top to bottom.

    Returns:
        The zero-based position of the first matching row, or ``None`` when
        no row matches. A position is returned rather than the index label
        so the result is meaningful as a row count even when ``df_raw`` does
        not carry a default ``RangeIndex``.
    """
    webinar_markers = ("time in session", "join time", "leave time")

    # enumerate() supplies row positions; iterrows() alone yields index labels.
    for position, (_, row) in enumerate(df_raw.iterrows()):
        row_text = " ".join(row.astype(str)).lower()

        # strong detection for webinar reports
        if all(marker in row_text for marker in webinar_markers):
            return position

        # fallback detection for course datasets
        if "course" in row_text and ("nps" in row_text or "completion" in row_text):
            return position

    return None
34
 
35
 
36
+ # ---------- CONVERT WEBINAR ----------
37
  def convert_webinar(file):
38
  try:
39
+ df_raw = read_raw(file)
40
  if df_raw is None:
41
  return None
42
 
43
+ header_index = find_header(df_raw)
44
  if header_index is None:
45
  return None
46
 
47
+ # read structured part
48
  if file.name.endswith(".xlsx"):
49
  df = pd.read_excel(file.name, skiprows=header_index)
50
  else:
 
52
 
53
  df.columns = df.columns.str.strip()
54
 
55
+ # find time column dynamically
56
+ time_col = None
57
+ for col in df.columns:
58
+ if "time in session" in col.lower():
59
+ time_col = col
60
+ break
61
 
62
+ if time_col is None:
63
+ return None
 
64
 
65
+ df[time_col] = pd.to_numeric(df[time_col], errors="coerce")
66
  df = df.dropna()
67
 
68
  if df.empty:
69
  return None
70
 
71
  total = len(df)
72
+ completed = df[df[time_col] > 60]
73
 
74
  completion = (len(completed) / total) * 100
75
+ avg_time = df[time_col].mean()
76
 
77
+ # simulate metrics
78
  if avg_time > 150:
79
  satisfaction, nps = 4.6, 75
80
  elif avg_time > 100:
 
96
 
97
 
98
  # ---------- CLEAN COURSE DATA ----------
99
+ def clean_course(file):
100
  try:
101
  if file.name.endswith(".xlsx"):
102
  df = pd.read_excel(file.name)
 
119
  df = df.rename(columns=rename_map)
120
 
121
  required = ["Course Name", "NPS Score", "Completion Rate (%)", "Satisfaction (1-5)"]
122
+
123
  for col in required:
124
  if col not in df.columns:
125
  df[col] = None
 
138
 
139
 
140
  # ---------- CHARTS ----------
141
+ def charts(df):
142
  fig1, ax1 = plt.subplots()
143
  ax1.bar(df["Course Name"], df["Health Score"])
144
  plt.xticks(rotation=45, ha="right")
 
152
  return fig1, fig2
153
 
154
 
155
+ # ---------- MAIN ----------
156
  def process(file):
157
  try:
158
  if file is None:
159
+ return (
160
+ pd.DataFrame({"Message": ["Upload a file"]}),
161
+ pd.DataFrame(),
162
+ pd.DataFrame(),
163
+ pd.DataFrame(),
164
+ None,
165
+ None
166
+ )
167
 
168
+ # try webinar conversion
169
  df = convert_webinar(file)
170
 
171
+ # fallback
172
  if df is None:
173
+ df = clean_course(file)
174
 
175
  if df.empty:
176
  return (
 
195
  worst = df.sort_values(by="Health Score").head(3)
196
  attention = df[df["Needs Attention"] == True]
197
 
198
+ fig1, fig2 = charts(df)
199
 
200
  return df, top, worst, attention, fig1, fig2
201
 
 
214
  with gr.Blocks() as app:
215
  gr.Markdown("# 📊 Smart Course Quality Tracker")
216
 
217
+ gr.Markdown("Upload CSV or Excel (even messy reports).")
218
 
219
+ file_input = gr.File(label="Upload File")
220
 
221
  table = gr.Dataframe(label="Processed Data")
222
+ top = gr.Dataframe(label="Top Courses")
223
+ worst = gr.Dataframe(label="Worst Courses")
224
+ attention = gr.Dataframe(label="Needs Attention")
225
 
226
  chart1 = gr.Plot()
227
  chart2 = gr.Plot()
 
229
  file_input.change(
230
  fn=process,
231
  inputs=file_input,
232
+ outputs=[table, top, worst, attention, chart1, chart2]
233
  )
234
 
235
  app.launch()