Chand11 committed on
Commit
90f1636
·
verified ·
1 Parent(s): bb58e23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -161
app.py CHANGED
@@ -1,140 +1,74 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
4
 
5
- # ---------- READ FILE ----------
6
- def read_raw(file):
7
- try:
8
- if file.name.endswith(".xlsx"):
9
- return pd.read_excel(file.name, header=None)
10
- else:
11
- return pd.read_csv(file.name, header=None, engine="python")
12
- except:
13
- return None
14
-
15
 
16
- # ---------- DETECT HEADER ----------
17
- def find_header(df_raw):
18
- for i, row in df_raw.iterrows():
19
- row_text = " ".join(row.astype(str)).lower()
20
 
21
- # strong detection for webinar reports
22
- if (
23
- "time in session" in row_text and
24
- "join time" in row_text and
25
- "leave time" in row_text
26
- ):
27
- return i
28
 
29
- # fallback detection for course datasets
30
- if "course" in row_text and ("nps" in row_text or "completion" in row_text):
31
- return i
32
-
33
- return None
34
-
35
-
36
- # ---------- CONVERT WEBINAR ----------
37
- def convert_webinar(file):
38
  try:
39
- df_raw = read_raw(file)
40
- if df_raw is None:
41
- return None
42
-
43
- header_index = find_header(df_raw)
44
- if header_index is None:
45
- return None
46
-
47
- # read structured part
48
  if file.name.endswith(".xlsx"):
49
- df = pd.read_excel(file.name, skiprows=header_index)
50
  else:
51
- df = pd.read_csv(file.name, skiprows=header_index, engine="python")
52
 
53
- df.columns = df.columns.str.strip()
54
-
55
- # find time column dynamically
56
- time_col = None
57
- for col in df.columns:
58
- if "time in session" in col.lower():
59
- time_col = col
60
- break
61
-
62
- if time_col is None:
63
- return None
64
 
65
- df[time_col] = pd.to_numeric(df[time_col], errors="coerce")
66
- df = df.dropna()
67
-
68
- if df.empty:
69
- return None
70
 
71
- total = len(df)
72
- completed = df[df[time_col] > 60]
73
 
74
- completion = (len(completed) / total) * 100
75
- avg_time = df[time_col].mean()
 
 
76
 
77
- # simulate metrics
78
- if avg_time > 150:
79
- satisfaction, nps = 4.6, 75
80
- elif avg_time > 100:
81
- satisfaction, nps = 4.2, 65
82
- elif avg_time > 60:
83
- satisfaction, nps = 3.9, 55
84
- else:
85
- satisfaction, nps = 3.5, 40
86
 
87
- return pd.DataFrame({
88
- "Course Name": ["Webinar Course"],
89
- "NPS Score": [nps],
90
- "Completion Rate (%)": [round(completion, 2)],
91
- "Satisfaction (1-5)": [satisfaction]
92
- })
93
 
94
- except:
95
- return None
96
 
 
 
 
 
97
 
98
- # ---------- CLEAN COURSE DATA ----------
99
- def clean_course(file):
100
- try:
101
- if file.name.endswith(".xlsx"):
102
- df = pd.read_excel(file.name)
103
- else:
104
- df = pd.read_csv(file.name)
105
 
106
- df.columns = df.columns.str.strip().str.lower()
107
 
108
- rename_map = {}
109
- for col in df.columns:
110
- if "course" in col:
111
- rename_map[col] = "Course Name"
112
- elif "nps" in col:
113
- rename_map[col] = "NPS Score"
114
- elif "completion" in col:
115
- rename_map[col] = "Completion Rate (%)"
116
- elif "satisfaction" in col or "rating" in col:
117
- rename_map[col] = "Satisfaction (1-5)"
118
 
119
- df = df.rename(columns=rename_map)
 
120
 
121
- required = ["Course Name", "NPS Score", "Completion Rate (%)", "Satisfaction (1-5)"]
 
122
 
123
- for col in required:
124
- if col not in df.columns:
125
- df[col] = None
126
 
127
- for col in required[1:]:
128
- df[col] = df[col].astype(str).str.replace('%', '', regex=False)
129
- df[col] = df[col].astype(str).str.replace('/5', '', regex=False)
130
- df[col] = pd.to_numeric(df[col], errors="coerce")
131
 
132
  df = df.dropna()
133
 
134
  return df
135
 
136
  except:
137
- return pd.DataFrame()
138
 
139
 
140
  # ---------- CHARTS ----------
@@ -142,7 +76,7 @@ def charts(df):
142
  fig1, ax1 = plt.subplots()
143
  ax1.bar(df["Course Name"], df["Health Score"])
144
  plt.xticks(rotation=45, ha="right")
145
- plt.title("Course Health Score")
146
 
147
  fig2, ax2 = plt.subplots()
148
  ax2.bar(df["Course Name"], df["Completion Rate (%)"])
@@ -154,54 +88,11 @@ def charts(df):
154
 
155
  # ---------- MAIN ----------
156
  def process(file):
157
- try:
158
- if file is None:
159
- return (
160
- pd.DataFrame({"Message": ["Upload a file"]}),
161
- pd.DataFrame(),
162
- pd.DataFrame(),
163
- pd.DataFrame(),
164
- None,
165
- None
166
- )
167
-
168
- # try webinar conversion
169
- df = convert_webinar(file)
170
-
171
- # fallback
172
- if df is None:
173
- df = clean_course(file)
174
-
175
- if df.empty:
176
- return (
177
- pd.DataFrame({"Message": ["Could not understand file structure"]}),
178
- pd.DataFrame(),
179
- pd.DataFrame(),
180
- pd.DataFrame(),
181
- None,
182
- None
183
- )
184
-
185
- # health score
186
- df["Health Score"] = (
187
- (df["NPS Score"] * 0.4) +
188
- (df["Completion Rate (%)"] * 0.3) +
189
- (df["Satisfaction (1-5)"] * 20 * 0.3)
190
- )
191
 
192
- df["Needs Attention"] = df["Health Score"] < 60
193
-
194
- top = df.sort_values(by="Health Score", ascending=False).head(3)
195
- worst = df.sort_values(by="Health Score").head(3)
196
- attention = df[df["Needs Attention"] == True]
197
-
198
- fig1, fig2 = charts(df)
199
-
200
- return df, top, worst, attention, fig1, fig2
201
-
202
- except Exception as e:
203
  return (
204
- pd.DataFrame({"Error": [str(e)]}),
205
  pd.DataFrame(),
206
  pd.DataFrame(),
207
  pd.DataFrame(),
@@ -209,19 +100,33 @@ def process(file):
209
  None
210
  )
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  # ---------- UI ----------
214
  with gr.Blocks() as app:
215
- gr.Markdown("# 📊 Smart Course Quality Tracker")
216
-
217
- gr.Markdown("Upload CSV or Excel (even messy reports).")
218
 
219
- file_input = gr.File(label="Upload File")
220
 
221
- table = gr.Dataframe(label="Processed Data")
222
- top = gr.Dataframe(label="Top Courses")
223
- worst = gr.Dataframe(label="Worst Courses")
224
- attention = gr.Dataframe(label="Needs Attention")
225
 
226
  chart1 = gr.Plot()
227
  chart2 = gr.Plot()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ import google.generativeai as genai
5
 
6
# 🔑 Supply the Gemini API key through the GOOGLE_API_KEY environment
# variable (for example a Space secret) instead of committing it to the
# repository. The "YOUR_API_KEY" placeholder is kept only as a fallback so
# the module still imports when the variable is unset.
# NOTE(review): API calls presumably fail until a real key is provided.
import os

genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY"))

# Module-level model handle; ai_parse() calls model.generate_content().
model = genai.GenerativeModel("gemini-1.5-flash")
 
 
 
10
 
 
 
 
 
 
 
 
11
 
12
+ # ---------- READ FILE AS TEXT ----------
13
def file_to_text(file):
    """Return the uploaded file's contents as plain text.

    Files ending in ``.xlsx`` are loaded with ``pandas.read_excel``; every
    other file is first tried as CSV. On success the DataFrame is rendered
    with ``DataFrame.to_string()``. If pandas cannot parse the file, the raw
    file is read as UTF-8 text with undecodable bytes ignored.

    Args:
        file: An object exposing the file path as ``.name``, or the path
            itself (string or path-like).

    Returns:
        str: The rendered table, or the raw file text when parsing fails.
    """
    # Accept both wrapper objects carrying ``.name`` and bare paths.
    path = getattr(file, "name", file)

    try:
        # Compare the suffix case-insensitively so "REPORT.XLSX" is not
        # mistaken for a CSV.
        if str(path).lower().endswith(".xlsx"):
            df = pd.read_excel(path)
        else:
            df = pd.read_csv(path, engine="python")

        return df.to_string()

    except Exception:
        # Deliberate best-effort fallback: hand over the raw text instead.
        # ``Exception`` rather than a bare ``except`` lets KeyboardInterrupt
        # and SystemExit propagate.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
 
 
25
 
 
 
26
 
27
+ # ---------- AI PARSER ----------
28
def ai_parse(file):
    """Ask the generative model to restructure an uploaded file into course metrics.

    The file is converted to text with ``file_to_text``, its first 4000
    characters are embedded in a prompt, and the model's reply is parsed as
    a comma-separated table.

    Args:
        file: The uploaded file, passed through to ``file_to_text``.

    Returns:
        pandas.DataFrame | None: A frame containing "Course Name",
        "NPS Score", "Completion Rate (%)" and "Satisfaction (1-5)" with
        the three metric columns numeric and incomplete rows dropped, or
        ``None`` when the request fails or no usable table is found.
    """
    import logging  # local import keeps this block self-contained

    try:
        text = file_to_text(file)

        prompt = f"""
You are a data analyst.

Convert the following messy dataset into a structured course dataset.

Output ONLY a table with columns:
Course Name, NPS Score, Completion Rate (%), Satisfaction (1-5)

If it's webinar data:
- Use time in session as engagement
- Estimate completion rate
- Estimate satisfaction and NPS realistically

Data:
{text[:4000]}
"""

        response = model.generate_content(prompt)

        return _table_from_ai_output(response.text)

    except Exception:
        # Callers treat None as "could not parse", so the contract is kept,
        # but the failure is logged instead of disappearing silently.
        logging.getLogger(__name__).exception("AI parsing failed")
        return None


def _table_from_ai_output(output):
    """Parse the model's comma-separated reply into a cleaned DataFrame, or None."""
    import csv  # local import keeps this block self-contained

    required = [
        "Course Name",
        "NPS Score",
        "Completion Rate (%)",
        "Satisfaction (1-5)",
    ]

    # Only comma-bearing lines can be table rows.
    candidate_lines = [line.strip() for line in output.split("\n") if "," in line]

    # csv.reader honours quoted commas; stripping each cell removes the
    # space after ", " that would otherwise end up inside column names.
    rows = [
        [cell.strip() for cell in row]
        for row in csv.reader(candidate_lines, skipinitialspace=True)
    ]

    # The header is the first row naming every required column; earlier
    # comma-bearing chatter is ignored.
    header_index = next(
        (i for i, row in enumerate(rows) if all(col in row for col in required)),
        None,
    )
    if header_index is None:
        return None

    header = rows[header_index]
    # Ragged rows cannot be aligned with the header, so they are skipped.
    records = [row for row in rows[header_index + 1:] if len(row) == len(header)]
    if not records:
        return None

    df = pd.DataFrame(records, columns=header)

    # clean numeric
    for col in required[1:]:
        without_percent = df[col].str.replace("%", "", regex=False)
        df[col] = pd.to_numeric(without_percent, errors="coerce")

    return df.dropna()
72
 
73
 
74
  # ---------- CHARTS ----------
 
76
  fig1, ax1 = plt.subplots()
77
  ax1.bar(df["Course Name"], df["Health Score"])
78
  plt.xticks(rotation=45, ha="right")
79
+ plt.title("Health Score")
80
 
81
  fig2, ax2 = plt.subplots()
82
  ax2.bar(df["Course Name"], df["Completion Rate (%)"])
 
88
 
89
  # ---------- MAIN ----------
90
  def process(file):
91
+ df = ai_parse(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ if df is None or df.empty:
 
 
 
 
 
 
 
 
 
 
94
  return (
95
+ pd.DataFrame({"Message": ["AI could not parse file"]}),
96
  pd.DataFrame(),
97
  pd.DataFrame(),
98
  pd.DataFrame(),
 
100
  None
101
  )
102
 
103
+ df["Health Score"] = (
104
+ (df["NPS Score"] * 0.4) +
105
+ (df["Completion Rate (%)"] * 0.3) +
106
+ (df["Satisfaction (1-5)"] * 20 * 0.3)
107
+ )
108
+
109
+ df["Needs Attention"] = df["Health Score"] < 60
110
+
111
+ top = df.sort_values(by="Health Score", ascending=False).head(3)
112
+ worst = df.sort_values(by="Health Score").head(3)
113
+ attention = df[df["Needs Attention"]]
114
+
115
+ fig1, fig2 = charts(df)
116
+
117
+ return df, top, worst, attention, fig1, fig2
118
+
119
 
120
  # ---------- UI ----------
121
  with gr.Blocks() as app:
122
+ gr.Markdown("# 🤖 AI Course Quality Tracker")
 
 
123
 
124
+ file_input = gr.File(label="Upload ANY file")
125
 
126
+ table = gr.Dataframe()
127
+ top = gr.Dataframe()
128
+ worst = gr.Dataframe()
129
+ attention = gr.Dataframe()
130
 
131
  chart1 = gr.Plot()
132
  chart2 = gr.Plot()