Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,140 +1,74 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
|
|
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
-
try:
|
| 8 |
-
if file.name.endswith(".xlsx"):
|
| 9 |
-
return pd.read_excel(file.name, header=None)
|
| 10 |
-
else:
|
| 11 |
-
return pd.read_csv(file.name, header=None, engine="python")
|
| 12 |
-
except:
|
| 13 |
-
return None
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
| 17 |
-
def find_header(df_raw):
|
| 18 |
-
for i, row in df_raw.iterrows():
|
| 19 |
-
row_text = " ".join(row.astype(str)).lower()
|
| 20 |
|
| 21 |
-
# strong detection for webinar reports
|
| 22 |
-
if (
|
| 23 |
-
"time in session" in row_text and
|
| 24 |
-
"join time" in row_text and
|
| 25 |
-
"leave time" in row_text
|
| 26 |
-
):
|
| 27 |
-
return i
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
return i
|
| 32 |
-
|
| 33 |
-
return None
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
# ---------- CONVERT WEBINAR ----------
|
| 37 |
-
def convert_webinar(file):
|
| 38 |
try:
|
| 39 |
-
df_raw = read_raw(file)
|
| 40 |
-
if df_raw is None:
|
| 41 |
-
return None
|
| 42 |
-
|
| 43 |
-
header_index = find_header(df_raw)
|
| 44 |
-
if header_index is None:
|
| 45 |
-
return None
|
| 46 |
-
|
| 47 |
-
# read structured part
|
| 48 |
if file.name.endswith(".xlsx"):
|
| 49 |
-
df = pd.read_excel(file.name
|
| 50 |
else:
|
| 51 |
-
df = pd.read_csv(file.name,
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
# find time column dynamically
|
| 56 |
-
time_col = None
|
| 57 |
-
for col in df.columns:
|
| 58 |
-
if "time in session" in col.lower():
|
| 59 |
-
time_col = col
|
| 60 |
-
break
|
| 61 |
-
|
| 62 |
-
if time_col is None:
|
| 63 |
-
return None
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
if df.empty:
|
| 69 |
-
return None
|
| 70 |
|
| 71 |
-
total = len(df)
|
| 72 |
-
completed = df[df[time_col] > 60]
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
satisfaction, nps = 4.6, 75
|
| 80 |
-
elif avg_time > 100:
|
| 81 |
-
satisfaction, nps = 4.2, 65
|
| 82 |
-
elif avg_time > 60:
|
| 83 |
-
satisfaction, nps = 3.9, 55
|
| 84 |
-
else:
|
| 85 |
-
satisfaction, nps = 3.5, 40
|
| 86 |
|
| 87 |
-
|
| 88 |
-
"Course Name": ["Webinar Course"],
|
| 89 |
-
"NPS Score": [nps],
|
| 90 |
-
"Completion Rate (%)": [round(completion, 2)],
|
| 91 |
-
"Satisfaction (1-5)": [satisfaction]
|
| 92 |
-
})
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
if file.name.endswith(".xlsx"):
|
| 102 |
-
df = pd.read_excel(file.name)
|
| 103 |
-
else:
|
| 104 |
-
df = pd.read_csv(file.name)
|
| 105 |
|
| 106 |
-
|
| 107 |
|
| 108 |
-
|
| 109 |
-
for col in df.columns:
|
| 110 |
-
if "course" in col:
|
| 111 |
-
rename_map[col] = "Course Name"
|
| 112 |
-
elif "nps" in col:
|
| 113 |
-
rename_map[col] = "NPS Score"
|
| 114 |
-
elif "completion" in col:
|
| 115 |
-
rename_map[col] = "Completion Rate (%)"
|
| 116 |
-
elif "satisfaction" in col or "rating" in col:
|
| 117 |
-
rename_map[col] = "Satisfaction (1-5)"
|
| 118 |
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
-
|
|
|
|
| 122 |
|
| 123 |
-
for
|
| 124 |
-
if col not in df.columns:
|
| 125 |
-
df[col] = None
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
|
| 132 |
df = df.dropna()
|
| 133 |
|
| 134 |
return df
|
| 135 |
|
| 136 |
except:
|
| 137 |
-
return
|
| 138 |
|
| 139 |
|
| 140 |
# ---------- CHARTS ----------
|
|
@@ -142,7 +76,7 @@ def charts(df):
|
|
| 142 |
fig1, ax1 = plt.subplots()
|
| 143 |
ax1.bar(df["Course Name"], df["Health Score"])
|
| 144 |
plt.xticks(rotation=45, ha="right")
|
| 145 |
-
plt.title("
|
| 146 |
|
| 147 |
fig2, ax2 = plt.subplots()
|
| 148 |
ax2.bar(df["Course Name"], df["Completion Rate (%)"])
|
|
@@ -154,54 +88,11 @@ def charts(df):
|
|
| 154 |
|
| 155 |
# ---------- MAIN ----------
|
| 156 |
def process(file):
|
| 157 |
-
|
| 158 |
-
if file is None:
|
| 159 |
-
return (
|
| 160 |
-
pd.DataFrame({"Message": ["Upload a file"]}),
|
| 161 |
-
pd.DataFrame(),
|
| 162 |
-
pd.DataFrame(),
|
| 163 |
-
pd.DataFrame(),
|
| 164 |
-
None,
|
| 165 |
-
None
|
| 166 |
-
)
|
| 167 |
-
|
| 168 |
-
# try webinar conversion
|
| 169 |
-
df = convert_webinar(file)
|
| 170 |
-
|
| 171 |
-
# fallback
|
| 172 |
-
if df is None:
|
| 173 |
-
df = clean_course(file)
|
| 174 |
-
|
| 175 |
-
if df.empty:
|
| 176 |
-
return (
|
| 177 |
-
pd.DataFrame({"Message": ["Could not understand file structure"]}),
|
| 178 |
-
pd.DataFrame(),
|
| 179 |
-
pd.DataFrame(),
|
| 180 |
-
pd.DataFrame(),
|
| 181 |
-
None,
|
| 182 |
-
None
|
| 183 |
-
)
|
| 184 |
-
|
| 185 |
-
# health score
|
| 186 |
-
df["Health Score"] = (
|
| 187 |
-
(df["NPS Score"] * 0.4) +
|
| 188 |
-
(df["Completion Rate (%)"] * 0.3) +
|
| 189 |
-
(df["Satisfaction (1-5)"] * 20 * 0.3)
|
| 190 |
-
)
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
top = df.sort_values(by="Health Score", ascending=False).head(3)
|
| 195 |
-
worst = df.sort_values(by="Health Score").head(3)
|
| 196 |
-
attention = df[df["Needs Attention"] == True]
|
| 197 |
-
|
| 198 |
-
fig1, fig2 = charts(df)
|
| 199 |
-
|
| 200 |
-
return df, top, worst, attention, fig1, fig2
|
| 201 |
-
|
| 202 |
-
except Exception as e:
|
| 203 |
return (
|
| 204 |
-
pd.DataFrame({"
|
| 205 |
pd.DataFrame(),
|
| 206 |
pd.DataFrame(),
|
| 207 |
pd.DataFrame(),
|
|
@@ -209,19 +100,33 @@ def process(file):
|
|
| 209 |
None
|
| 210 |
)
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
# ---------- UI ----------
|
| 214 |
with gr.Blocks() as app:
|
| 215 |
-
gr.Markdown("#
|
| 216 |
-
|
| 217 |
-
gr.Markdown("Upload CSV or Excel (even messy reports).")
|
| 218 |
|
| 219 |
-
file_input = gr.File(label="Upload
|
| 220 |
|
| 221 |
-
table = gr.Dataframe(
|
| 222 |
-
top = gr.Dataframe(
|
| 223 |
-
worst = gr.Dataframe(
|
| 224 |
-
attention = gr.Dataframe(
|
| 225 |
|
| 226 |
chart1 = gr.Plot()
|
| 227 |
chart2 = gr.Plot()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
+
import google.generativeai as genai
|
| 5 |
|
| 6 |
+
# 🔑 API key: prefer the GOOGLE_API_KEY environment variable (for example a
# deployment secret) so the real key never has to be committed to the
# repository. The original placeholder is kept as the fallback so behaviour is
# unchanged when the variable is not set.
import os

genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY"))

# Shared Gemini model handle used by the AI parser below.
model = genai.GenerativeModel("gemini-1.5-flash")
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# ---------- READ FILE AS TEXT ----------
|
| 13 |
+
def file_to_text(file):
    """Return the uploaded file's contents as plain text.

    The file is first loaded with pandas (Excel for ``.xlsx`` paths, CSV for
    everything else) and rendered with ``DataFrame.to_string()``. If pandas
    cannot parse it, the file is read directly as UTF-8 text with undecodable
    bytes dropped.

    Args:
        file: Object exposing a ``name`` attribute that holds the path of the
            file on disk.

    Returns:
        str: The tabular rendering when parsing succeeds, otherwise the raw
        file text.

    Raises:
        AttributeError: If ``file`` has no ``name`` attribute.
        OSError: If the fallback read cannot open the path.
    """
    path = file.name
    try:
        # Case-insensitive so "REPORT.XLSX" is routed to the Excel reader
        # instead of being mis-parsed as CSV.
        if path.lower().endswith(".xlsx"):
            df = pd.read_excel(path)
        else:
            df = pd.read_csv(path, engine="python")

        return df.to_string()

    except Exception:
        # Deliberate best-effort fallback: any parse failure degrades to the
        # raw text. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
# ---------- AI PARSER ----------
|
| 28 |
+
def _split_ai_table(output):
    """Turn the model's comma-separated reply into rows of stripped cells."""
    import csv  # local import: the file's import block lies outside this block

    # Only lines containing a comma are treated as table rows; this drops
    # prose, blank lines and code-fence markers around the table.
    table_lines = [line.strip() for line in output.splitlines() if "," in line]
    reader = csv.reader(table_lines, skipinitialspace=True)
    return [[cell.strip() for cell in row] for row in reader]


def ai_parse(file):
    """Ask the generative model to normalise an uploaded file into course rows.

    The file is converted to text with ``file_to_text``, the first 4000
    characters are embedded in a prompt, and the model's reply is parsed as a
    comma-separated table whose first row is the header.

    Args:
        file: Uploaded file object; it is passed straight to ``file_to_text``.

    Returns:
        pandas.DataFrame | None: Rows with numeric "NPS Score",
        "Completion Rate (%)" and "Satisfaction (1-5)" columns (rows holding
        any missing value are dropped), or ``None`` when the reply has fewer
        than two table lines, lacks a required column, or any step raises.
    """
    numeric_columns = ("NPS Score", "Completion Rate (%)", "Satisfaction (1-5)")

    try:
        text = file_to_text(file)

        prompt = f"""
You are a data analyst.

Convert the following messy dataset into a structured course dataset.

Output ONLY a table with columns:
Course Name, NPS Score, Completion Rate (%), Satisfaction (1-5)

If it's webinar data:
- Use time in session as engagement
- Estimate completion rate
- Estimate satisfaction and NPS realistically

Data:
{text[:4000]}
"""

        response = model.generate_content(prompt)

        output = response.text

        # try converting AI output into dataframe
        rows = _split_ai_table(output)

        if len(rows) < 2:
            return None

        header, *records = rows
        # A row whose width differs from the header would make the DataFrame
        # constructor raise and discard the whole reply; skip just that row.
        records = [record for record in records if len(record) == len(header)]

        df = pd.DataFrame(records, columns=header)

        # The header cells are stripped above, so a reply formatted like the
        # prompt ("Course Name, NPS Score, ...") still matches these names.
        if any(column not in df.columns for column in numeric_columns):
            return None

        # clean numeric
        for column in numeric_columns:
            df[column] = pd.to_numeric(df[column], errors="coerce")

        df = df.dropna()

        return df

    except Exception:
        # Best-effort by design: callers treat None as "could not parse".
        # ``Exception`` keeps KeyboardInterrupt / SystemExit propagating.
        return None
|
| 72 |
|
| 73 |
|
| 74 |
# ---------- CHARTS ----------
|
|
|
|
| 76 |
fig1, ax1 = plt.subplots()
|
| 77 |
ax1.bar(df["Course Name"], df["Health Score"])
|
| 78 |
plt.xticks(rotation=45, ha="right")
|
| 79 |
+
plt.title("Health Score")
|
| 80 |
|
| 81 |
fig2, ax2 = plt.subplots()
|
| 82 |
ax2.bar(df["Course Name"], df["Completion Rate (%)"])
|
|
|
|
| 88 |
|
| 89 |
# ---------- MAIN ----------
|
| 90 |
def process(file):
    """Parse an upload and build the tables and charts derived from it.

    Args:
        file: Uploaded file object, handed to ``ai_parse``.

    Returns:
        tuple: Six items. On success: the scored DataFrame, its three
        highest-scoring rows, its three lowest-scoring rows, the rows flagged
        as needing attention, and the two figures returned by ``charts``.
        When parsing yields nothing: a one-cell message DataFrame, three
        empty DataFrames and ``None`` for both figures.
    """
    parsed = ai_parse(file)

    # Guard clause: nothing usable came back from the parser.
    if parsed is None or parsed.empty:
        notice = pd.DataFrame({"Message": ["AI could not parse file"]})
        return (notice, pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), None, None)

    # Weighted blend: 40% NPS, 30% completion, 30% satisfaction (scaled x20).
    nps_part = parsed["NPS Score"] * 0.4
    completion_part = parsed["Completion Rate (%)"] * 0.3
    satisfaction_part = parsed["Satisfaction (1-5)"] * 20 * 0.3
    parsed["Health Score"] = nps_part + completion_part + satisfaction_part

    parsed["Needs Attention"] = parsed["Health Score"] < 60

    best_three = parsed.sort_values(by="Health Score", ascending=False).head(3)
    worst_three = parsed.sort_values(by="Health Score").head(3)
    flagged = parsed[parsed["Needs Attention"]]

    health_fig, completion_fig = charts(parsed)

    return parsed, best_three, worst_three, flagged, health_fig, completion_fig
|
| 118 |
+
|
| 119 |
|
| 120 |
# ---------- UI ----------
|
| 121 |
with gr.Blocks() as app:
|
| 122 |
+
gr.Markdown("# 🤖 AI Course Quality Tracker")
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
file_input = gr.File(label="Upload ANY file")
|
| 125 |
|
| 126 |
+
table = gr.Dataframe()
|
| 127 |
+
top = gr.Dataframe()
|
| 128 |
+
worst = gr.Dataframe()
|
| 129 |
+
attention = gr.Dataframe()
|
| 130 |
|
| 131 |
chart1 = gr.Plot()
|
| 132 |
chart2 = gr.Plot()
|