Spaces:

Chand11
/

Product-AI

Sleeping

App Files Community

Chand11 committed on Apr 28

Commit

bb58e23

verified ·

1 Parent(s): 49c377c

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -32

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import gradio as gr
 import pandas as pd
 import matplotlib.pyplot as plt
-# ---------- UNIVERSAL FILE READER ----------
-def read_file(file):
     try:
         if file.name.endswith(".xlsx"):
             return pd.read_excel(file.name, header=None)
@@ -13,32 +13,38 @@ def read_file(file):
         return None
-# ---------- AI-LIKE STRUCTURE DETECTION ----------
-def detect_table_start(df_raw):
     for i, row in df_raw.iterrows():
         row_text = " ".join(row.astype(str)).lower()
-        if "attendee details" in row_text:
-            return i + 1
         if "course" in row_text and ("nps" in row_text or "completion" in row_text):
             return i
     return None
-# ---------- WEBINAR → COURSE CONVERSION ----------
 def convert_webinar(file):
     try:
-        df_raw = read_file(file)
         if df_raw is None:
             return None
-        header_index = detect_table_start(df_raw)
         if header_index is None:
             return None
-        # re-read clean
         if file.name.endswith(".xlsx"):
             df = pd.read_excel(file.name, skiprows=header_index)
         else:
@@ -46,25 +52,29 @@ def convert_webinar(file):
         df.columns = df.columns.str.strip()
-        if "Time in Session (minutes)" not in df.columns:
-            return None
-        df["Time in Session (minutes)"] = pd.to_numeric(
-            df["Time in Session (minutes)"], errors="coerce"
-        )
         df = df.dropna()
         if df.empty:
             return None
         total = len(df)
-        completed = df[df["Time in Session (minutes)"] > 60]
         completion = (len(completed) / total) * 100
-        avg_time = df["Time in Session (minutes)"].mean()
-        # simulated metrics
         if avg_time > 150:
             satisfaction, nps = 4.6, 75
         elif avg_time > 100:
@@ -86,7 +96,7 @@ def convert_webinar(file):
 # ---------- CLEAN COURSE DATA ----------
-def clean_course_data(file):
     try:
         if file.name.endswith(".xlsx"):
             df = pd.read_excel(file.name)
@@ -109,6 +119,7 @@ def clean_course_data(file):
         df = df.rename(columns=rename_map)
         required = ["Course Name", "NPS Score", "Completion Rate (%)", "Satisfaction (1-5)"]
         for col in required:
             if col not in df.columns:
                 df[col] = None
@@ -127,7 +138,7 @@ def clean_course_data(file):
 # ---------- CHARTS ----------
-def create_charts(df):
     fig1, ax1 = plt.subplots()
     ax1.bar(df["Course Name"], df["Health Score"])
     plt.xticks(rotation=45, ha="right")
@@ -141,18 +152,25 @@ def create_charts(df):
     return fig1, fig2
-# ---------- MAIN PROCESS ----------
 def process(file):
     try:
         if file is None:
-            return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), None, None
-        # try webinar conversion first
         df = convert_webinar(file)
-        # fallback to course cleaning
         if df is None:
-            df = clean_course_data(file)
         if df.empty:
             return (
@@ -177,7 +195,7 @@ def process(file):
         worst = df.sort_values(by="Health Score").head(3)
         attention = df[df["Needs Attention"] == True]
-        fig1, fig2 = create_charts(df)
         return df, top, worst, attention, fig1, fig2
@@ -196,14 +214,14 @@ def process(file):
 with gr.Blocks() as app:
     gr.Markdown("# 📊 Smart Course Quality Tracker")
-    gr.Markdown("Upload CSV or Excel. Handles messy reports automatically.")
-    file_input = gr.File(label="Upload CSV / Excel")
     table = gr.Dataframe(label="Processed Data")
-    top_table = gr.Dataframe(label="Top Courses")
-    worst_table = gr.Dataframe(label="Worst Courses")
-    attention_table = gr.Dataframe(label="Needs Attention")
     chart1 = gr.Plot()
     chart2 = gr.Plot()
@@ -211,7 +229,7 @@ with gr.Blocks() as app:
     file_input.change(
         fn=process,
         inputs=file_input,
-        outputs=[table, top_table, worst_table, attention_table, chart1, chart2]
     )
 app.launch()

 import pandas as pd
 import matplotlib.pyplot as plt
+# ---------- READ FILE ----------
+def read_raw(file):
     try:
         if file.name.endswith(".xlsx"):
             return pd.read_excel(file.name, header=None)
         return None
+# ---------- DETECT HEADER ----------
+def find_header(df_raw):
     for i, row in df_raw.iterrows():
         row_text = " ".join(row.astype(str)).lower()
+        # strong detection for webinar reports
+        if (
+            "time in session" in row_text and
+            "join time" in row_text and
+            "leave time" in row_text
+        ):
+            return i
+        # fallback detection for course datasets
         if "course" in row_text and ("nps" in row_text or "completion" in row_text):
             return i
     return None
+# ---------- CONVERT WEBINAR ----------
 def convert_webinar(file):
     try:
+        df_raw = read_raw(file)
         if df_raw is None:
             return None
+        header_index = find_header(df_raw)
         if header_index is None:
             return None
+        # read structured part
         if file.name.endswith(".xlsx"):
             df = pd.read_excel(file.name, skiprows=header_index)
         else:
         df.columns = df.columns.str.strip()
+        # find time column dynamically
+        time_col = None
+        for col in df.columns:
+            if "time in session" in col.lower():
+                time_col = col
+                break
+        if time_col is None:
+            return None
+        df[time_col] = pd.to_numeric(df[time_col], errors="coerce")
         df = df.dropna()
         if df.empty:
             return None
         total = len(df)
+        completed = df[df[time_col] > 60]
         completion = (len(completed) / total) * 100
+        avg_time = df[time_col].mean()
+        # simulate metrics
         if avg_time > 150:
             satisfaction, nps = 4.6, 75
         elif avg_time > 100:
 # ---------- CLEAN COURSE DATA ----------
+def clean_course(file):
     try:
         if file.name.endswith(".xlsx"):
             df = pd.read_excel(file.name)
         df = df.rename(columns=rename_map)
         required = ["Course Name", "NPS Score", "Completion Rate (%)", "Satisfaction (1-5)"]
         for col in required:
             if col not in df.columns:
                 df[col] = None
 # ---------- CHARTS ----------
+def charts(df):
     fig1, ax1 = plt.subplots()
     ax1.bar(df["Course Name"], df["Health Score"])
     plt.xticks(rotation=45, ha="right")
     return fig1, fig2
+# ---------- MAIN ----------
 def process(file):
     try:
         if file is None:
+            return (
+                pd.DataFrame({"Message": ["Upload a file"]}),
+                pd.DataFrame(),
+                pd.DataFrame(),
+                pd.DataFrame(),
+                None,
+                None
+            )
+        # try webinar conversion
         df = convert_webinar(file)
+        # fallback
         if df is None:
+            df = clean_course(file)
         if df.empty:
             return (
         worst = df.sort_values(by="Health Score").head(3)
         attention = df[df["Needs Attention"] == True]
+        fig1, fig2 = charts(df)
         return df, top, worst, attention, fig1, fig2
 with gr.Blocks() as app:
     gr.Markdown("# 📊 Smart Course Quality Tracker")
+    gr.Markdown("Upload CSV or Excel (even messy reports).")
+    file_input = gr.File(label="Upload File")
     table = gr.Dataframe(label="Processed Data")
+    top = gr.Dataframe(label="Top Courses")
+    worst = gr.Dataframe(label="Worst Courses")
+    attention = gr.Dataframe(label="Needs Attention")
     chart1 = gr.Plot()
     chart2 = gr.Plot()
     file_input.change(
         fn=process,
         inputs=file_input,
+        outputs=[table, top, worst, attention, chart1, chart2]
     )
 app.launch()