SanthiSastra committed on
Commit
9826f07
·
verified ·
1 Parent(s): 7c450bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +211 -447
app.py CHANGED
@@ -1,521 +1,285 @@
 
 
 
 
 
1
  import os
2
- import io
3
  import tempfile
4
- from datetime import datetime
5
-
6
  import numpy as np
7
  import pandas as pd
8
-
9
  import gradio as gr
10
-
11
  import matplotlib.pyplot as plt
12
- import seaborn as sns
13
- from scipy import stats
14
 
15
- from docx import Document
16
- from docx.shared import Inches, Pt
17
- from docx.enum.text import WD_ALIGN_PARAGRAPH
18
 
19
 
20
- # ----------------------------
21
- # EDA helpers
22
- # ----------------------------
23
- def find_numeric_columns(df: pd.DataFrame):
24
- return df.select_dtypes(include=[np.number]).columns.tolist()
25
 
26
- def safe_skew(series: pd.Series):
27
- s = series.dropna()
28
- if len(s) < 3:
29
- return np.nan
30
- return float(s.skew())
31
 
32
- def safe_kurt(series: pd.Series):
33
- s = series.dropna()
34
- if len(s) < 4:
35
- return np.nan
36
- return float(s.kurt())
37
 
38
- def basic_numeric_stats(df: pd.DataFrame, num_cols: list[str]) -> pd.DataFrame:
39
- rows = []
40
- for c in num_cols:
41
- s = df[c]
42
- s2 = s.dropna()
43
- rows.append({
44
- "Attribute": c,
45
- "Count": int(s2.shape[0]),
46
- "Missing": int(s.isna().sum()),
47
- "Mean": float(s2.mean()) if len(s2) else np.nan,
48
- "Std": float(s2.std(ddof=1)) if len(s2) > 1 else np.nan,
49
- "Var": float(s2.var(ddof=1)) if len(s2) > 1 else np.nan,
50
- "Min": float(s2.min()) if len(s2) else np.nan,
51
- "25%": float(s2.quantile(0.25)) if len(s2) else np.nan,
52
- "Median": float(s2.median()) if len(s2) else np.nan,
53
- "75%": float(s2.quantile(0.75)) if len(s2) else np.nan,
54
- "Max": float(s2.max()) if len(s2) else np.nan,
55
- "Skewness": safe_skew(s),
56
- "Kurtosis": safe_kurt(s),
57
- })
58
- return pd.DataFrame(rows)
59
-
60
- def five_point_summary_table(df: pd.DataFrame, num_cols: list[str]) -> pd.DataFrame:
61
- rows = []
62
- for c in num_cols:
63
- s = df[c].dropna()
64
- if len(s) == 0:
65
- rows.append({"Attribute": c, "Min": np.nan, "Q1": np.nan, "Median": np.nan, "Q3": np.nan, "Max": np.nan})
66
- else:
67
- rows.append({
68
- "Attribute": c,
69
- "Min": float(s.min()),
70
- "Q1": float(s.quantile(0.25)),
71
- "Median": float(s.median()),
72
- "Q3": float(s.quantile(0.75)),
73
- "Max": float(s.max()),
74
- })
75
- return pd.DataFrame(rows)
76
-
77
- def interpretation_numeric(stats_df: pd.DataFrame) -> str:
78
- if stats_df.empty:
79
- return "No numeric attributes were detected in the uploaded dataset."
80
- tmp = stats_df[["Attribute", "Skewness"]].dropna()
81
- skew_top = tmp.reindex(tmp["Skewness"].abs().sort_values(ascending=False).index).head(3)
82
- lines = []
83
- lines.append(f"Numeric attributes detected: {len(stats_df)}.")
84
- if len(skew_top) > 0:
85
- parts = [f"{r.Attribute} (skew={r.Skewness:.2f})" for r in skew_top.itertuples(index=False)]
86
- lines.append("Most skewed attributes (absolute skewness): " + ", ".join(parts) + ".")
87
- miss_sorted = stats_df.sort_values("Missing", ascending=False).head(3)
88
- if miss_sorted["Missing"].max() > 0:
89
- parts = [f"{r.Attribute} (missing={int(r.Missing)})" for r in miss_sorted.itertuples(index=False)]
90
- lines.append("Attributes with higher missing values: " + ", ".join(parts) + ".")
91
- else:
92
- lines.append("No missing values were observed in numeric attributes.")
93
- return " ".join(lines)
94
-
95
- def correlation_interpretation(corr: pd.DataFrame) -> str:
96
- if corr is None or corr.empty:
97
- return "Correlation could not be computed (insufficient numeric attributes)."
98
- c = corr.copy()
99
- np.fill_diagonal(c.values, np.nan)
100
- stacked = c.stack().dropna()
101
- if stacked.empty:
102
- return "No meaningful pairwise correlations were found."
103
- top = stacked.abs().sort_values(ascending=False).head(3)
104
- lines = []
105
- for (a, b), _ in top.items():
106
- val = float(corr.loc[a, b])
107
- sign = "positive" if val >= 0 else "negative"
108
- lines.append(f"{a} vs {b}: {val:.2f} ({sign})")
109
- return "Strongest correlations: " + "; ".join(lines) + "."
110
-
111
- def fig_to_png_path(fig) -> str:
112
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
113
- fig.savefig(tmp.name, bbox_inches="tight", dpi=200)
114
- plt.close(fig)
115
- return tmp.name
116
-
117
- def plot_correlogram_annotated(corr: pd.DataFrame, title="Correlogram (Annotated)"):
118
- fig, ax = plt.subplots(figsize=(8, 6))
119
- data = corr.values
120
- im = ax.imshow(data, aspect="auto")
121
- ax.set_title(title)
122
- ax.set_xticks(range(len(corr.columns)))
123
- ax.set_xticklabels(corr.columns, rotation=45, ha="right")
124
- ax.set_yticks(range(len(corr.index)))
125
- ax.set_yticklabels(corr.index)
126
-
127
- for i in range(data.shape[0]):
128
- for j in range(data.shape[1]):
129
- val = data[i, j]
130
- ax.text(j, i, "" if np.isnan(val) else f"{val:.2f}", ha="center", va="center", fontsize=8)
131
-
132
- fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
133
- fig.tight_layout()
134
- return fig
135
 
136
- def plot_pairplot(df: pd.DataFrame, num_cols: list[str], max_cols=6):
137
- use_cols = num_cols[:max_cols]
138
- if len(use_cols) < 2:
139
- return None
140
- grid = sns.pairplot(df[use_cols].dropna(), corner=True, diag_kind="hist")
141
- grid.fig.suptitle("Pair Plot", y=1.02)
142
- return grid
143
-
144
-
145
- # ----------------------------
146
- # DOCX helpers
147
- # ----------------------------
148
- def add_heading_centered(doc: Document, text: str, font_size=16, bold=True, color_rgb="1E5AA8"):
149
- p = doc.add_paragraph()
150
- p.alignment = WD_ALIGN_PARAGRAPH.CENTER
151
- run = p.add_run(text)
152
- run.bold = bold
153
- run.font.size = Pt(font_size)
154
- # set run color
155
- rPr = run._element.get_or_add_rPr()
156
- color = rPr.get_or_add_color()
157
- color.val = color_rgb
158
-
159
- def add_image(doc: Document, image_path: str, width_inches=6.2):
160
- doc.add_picture(image_path, width=Inches(width_inches))
161
-
162
- def build_docx_report(df: pd.DataFrame, dataset_name: str, id_col: str | None,
163
- stats_df: pd.DataFrame, corr: pd.DataFrame,
164
- graph_paths: list[tuple[str, str]]) -> str:
165
- doc = Document()
166
-
167
- add_heading_centered(doc, "Amrita Manthana", font_size=20, bold=True)
168
- add_heading_centered(doc, "Prof.B.Santhi,SRC,SASTRA", font_size=14, bold=True)
169
-
170
- doc.add_paragraph("")
171
- p = doc.add_paragraph()
172
- p.alignment = WD_ALIGN_PARAGRAPH.CENTER
173
- p.add_run("EDA Report").bold = True
174
-
175
- doc.add_paragraph(f"Dataset: {dataset_name}")
176
- doc.add_paragraph(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
177
- if id_col:
178
- doc.add_paragraph(f"Record ID column: {id_col}")
179
-
180
- doc.add_paragraph("")
181
- doc.add_paragraph("1) Numeric Statistics").runs[0].bold = True
182
- doc.add_paragraph(interpretation_numeric(stats_df))
183
-
184
- if not stats_df.empty:
185
- tdf = stats_df.head(25) if len(stats_df) > 25 else stats_df
186
- table = doc.add_table(rows=1, cols=len(tdf.columns))
187
- for j, col in enumerate(tdf.columns):
188
- table.rows[0].cells[j].text = str(col)
189
- for _, row in tdf.iterrows():
190
- cells = table.add_row().cells
191
- for j, col in enumerate(tdf.columns):
192
- val = row[col]
193
- if isinstance(val, float):
194
- cells[j].text = "" if np.isnan(val) else f"{val:.4f}"
195
- else:
196
- cells[j].text = str(val)
197
-
198
- doc.add_paragraph("")
199
- doc.add_paragraph("2) Correlation").runs[0].bold = True
200
- doc.add_paragraph(correlation_interpretation(corr))
201
-
202
- doc.add_paragraph("")
203
- doc.add_paragraph("3) Graphs & Interpretation").runs[0].bold = True
204
- for title, path in graph_paths:
205
- doc.add_paragraph("")
206
- doc.add_paragraph(title).runs[0].bold = True
207
- add_image(doc, path)
208
- if "Correlogram" in title:
209
- doc.add_paragraph("Interpretation: Values near +1/-1 indicate strong positive/negative association.")
210
- elif "Bar" in title:
211
- doc.add_paragraph("Interpretation: Taller bars indicate larger frequency/aggregate value.")
212
- elif "Pie" in title:
213
- doc.add_paragraph("Interpretation: Slice proportions show relative contribution of categories.")
214
- elif "Scatter" in title:
215
- doc.add_paragraph("Interpretation: Patterns indicate linear/non-linear trend, clustering, or outliers.")
216
- elif "Pair Plot" in title:
217
- doc.add_paragraph("Interpretation: Diagonal shows distributions; others show pairwise relationships/outliers.")
218
-
219
- doc.add_paragraph("")
220
- doc.add_paragraph("4) Final Remarks").runs[0].bold = True
221
- doc.add_paragraph("This report consolidates numeric measures, distributions, and relationships among attributes.")
222
-
223
- out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
224
- doc.save(out_path)
225
- return out_path
226
-
227
-
228
- # ----------------------------
229
- # App logic
230
- # ----------------------------
231
- def load_csv(file_obj, sep, header_flag):
232
- if file_obj is None:
233
- return None, "Please upload a CSV.", None, [], None
234
 
235
- try:
236
- header = 0 if header_flag else None
237
- df = pd.read_csv(file_obj.name, sep=sep, header=header)
238
- if not header_flag:
239
- df.columns = [f"col_{i+1}" for i in range(df.shape[1])]
240
- num_cols = find_numeric_columns(df)
241
- cols = df.columns.tolist()
242
- info = f"Loaded: rows={df.shape[0]}, cols={df.shape[1]}. Numeric cols={len(num_cols)}."
243
- return df, info, df.head(30), cols, num_cols
244
- except Exception as e:
245
- return None, f"Could not read CSV: {e}", None, [], None
246
 
 
247
 
248
- def eda_compute(df: pd.DataFrame):
249
- # MUST return 6 outputs always: note, stats, fps, skew, corr_table, corr_note
250
- if df is None:
251
- return "Upload a CSV first.", pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), ""
252
 
253
- num_cols = find_numeric_columns(df)
254
- if len(num_cols) == 0:
255
- return "No numeric columns found.", pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), "Correlation not available."
 
 
 
 
 
 
 
 
256
 
257
- stats_df = basic_numeric_stats(df, num_cols)
258
- fps_df = five_point_summary_table(df, num_cols)
259
- skew_df = pd.DataFrame({"Attribute": num_cols, "Skewness": [safe_skew(df[c]) for c in num_cols]})
260
 
261
- if len(num_cols) >= 2:
262
- corr_df = df[num_cols].corr(numeric_only=True)
263
- corr_text = correlation_interpretation(corr_df)
264
- else:
265
- corr_df = pd.DataFrame()
266
- corr_text = "Correlation not available (need at least 2 numeric columns)."
267
 
268
- return interpretation_numeric(stats_df), stats_df, fps_df, skew_df, corr_df, corr_text
 
 
269
 
 
 
270
 
 
 
 
271
 
272
- def graph_make(df: pd.DataFrame, barpie_col: str, topn: int,
273
- scatter_x: str, scatter_y: str, pair_max: int):
274
- if df is None:
275
- return None, None, None, None, None
276
-
277
- paths = [None, None, None, None, None]
278
-
279
- # Bar / Pie data
280
- series = df[barpie_col]
281
- if pd.api.types.is_numeric_dtype(series):
282
- binned = pd.cut(series.dropna(), bins=10)
283
- counts = binned.value_counts().head(topn)
284
- labels = counts.index.astype(str).tolist()
285
- yvals = counts.values
286
- suffix = "(binned)"
287
- else:
288
- counts = series.astype(str).value_counts().head(topn)
289
- labels = counts.index.tolist()
290
- yvals = counts.values
291
- suffix = ""
292
-
293
- fig_bar, ax = plt.subplots(figsize=(7, 4))
294
- ax.bar(range(len(labels)), yvals)
295
- ax.set_xticks(range(len(labels)))
296
- ax.set_xticklabels(labels, rotation=45, ha="right")
297
- ax.set_title(f"Bar Chart: {barpie_col} {suffix}")
298
- ax.set_ylabel("Count")
299
- paths[0] = fig_to_png_path(fig_bar)
300
-
301
- fig_pie, ax2 = plt.subplots(figsize=(6, 4))
302
- ax2.pie(yvals, labels=labels, autopct="%1.1f%%")
303
- ax2.set_title(f"Pie Chart: {barpie_col} {suffix}")
304
- paths[1] = fig_to_png_path(fig_pie)
305
-
306
- # Scatter / Corr / Pair
307
- num_cols = find_numeric_columns(df)
308
- if len(num_cols) >= 2 and scatter_x in num_cols and scatter_y in num_cols:
309
- fig_sc, ax3 = plt.subplots(figsize=(7, 4))
310
- ax3.scatter(df[scatter_x], df[scatter_y], alpha=0.7)
311
- ax3.set_xlabel(scatter_x)
312
- ax3.set_ylabel(scatter_y)
313
- ax3.set_title(f"Scatter: {scatter_x} vs {scatter_y}")
314
- paths[2] = fig_to_png_path(fig_sc)
315
-
316
- corr = df[num_cols].corr(numeric_only=True)
317
- fig_corr = plot_correlogram_annotated(corr)
318
- paths[3] = fig_to_png_path(fig_corr)
319
-
320
- pair_max = max(2, min(pair_max, len(num_cols)))
321
- grid = plot_pairplot(df, num_cols, max_cols=pair_max)
322
- if grid is not None:
323
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
324
- grid.fig.savefig(tmp, bbox_inches="tight", dpi=200)
325
- plt.close(grid.fig)
326
- paths[4] = tmp
327
-
328
- return tuple(paths)
329
-
330
-
331
- def report_generate(df: pd.DataFrame, dataset_name: str, id_col: str,
332
- barpie_col: str, topn: int, scatter_x: str, scatter_y: str, pair_max: int):
333
- if df is None:
334
- return None, "Upload a CSV first."
335
 
336
- num_cols = find_numeric_columns(df)
337
- stats_df = basic_numeric_stats(df, num_cols) if len(num_cols) else pd.DataFrame()
338
- corr = df[num_cols].corr(numeric_only=True) if len(num_cols) >= 2 else pd.DataFrame()
 
 
339
 
340
- # build graphs (same as graph tab)
341
- bar_path, pie_path, sc_path, corr_path, pair_path = graph_make(df, barpie_col, topn, scatter_x, scatter_y, pair_max)
 
342
 
343
- graph_paths = []
344
- if bar_path: graph_paths.append((f"Bar Chart: {barpie_col}", bar_path))
345
- if pie_path: graph_paths.append((f"Pie Chart: {barpie_col}", pie_path))
346
- if sc_path: graph_paths.append((f"Scatter: {scatter_x} vs {scatter_y}", sc_path))
347
- if corr_path: graph_paths.append(("Correlogram (Annotated)", corr_path))
348
- if pair_path: graph_paths.append((f"Pair Plot (first {min(pair_max, len(num_cols))} numeric cols)", pair_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
- docx_path = build_docx_report(df, dataset_name or "uploaded.csv", id_col, stats_df, corr, graph_paths)
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
- # (optional) cleanup images later is fine; HF temp storage is ok for session
353
- return docx_path, "DOCX report generated successfully."
354
 
 
 
 
 
 
355
 
356
- def search_record(df: pd.DataFrame, id_col: str, query: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  if df is None:
358
- return "Upload a CSV first.", None
359
- if not query:
360
- return "Enter a value to search.", None
361
- if id_col not in df.columns:
362
- return "Select a valid ID column.", None
363
-
364
- col = df[id_col]
365
- result = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  if pd.api.types.is_numeric_dtype(col):
367
  try:
368
- q = float(query)
369
- result = df[df[id_col] == q]
370
- except:
371
- result = df[col.astype(str) == query]
372
  else:
373
- result = df[col.astype(str) == query]
374
 
375
- if result is None or result.empty:
376
  return "No matching record found.", pd.DataFrame()
377
- return f"Found {len(result)} record(s).", result
378
 
379
 
380
- # ----------------------------
381
- # UI
382
- # ----------------------------
383
  CSS = """
384
- #titleblock {text-align:center; margin-top: 5px; margin-bottom: 10px;}
385
  #t1 {font-size:30px; font-weight:800; color:#1E5AA8;}
386
  #t2 {font-size:18px; font-weight:800; color:#1E5AA8;}
387
  """
388
 
389
- def logo_html():
390
- # place logo.png in repo root
391
  if os.path.exists("logo.jpg"):
392
- # gradio serves files placed in root with relative path in HTML
393
- return f"""
394
- <div id="titleblock">
395
- <img src="file=logo.jpg" style="width:110px; display:block; margin:0 auto;" />
396
- <div id="t1">Amrita Manthana</div>
397
- <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
398
- </div>
399
- """
400
- else:
401
  return """
402
  <div id="titleblock">
403
- <div id="t1">Amrita Manthana</div>
404
- <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
 
405
  </div>
406
  """
 
 
 
 
 
 
 
407
 
408
- with gr.Blocks(css=CSS, title="Amrita Manthana - EDA (Gradio)") as demo:
409
- gr.HTML(logo_html())
410
 
411
  df_state = gr.State(None)
412
- cols_state = gr.State([])
413
- numcols_state = gr.State([])
414
 
415
  with gr.Row():
416
- with gr.Column(scale=1, min_width=320):
417
- gr.Markdown("### Data Upload")
418
  file_in = gr.File(label="Upload CSV", file_types=[".csv"])
419
- sep = gr.Dropdown(label="CSV Separator", choices=[",", ";", "\t", "|"], value=",")
420
- header_flag = gr.Checkbox(label="First row is header", value=True)
421
- load_btn = gr.Button("Load Data", variant="primary")
422
  load_msg = gr.Textbox(label="Status", interactive=False)
423
- preview = gr.Dataframe(label="Preview (first 30 rows)", interactive=False, wrap=True)
 
 
 
 
 
 
 
424
 
425
  with gr.Column(scale=2):
426
  with gr.Tabs():
427
- with gr.Tab("EDA"):
428
- eda_btn = gr.Button("Compute EDA")
429
- eda_note = gr.Textbox(label="Interpretation", lines=3, interactive=False)
430
- stats_table = gr.Dataframe(label="Descriptive Statistics", interactive=False, wrap=True)
431
- fps_table = gr.Dataframe(label="Five-Point Summary", interactive=False, wrap=True)
432
- skew_table = gr.Dataframe(label="Skewness", interactive=False, wrap=True)
433
- corr_table = gr.Dataframe(label="Correlation (numeric)", interactive=False, wrap=True)
434
- corr_note = gr.Textbox(label="Correlation Interpretation", lines=2, interactive=False)
435
-
436
- with gr.Tab("Graph"):
437
- gr.Markdown("#### Choose settings, then generate graphs")
438
- barpie_col = gr.Dropdown(label="Column for Bar/Pie", choices=[], value=None)
439
- topn = gr.Slider(label="Top-N categories", minimum=3, maximum=30, value=10, step=1)
440
-
441
- scatter_x = gr.Dropdown(label="Scatter X (numeric)", choices=[], value=None)
442
- scatter_y = gr.Dropdown(label="Scatter Y (numeric)", choices=[], value=None)
443
- pair_max = gr.Slider(label="Pair plot max numeric columns", minimum=2, maximum=10, value=6, step=1)
444
-
445
- graph_btn = gr.Button("Generate Graphs")
446
- with gr.Row():
447
- bar_img = gr.Image(label="Bar", type="filepath")
448
- pie_img = gr.Image(label="Pie", type="filepath")
449
- with gr.Row():
450
- sc_img = gr.Image(label="Scatter", type="filepath")
451
- corr_img = gr.Image(label="Correlogram (numbers inside)", type="filepath")
452
- pair_img = gr.Image(label="Pair Plot", type="filepath")
453
-
454
- with gr.Tab("Report"):
455
- gr.Markdown("#### DOCX report (includes all graphs + interpretations)")
456
- id_col_rep = gr.Dropdown(label="Record ID column (for report/search)", choices=[], value=None)
457
- rep_btn = gr.Button("Generate DOCX Report", variant="primary")
458
- rep_status = gr.Textbox(label="Report Status", interactive=False)
459
- rep_file = gr.File(label="Download Report (.docx)")
460
-
461
  with gr.Tab("Search"):
462
- id_col_search = gr.Dropdown(label="Select ID column", choices=[], value=None)
463
- query = gr.Textbox(label="Enter ID value (exact match)")
464
  search_btn = gr.Button("Search")
465
  search_msg = gr.Textbox(label="Search Status", interactive=False)
466
- search_out = gr.Dataframe(label="Matching Records", interactive=False, wrap=True)
467
-
468
- # --- events ---
469
- def after_load(file_obj, sep_val, header_val):
470
- df, msg, prev, cols, numcols = load_csv(file_obj, sep_val, header_val)
471
- # for dropdowns
472
- return (
473
- df, cols, numcols,
474
- msg, prev,
475
- gr.update(choices=cols, value=(cols[0] if cols else None)), # barpie_col
476
- gr.update(choices=numcols, value=(numcols[0] if len(numcols) else None)), # scatter_x
477
- gr.update(choices=numcols, value=(numcols[1] if len(numcols) > 1 else None)), # scatter_y
478
- gr.update(choices=cols, value=(cols[0] if cols else None)), # id_col_rep
479
- gr.update(choices=cols, value=(cols[0] if cols else None)), # id_col_search
480
- )
481
 
 
482
  load_btn.click(
483
- after_load,
484
- inputs=[file_in, sep, header_flag],
485
- outputs=[df_state, cols_state, numcols_state,
486
- load_msg, preview,
487
- barpie_col, scatter_x, scatter_y, id_col_rep, id_col_search]
488
- )
489
-
490
- eda_btn.click(
491
- eda_compute,
492
- inputs=[df_state],
493
- outputs=[eda_note, stats_table, fps_table, skew_table, corr_table, corr_note]
494
  )
495
 
496
- graph_btn.click(
497
- graph_make,
498
- inputs=[df_state, barpie_col, topn, scatter_x, scatter_y, pair_max],
499
- outputs=[bar_img, pie_img, sc_img, corr_img, pair_img]
500
- )
501
-
502
- def rep_run(df, idcol, barcol, topn_v, sx, sy, pmx, file_obj):
503
- name = file_obj.name if file_obj is not None else "uploaded.csv"
504
- path, status = report_generate(df, name, idcol, barcol, topn_v, sx, sy, pmx)
505
- return status, path
506
 
507
- rep_btn.click(
508
- rep_run,
509
- inputs=[df_state, id_col_rep, barpie_col, topn, scatter_x, scatter_y, pair_max, file_in],
510
- outputs=[rep_status, rep_file]
 
511
  )
512
 
513
  search_btn.click(
514
- search_record,
515
- inputs=[df_state, id_col_search, query],
516
  outputs=[search_msg, search_out]
517
  )
518
 
519
  if __name__ == "__main__":
520
  demo.launch()
521
-
 
1
+ # app.py (Fast-build Hugging Face Gradio)
2
+ # School Mark Analysis: RegNo, Name, Tamil, English, Maths, Science, Social
3
+ # Features: Total, Average, Rank, Remark, subject averages, fail-count (1..5), top-3 overall, top-3 per subject,
4
+ # search by RegNo, download result CSV
5
+
6
  import os
 
7
  import tempfile
 
 
8
  import numpy as np
9
  import pandas as pd
 
10
  import gradio as gr
 
11
  import matplotlib.pyplot as plt
 
 
12
 
13
+ SUBJECTS_DEFAULT = ["Tamil", "English", "Maths", "Science", "Social"]
14
+ ID_COL_DEFAULT = "RegNo"
15
+ NAME_COL_DEFAULT = "Name"
16
 
17
 
18
+ def _clean_columns(df: pd.DataFrame) -> pd.DataFrame:
19
+ df = df.copy()
20
+ df.columns = [c.strip() for c in df.columns]
21
+ return df
 
22
 
 
 
 
 
 
23
 
24
def _validate_and_prepare(df: pd.DataFrame, id_col: str, name_col: str, subjects: list[str]) -> pd.DataFrame:
    """Normalize headers, check required columns, and coerce marks to numbers.

    Args:
        df: Raw uploaded table.
        id_col: Register-number column name.
        name_col: Student-name column name.
        subjects: Subject mark column names.

    Returns:
        A cleaned copy of *df* with numeric subject columns (bad cells → NaN).

    Raises:
        ValueError: when a required column is absent, or when every subject
            value becomes NaN after coercion (i.e. the CSV data is unusable).
    """
    df = _clean_columns(df)

    required = [id_col, name_col] + subjects
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Force every subject column to numeric; non-numeric cells become NaN.
    for subject in subjects:
        df[subject] = pd.to_numeric(df[subject], errors="coerce")

    # Guard against a file whose mark columns held no numeric data at all.
    if df[subjects].isna().all(axis=None):
        raise ValueError("All subject columns became NaN after numeric conversion. Check CSV data.")

    return df
40
 
 
 
 
 
41
 
42
+ def _remark(avg: float, failed_subjects: int) -> str:
43
+ if failed_subjects > 0:
44
+ return "Fail"
45
+ # Only pass students reach here
46
+ if avg >= 80:
47
+ return "Distinction"
48
+ if 60 <= avg <= 79:
49
+ return "First Class"
50
+ if 35 <= avg <= 59:
51
+ return "Second Class"
52
+ return "Pass"
53
 
 
 
 
54
 
55
def compute_marks(df: pd.DataFrame, pass_mark: int = 35, id_col: str = ID_COL_DEFAULT,
                  name_col: str = NAME_COL_DEFAULT, subjects: list[str] = SUBJECTS_DEFAULT):
    """Compute per-student results and class-level summaries.

    Args:
        df: Raw marks table with id, name and subject columns.
        pass_mark: Minimum mark (per subject) needed to pass that subject.
        id_col: Column holding the student register number.
        name_col: Column holding the student name.
        subjects: Subject column names to analyse.

    Returns:
        Tuple of (result_df, subj_avg, fail_dist, top3_overall,
        top3_each_subject, summary_text).

    Raises:
        ValueError: propagated from validation when columns are missing or
            all subject data is non-numeric.
    """
    df = _validate_and_prepare(df, id_col, name_col, subjects)

    out = df.copy()
    out["Total"] = out[subjects].sum(axis=1)
    out["Average"] = out[subjects].mean(axis=1)

    # NOTE(review): NaN marks compare False against pass_mark, so a missing
    # mark is NOT counted as a failed subject — confirm this is intended.
    out["Failed_Subjects"] = (out[subjects] < pass_mark).sum(axis=1)
    out["Remark"] = out.apply(lambda r: _remark(float(r["Average"]), int(r["Failed_Subjects"])), axis=1)

    # Rank by Total (descending); ties share the same minimum rank.
    out["Rank"] = out["Total"].rank(method="min", ascending=False).astype(int)
    out = out.sort_values(["Rank", id_col], ascending=[True, True]).reset_index(drop=True)

    # Subject-wise class averages.
    subj_avg = pd.DataFrame({
        "Subject": subjects,
        "Class_Average": [float(out[s].mean()) for s in subjects],
    })

    # Fail distribution, generalized to however many subjects were supplied
    # (previously hard-coded to 1..5, which broke for non-default subject lists).
    fail_buckets = list(range(1, len(subjects) + 1))
    fail_dist = pd.DataFrame({
        "Failed_Subjects": fail_buckets,
        "Student_Count": [int((out["Failed_Subjects"] == k).sum()) for k in fail_buckets],
    })

    # Top 3 students overall (by rank).
    top3_overall_cols = [id_col, name_col, "Total", "Average", "Rank", "Remark"]
    top3_overall = out.nsmallest(3, "Rank")[top3_overall_cols]

    # Top 3 students in each subject, stacked into one table.
    rows = []
    for s in subjects:
        t = out.sort_values(s, ascending=False).head(3)[[id_col, name_col, s, "Total", "Average", "Rank", "Remark"]].copy()
        t.insert(0, "Subject", s)
        t.rename(columns={s: "Subject_Mark"}, inplace=True)
        rows.append(t)
    top3_each_subject = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    # One-line class summary for the UI status box.
    total_students = out.shape[0]
    pass_count = int((out["Failed_Subjects"] == 0).sum())
    fail_count = total_students - pass_count

    dist_count = int((out["Remark"] == "Distinction").sum())
    first_count = int((out["Remark"] == "First Class").sum())
    second_count = int((out["Remark"] == "Second Class").sum())

    summary = (
        f"Students: {total_students} | Pass: {pass_count} | Fail: {fail_count} | "
        f"Distinction: {dist_count} | First Class: {first_count} | Second Class: {second_count}"
    )

    return out, subj_avg, fail_dist, top3_overall, top3_each_subject, summary
110
+
111
+
112
def plot_subject_avg(subj_avg: pd.DataFrame):
    """Render a bar chart of per-subject class averages.

    The y-axis is pinned to 0-100 so charts from different uploads are
    directly comparable.
    """
    figure, axis = plt.subplots(figsize=(7, 4))
    axis.bar(subj_avg["Subject"], subj_avg["Class_Average"])
    axis.set(
        title="Subject-wise Class Average",
        xlabel="Subject",
        ylabel="Average Marks",
        ylim=(0, 100),
    )
    plt.xticks(rotation=25, ha="right")
    plt.tight_layout()
    return figure
122
 
 
 
123
 
124
def plot_remark_distribution(result_df: pd.DataFrame):
    """Render a bar chart of how many students fall in each remark band.

    Bands appear in a fixed order; bands with zero students are omitted.
    """
    band_order = ["Distinction", "First Class", "Second Class", "Fail", "Pass"]
    tally = result_df["Remark"].value_counts()
    present = [band for band in band_order if band in tally.index]
    student_counts = [int(tally[band]) for band in present]

    figure, axis = plt.subplots(figsize=(7, 4))
    axis.bar(present, student_counts)
    axis.set(
        title="Remark Distribution",
        xlabel="Remark",
        ylabel="Number of Students",
    )
    plt.xticks(rotation=20, ha="right")
    plt.tight_layout()
    return figure
138
+
139
+
140
def load_csv(file_obj):
    """Read an uploaded CSV into a DataFrame.

    Returns a (dataframe, status_message, preview) triple. On any failure
    the dataframe and preview slots are None and the message explains why.
    """
    if file_obj is None:
        return None, "Please upload a CSV.", None

    try:
        frame = _clean_columns(pd.read_csv(file_obj.name))
        status = (f"Loaded: {os.path.basename(file_obj.name)} | "
                  f"Rows={frame.shape[0]} | Cols={frame.shape[1]}")
        return frame, status, frame.head(20)
    except Exception as e:
        return None, f"Could not read CSV: {e}", None
151
+
152
+
153
def run_all(df, pass_mark):
    """Run the full mark analysis and package every UI output.

    Returns a 9-tuple: (summary text, result table, subject averages, fail
    distribution, top-3 overall, top-3 per subject, averages figure, remarks
    figure, path of a downloadable result CSV). On error the summary slot
    carries the message and every other slot is None.
    """
    blanks = (None,) * 8
    if df is None:
        return ("Upload a CSV first.",) + blanks

    try:
        (result_df, subj_avg, fail_dist,
         top3_overall, top3_each_subject, summary) = compute_marks(df, pass_mark=int(pass_mark))

        fig_avg = plot_subject_avg(subj_avg)
        fig_remark = plot_remark_distribution(result_df)

        # Persist the full result table to a temp CSV for the download widget.
        out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".csv").name
        result_df.to_csv(out_path, index=False)

        return (summary, result_df, subj_avg, fail_dist, top3_overall,
                top3_each_subject, fig_avg, fig_remark, out_path)
    except Exception as e:
        return (f"Error: {e}",) + blanks
171
+
172
+
173
def search_regno(result_df, regno_value):
    """Find result rows whose RegNo exactly matches *regno_value*.

    Returns (status_message, matching_rows). Numeric comparison is tried
    first when the RegNo column is numeric; otherwise (or on conversion
    failure) the match falls back to string equality.
    """
    if result_df is None or isinstance(result_df, str):
        return "Run analysis first.", pd.DataFrame()
    if not regno_value:
        return "Enter RegNo to search.", pd.DataFrame()

    ids = result_df[ID_COL_DEFAULT]

    def _by_string():
        # Exact match after stringifying both sides.
        return result_df[ids.astype(str) == str(regno_value)]

    if pd.api.types.is_numeric_dtype(ids):
        try:
            matches = result_df[ids == float(regno_value)]
        except Exception:
            matches = _by_string()
    else:
        matches = _by_string()

    if matches.empty:
        return "No matching record found.", pd.DataFrame()
    return f"Found {len(matches)} record(s).", matches
194
 
195
 
 
 
 
196
  CSS = """
197
+ #titleblock {text-align:center; margin-top: 6px; margin-bottom: 8px;}
198
  #t1 {font-size:30px; font-weight:800; color:#1E5AA8;}
199
  #t2 {font-size:18px; font-weight:800; color:#1E5AA8;}
200
  """
201
 
202
def header_html():
    """Build the page-header HTML; the logo image is included only when
    logo.jpg exists in the repo root (Gradio serves it via the file= path)."""
    if not os.path.exists("logo.jpg"):
        return """
    <div id="titleblock">
      <div id="t1">Amrita Manthana</div>
      <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
    </div>
    """
    return """
    <div id="titleblock">
      <img src="file=logo.jpg" style="width:110px; display:block; margin:0 auto;" />
      <div id="t1">Amrita Manthana</div>
      <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
    </div>
    """
217
+
218
 
219
# Gradio UI: left column handles upload + settings, right column shows
# results across tabs. Two State holders carry data between callbacks:
# df_state (raw upload) and result_state (computed result table for search).
with gr.Blocks(css=CSS, title="School Mark Analysis") as demo:
    gr.HTML(header_html())

    df_state = gr.State(None)
    result_state = gr.State(None)  # stores result_df for search

    with gr.Row():
        with gr.Column(scale=1, min_width=340):
            gr.Markdown("### Upload Marks CSV")
            file_in = gr.File(label="Upload CSV", file_types=[".csv"])
            load_btn = gr.Button("Load CSV", variant="primary")
            load_msg = gr.Textbox(label="Status", interactive=False)
            preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)

            gr.Markdown("### Analysis Settings")
            pass_mark = gr.Slider(label="Pass mark (per subject)", minimum=0, maximum=100, value=35, step=1)
            run_btn = gr.Button("Run Mark Analysis", variant="primary")
            summary = gr.Textbox(label="Summary", interactive=False)

            download_file = gr.File(label="Download Result CSV")

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Result Table"):
                    result_table = gr.Dataframe(label="Result (Total, Average, Rank, Remark, Failed_Subjects)", interactive=False, wrap=True)
                with gr.Tab("Subject Averages"):
                    subj_avg_table = gr.Dataframe(label="Subject-wise Averages", interactive=False, wrap=True)
                    avg_plot = gr.Plot(label="Bar Chart: Subject-wise Average")
                with gr.Tab("Fail Counts"):
                    fail_dist_table = gr.Dataframe(label="Students failed in 1/2/3/4/5 subjects", interactive=False, wrap=True)
                with gr.Tab("Toppers"):
                    top3_overall_table = gr.Dataframe(label="Overall Top 3", interactive=False, wrap=True)
                    top3_each_subject_table = gr.Dataframe(label="Top 3 in each subject", interactive=False, wrap=True)
                with gr.Tab("Remarks"):
                    remark_plot = gr.Plot(label="Remark Distribution")
                with gr.Tab("Search"):
                    regno_in = gr.Textbox(label="Enter RegNo (exact match)")
                    search_btn = gr.Button("Search")
                    search_msg = gr.Textbox(label="Search Status", interactive=False)
                    search_out = gr.Dataframe(label="Matching Record(s)", interactive=False, wrap=True)

    # Events
    load_btn.click(
        load_csv,
        inputs=[file_in],
        outputs=[df_state, load_msg, preview]
    )

    # Wraps run_all so the computed result table is ALSO written into
    # result_state (last output), which the Search tab reads later.
    def run_and_store(df, pm):
        s, res, subj, faild, t3, t3sub, f1, f2, fcsv = run_all(df, pm)
        return s, res, subj, faild, t3, t3sub, f1, f2, fcsv, res

    run_btn.click(
        run_and_store,
        inputs=[df_state, pass_mark],
        outputs=[summary, result_table, subj_avg_table, fail_dist_table, top3_overall_table, top3_each_subject_table,
                 avg_plot, remark_plot, download_file, result_state]
    )

    search_btn.click(
        search_regno,
        inputs=[result_state, regno_in],
        outputs=[search_msg, search_out]
    )

if __name__ == "__main__":
    demo.launch()