Spaces:
Configuration error
Configuration error
| import os | |
| import sqlite3 | |
| from datetime import datetime | |
| import numpy as np | |
| import pandas as pd | |
| DB_PATH = os.environ.get("ATTENDANCE_DB_PATH", "attendance.db") | |
| def connect(db_path: str = DB_PATH): | |
| return sqlite3.connect(db_path, check_same_thread=False) | |
| def init_db(conn: sqlite3.Connection): | |
| cur = conn.cursor() | |
| cur.execute(""" | |
| CREATE TABLE IF NOT EXISTS attendance ( | |
| StudentID TEXT, | |
| StudentName TEXT, | |
| Subject TEXT, | |
| Date TEXT, | |
| Status TEXT CHECK(Status IN ('Present','Absent')), | |
| PRIMARY KEY (StudentID, Subject, Date) | |
| ); | |
| """) | |
| conn.commit() | |
| def normalize_status(x: str) -> str: | |
| if x is None: | |
| return "Absent" | |
| x = str(x).strip().lower() | |
| if x in ["p", "present", "1", "yes", "y"]: | |
| return "Present" | |
| return "Absent" | |
| def load_csv_to_df(file_path: str) -> pd.DataFrame: | |
| df = pd.read_csv(file_path) | |
| expected = {"StudentID", "Subject", "Date", "Status"} | |
| missing = expected - set(map(str, df.columns)) | |
| if missing: | |
| raise ValueError(f"Missing required columns: {missing}. Required: {sorted(expected)}") | |
| # Optional column for names | |
| if "StudentName" not in df.columns: | |
| df["StudentName"] = df["StudentID"] | |
| # normalize | |
| df["Status"] = df["Status"].apply(normalize_status) | |
| # validate date | |
| pd.to_datetime(df["Date"], errors="raise") | |
| return df[["StudentID", "StudentName", "Subject", "Date", "Status"]] | |
| def ingest_df(conn: sqlite3.Connection, df: pd.DataFrame): | |
| init_db(conn) | |
| df = df.copy() | |
| df["Status"] = df["Status"].apply(normalize_status) | |
| # ensure Date is string YYYY-MM-DD | |
| df["Date"] = pd.to_datetime(df["Date"]).dt.strftime("%Y-%m-%d") | |
| df.to_sql("attendance", conn, if_exists="append", index=False) | |
| # remove duplicates by replacing with most recent | |
| cur = conn.cursor() | |
| cur.execute(""" | |
| CREATE TEMP TABLE t AS | |
| SELECT * FROM attendance | |
| GROUP BY StudentID, Subject, Date | |
| """) | |
| cur.execute("DELETE FROM attendance") | |
| cur.execute("INSERT INTO attendance SELECT * FROM t") | |
| cur.execute("DROP TABLE t") | |
| conn.commit() | |
| def get_dataframe(conn: sqlite3.Connection, | |
| start_date: str | None = None, | |
| end_date: str | None = None, | |
| subject: str | None = None, | |
| student_id: str | None = None) -> pd.DataFrame: | |
| init_db(conn) | |
| q = "SELECT StudentID, StudentName, Subject, Date, Status FROM attendance WHERE 1=1" | |
| params = [] | |
| if start_date: | |
| q += " AND Date >= ?" | |
| params.append(start_date) | |
| if end_date: | |
| q += " AND Date <= ?" | |
| params.append(end_date) | |
| if subject: | |
| q += " AND Subject = ?" | |
| params.append(subject) | |
| if student_id: | |
| q += " AND StudentID = ?" | |
| params.append(student_id) | |
| q += " ORDER BY Date, Subject, StudentID" | |
| df = pd.read_sql_query(q, conn, params=params) | |
| return df | |
| def compute_percentages_numpy(df: pd.DataFrame) -> pd.DataFrame: | |
| # per student overall percentage using NumPy | |
| if df.empty: | |
| return pd.DataFrame(columns=["StudentID", "StudentName", "TotalDays", "PresentDays", "AttendancePercent"]) | |
| # map present to 1, absent to 0 | |
| status_arr = (df["Status"].to_numpy() == "Present").astype(int) | |
| # group indices by StudentID | |
| by_student = df.groupby(["StudentID", "StudentName"]).indices | |
| records = [] | |
| for (sid, sname), idx in by_student.items(): | |
| vals = status_arr[idx] | |
| total = vals.size | |
| present = int(vals.sum()) | |
| pct = (present / total) * 100 if total else 0.0 | |
| records.append((sid, sname, total, present, pct)) | |
| out = pd.DataFrame(records, columns=["StudentID", "StudentName", "TotalDays", "PresentDays", "AttendancePercent"]) | |
| out = out.sort_values(["AttendancePercent", "StudentID"], ascending=[True, True]).reset_index(drop=True) | |
| return out | |
| def summary_per_subject(df: pd.DataFrame) -> pd.DataFrame: | |
| if df.empty: | |
| return pd.DataFrame(columns=["Subject", "TotalClasses", "AvgAttendancePercent"]) | |
| # per (Student, Subject) attendance percent then average across students | |
| df_bin = df.assign(Present=(df["Status"] == "Present").astype(int)) | |
| per_ss = (df_bin.groupby(["StudentID", "StudentName", "Subject"]) | |
| .agg(Total=("Present","size"), Present=("Present","sum")) | |
| .reset_index()) | |
| per_ss["AttendancePercent"] = per_ss["Present"]/per_ss["Total"]*100 | |
| # subject summary | |
| subject_summary = (per_ss.groupby("Subject") | |
| .agg(TotalClasses=("Total","sum"), | |
| AvgAttendancePercent=("AttendancePercent","mean")) | |
| .reset_index() | |
| .sort_values("Subject")) | |
| return subject_summary | |
| def students_below_threshold(df: pd.DataFrame, threshold: float=75.0) -> pd.DataFrame: | |
| overall = compute_percentages_numpy(df) | |
| low = overall[overall["AttendancePercent"] < threshold].reset_index(drop=True) | |
| return low | |
| def export_csv(df: pd.DataFrame, path: str): | |
| df.to_csv(path, index=False) | |
| return path | |