|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
|
|
|
@st.cache_resource
def _load_model():
    """Return the shared sentence-embedding model, loading it only once.

    Streamlit re-executes the script on every widget interaction; caching
    the model as a resource avoids re-instantiating it on each rerun.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# Module-level handle used by the rest of the script.
model = _load_model()
|
|
|
|
|
|
|
|
def process_file(uploaded_file):
    """Load an uploaded Excel, CSV, or PDF file into a DataFrame.

    The format is chosen from the extension of ``uploaded_file.name``,
    compared case-insensitively. PDF text is split on whitespace, one row
    per non-blank line, into the columns "Name", "Grade" and "Marks".

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute, such
            as the value returned by ``st.file_uploader``.

    Returns:
        The parsed DataFrame with whitespace stripped from its column
        labels, or ``None`` when the extension is unsupported or reading
        fails. In both failure cases an error is shown via ``st.error``.
    """
    try:
        # Lower-case once so that ".CSV" / ".XLSX" uploads are accepted too.
        filename = uploaded_file.name.lower()

        if filename.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(uploaded_file)
        elif filename.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        elif filename.endswith('.pdf'):
            # Imported lazily so PyPDF2 is only needed for PDF uploads.
            from PyPDF2 import PdfReader

            reader = PdfReader(uploaded_file)
            # A page without extractable text must not break the join, and
            # pages are separated by a newline so the last line of one page
            # is not fused with the first line of the next.
            text = "\n".join(page.extract_text() or "" for page in reader.pages)

            # Blank lines would otherwise become rows made entirely of None.
            rows = [line.split() for line in text.splitlines() if line.strip()]

            # NOTE(review): main() requires the columns Name/Class/Maths/
            # Urdu/English, so a PDF parsed into Name/Grade/Marks is always
            # reported there as missing columns - confirm the intended PDF
            # layout.
            df = pd.DataFrame(rows, columns=["Name", "Grade", "Marks"])
            df["Marks"] = pd.to_numeric(df["Marks"], errors='coerce')
        else:
            st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
            return None

        # Headers may be non-string (e.g. numeric Excel headers); cast first
        # so the .str accessor cannot fail.
        df.columns = df.columns.astype(str).str.strip()
        st.write("DataFrame columns after cleaning:", df.columns)

        return df

    except Exception as e:
        # UI boundary: report any read/parse failure to the user instead of
        # crashing the app.
        st.error(f"Error processing file: {e}")
        return None
|
|
|
|
|
|
|
|
def main():
    """Render the app: upload a marks file, rank students, and search.

    Steps, in order:
      1. Ask for an Excel/CSV/PDF upload and parse it with ``process_file``.
      2. Check that the Name, Class, Maths, Urdu and English columns exist.
      3. Coerce the three subject columns to numbers and add "Total Marks".
      4. Show the ten highest totals.
      5. If a search string is entered, show the row whose embedded text is
         most similar (cosine similarity) to the query.
      6. Show the full table including "Total Marks".
    """
    st.title("School Performance Analysis App")
    st.write("Upload a document containing student grades and marks to analyze their performance.")

    uploaded_file = st.file_uploader("Upload Excel, CSV, or PDF file", type=["xlsx", "xls", "csv", "pdf"])
    if not uploaded_file:
        return

    df = process_file(uploaded_file)
    if df is None:
        # process_file has already reported the problem to the user.
        return

    st.subheader("Uploaded Data")
    st.write(df.head())

    required_columns = ["Name", "Class", "Maths", "Urdu", "English"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Missing columns: {', '.join(missing_columns)}. Please check your data.")
        return

    # Non-numeric marks become NaN, which sum() below skips.
    marks_columns = ["Maths", "Urdu", "English"]
    for col in marks_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df['Total Marks'] = df[marks_columns].sum(axis=1)

    top_performers = df.sort_values(by="Total Marks", ascending=False).head(10)
    st.subheader("Top 10 Students Based on Total Marks")
    st.write(top_performers[["Name", "Class", "Total Marks"]])

    st.subheader("Search for a Student")
    search_query = st.text_input("Enter the student's name:")

    if search_query:
        if df.empty:
            # Nothing to rank; avoids indexing into an empty table.
            st.warning("No student records available to search.")
        else:
            # Embed only when a search is actually requested, and encode all
            # rows in one batch instead of one model call per row.
            row_texts = [
                f"{name} {klass} {maths} {urdu} {english}"
                for name, klass, maths, urdu, english in zip(
                    df["Name"], df["Class"], df["Maths"], df["Urdu"], df["English"]
                )
            ]
            row_embeddings = model.encode(row_texts)
            search_embedding = model.encode(search_query)

            # Cosine similarity: normalise by BOTH vector lengths.
            row_norms = (row_embeddings * row_embeddings).sum(axis=1) ** 0.5
            query_norm = (search_embedding @ search_embedding) ** 0.5
            denominators = row_norms * query_norm
            denominators[denominators == 0] = 1.0  # guard against 0/0
            similarities = (row_embeddings @ search_embedding) / denominators

            # Embeddings and scores are kept out of df so the table shown
            # at the end contains only the student data.
            result = df.iloc[int(similarities.argmax())]
            st.write("Search Result:")
            st.write(result[["Name", "Class", "Maths", "Urdu", "English", "Total Marks"]])

    st.subheader("Updated Data with Total Marks")
    st.write(df)
|
|
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|