# Students / app.py
# AzizWazir's picture
# Update app.py
# 8ad8470 verified
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer
@st.cache_resource
def load_model():
    """Return the shared sentence-embedding model.

    Streamlit re-executes this script from top to bottom on every user
    interaction; caching the loader as a resource keeps a single model
    instance alive instead of re-instantiating it on each rerun.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# Pre-trained model used to embed student records and search queries.
model = load_model()
# Function to process uploaded files
def process_file(uploaded_file):
    """Load an uploaded Excel, CSV, or PDF file into a DataFrame.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute
            (``main`` passes the value returned by ``st.file_uploader``).

    Returns:
        A DataFrame whose column labels are whitespace-stripped strings,
        or ``None`` when the extension is unsupported or parsing fails.
        In both failure cases the problem is reported with ``st.error``.
    """
    # Compare extensions case-insensitively so e.g. "MARKS.CSV" is accepted.
    filename = uploaded_file.name.lower()
    try:
        if filename.endswith((".xlsx", ".xls")):
            df = pd.read_excel(uploaded_file)
        elif filename.endswith(".csv"):
            df = pd.read_csv(uploaded_file)
        elif filename.endswith(".pdf"):
            # Imported lazily so PyPDF2 is only required for PDF uploads.
            from PyPDF2 import PdfReader

            reader = PdfReader(uploaded_file)
            # A page without extractable text may yield None/""; treat it as
            # empty. Pages are joined with a newline so the last line of one
            # page is not fused with the first line of the next.
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            # Assuming a format with Name, Grade, Marks per line. Split from
            # the right so multi-word names stay in one field, and skip lines
            # (blank lines, stray fragments) that do not yield three fields.
            rows = [
                fields
                for fields in (line.rsplit(maxsplit=2) for line in text.splitlines())
                if len(fields) == 3
            ]
            df = pd.DataFrame(rows, columns=["Name", "Grade", "Marks"])
            # Convert marks column to numeric; unparsable values become NaN.
            df["Marks"] = pd.to_numeric(df["Marks"], errors="coerce")
        else:
            st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
            return None

        # Clean column names. Going through str() keeps this working when a
        # header is not a string (e.g. a numeric Excel header), where the
        # ``.str`` accessor would fail or produce NaN labels.
        df.columns = [str(col).strip() for col in df.columns]
        st.write("DataFrame columns after cleaning:", df.columns)
        return df
    except Exception as e:
        # UI boundary: surface any parsing failure to the user instead of
        # crashing the app.
        st.error(f"Error processing file: {e}")
        return None
def _find_closest_student(df, query):
    """Return the row of ``df`` whose text embedding best matches ``query``.

    Each student is described by the same "Name Class Maths Urdu English"
    string; rows are ranked by cosine similarity to the query embedding.
    ``df`` must contain at least one row.
    """
    texts = [
        f"{name} {klass} {maths} {urdu} {english}"
        for name, klass, maths, urdu, english in zip(
            df["Name"], df["Class"], df["Maths"], df["Urdu"], df["English"]
        )
    ]
    # One batched encode call for all rows instead of one call per row.
    embeddings = model.encode(texts)
    query_embedding = model.encode(query)

    row_norms = (embeddings * embeddings).sum(axis=1) ** 0.5
    query_norm = query_embedding.dot(query_embedding) ** 0.5
    denominators = row_norms * query_norm
    # Guard against a zero-length vector turning the score into NaN.
    denominators[denominators == 0] = 1.0
    similarity = (embeddings @ query_embedding) / denominators

    return df.iloc[int(similarity.argmax())]


# Main app
def main():
    """Run the Streamlit page: upload, rank by total marks, and search.

    Flow: read the uploaded file with ``process_file``, verify the required
    columns, compute ``Total Marks`` from the Maths/Urdu/English columns,
    show the ten highest totals, offer a similarity-based student search,
    and finally display the full table including ``Total Marks``.
    """
    st.title("School Performance Analysis App")
    st.write("Upload a document containing student grades and marks to analyze their performance.")

    uploaded_file = st.file_uploader("Upload Excel, CSV, or PDF file", type=["xlsx", "xls", "csv", "pdf"])
    if uploaded_file is None:
        return

    df = process_file(uploaded_file)
    if df is None:
        # process_file has already reported the problem to the user.
        return

    st.subheader("Uploaded Data")
    st.write(df.head())

    # Ensure the required columns exist (excluding "S.No.", "Science", "History", "Islamiat", "Geography")
    required_columns = ["Name", "Class", "Maths", "Urdu", "English"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Missing columns: {', '.join(missing_columns)}. Please check your data.")
        return

    # Convert marks columns to numeric (if not already numeric); values that
    # cannot be parsed become NaN and are ignored by the sum below.
    marks_columns = ["Maths", "Urdu", "English"]
    for col in marks_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Calculate the Total Marks (sum of all subject columns)
    df['Total Marks'] = df[marks_columns].sum(axis=1)

    # Show the top 10 students based on Total Marks
    top_performers = df.sort_values(by="Total Marks", ascending=False).head(10)
    st.subheader("Top 10 Students Based on Total Marks")
    st.write(top_performers[["Name", "Class", "Total Marks"]])

    # Search functionality. Embeddings are only computed once a query has
    # been entered, and are kept out of ``df`` so the table shown at the end
    # contains student data only.
    st.subheader("Search for a Student")
    search_query = st.text_input("Enter the student's name:")
    if search_query:
        if df.empty:
            st.warning("The uploaded file contains no student rows to search.")
        else:
            result = _find_closest_student(df, search_query)
            st.write("Search Result:")
            st.write(result[["Name", "Class", "Maths", "Urdu", "English", "Total Marks"]])

    # Show the updated data with Total Marks column
    st.subheader("Updated Data with Total Marks")
    st.write(df)
# Run the app only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()