"""Streamlit app that analyses student marks uploaded as Excel, CSV, or PDF.

The app shows the uploaded table, ranks students by their total marks in
Maths, Urdu and English, and offers a sentence-embedding based search over
the student rows.
"""

import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer

# Columns the analysis needs (excluding "S.No.", "Science", "History",
# "Islamiat", "Geography").
REQUIRED_COLUMNS = ["Name", "Class", "Maths", "Urdu", "English"]
# Subject columns that are summed into "Total Marks".
MARKS_COLUMNS = ["Maths", "Urdu", "English"]


@st.cache_resource
def _load_model():
    """Return the pre-trained sentence-embedding model.

    Streamlit re-executes the script on every widget interaction; caching the
    model as a resource avoids rebuilding it on each rerun.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


# Load pre-trained model for embedding
model = _load_model()


def _read_pdf(uploaded_file):
    """Parse a PDF whose text lines are assumed to be ``Name Grade Marks``.

    The last two whitespace-separated fields of each line are taken as Grade
    and Marks; everything before them is the Name, so names may contain
    spaces. Lines with fewer than three fields are skipped.

    Returns a DataFrame with columns Name, Grade and Marks (Marks numeric,
    unparseable values become NaN).
    """
    # Imported lazily so PyPDF2 is only required when a PDF is uploaded.
    from PyPDF2 import PdfReader

    reader = PdfReader(uploaded_file)
    # Join pages with a newline so the last line of one page is not fused
    # with the first line of the next; `or ""` is a defensive guard for
    # pages that yield no text.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

    rows = []
    for line in text.splitlines():
        fields = line.strip().rsplit(maxsplit=2)
        if len(fields) == 3:
            rows.append(fields)

    df = pd.DataFrame(rows, columns=["Name", "Grade", "Marks"])
    # Convert marks column to numeric
    df["Marks"] = pd.to_numeric(df["Marks"], errors='coerce')
    return df


def process_file(uploaded_file):
    """Read an uploaded Excel, CSV, or PDF file into a DataFrame.

    Args:
        uploaded_file: File-like object with a ``name`` attribute, as
            returned by ``st.file_uploader``.

    Returns:
        The parsed DataFrame with whitespace-stripped column names, or
        ``None`` when the format is unsupported or parsing fails (an error
        message is shown in the app in both cases).
    """
    try:
        # Compare case-insensitively so e.g. "MARKS.CSV" is accepted.
        filename = uploaded_file.name.lower()
        if filename.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(uploaded_file)
        elif filename.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        elif filename.endswith('.pdf'):
            # NOTE(review): PDF frames carry Name/Grade/Marks columns, which
            # do not include the columns main() requires, so PDF uploads
            # currently stop at the missing-columns check. Confirm the
            # intended PDF layout.
            df = _read_pdf(uploaded_file)
        else:
            st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
            return None

        # Clean column names. str() keeps non-string headers (e.g. numbers)
        # from breaking the strip or turning into NaN labels.
        df.columns = [str(col).strip() for col in df.columns]
        st.write("DataFrame columns after cleaning:", df.columns)
        return df
    except Exception as e:
        # UI boundary: report any read/parse failure to the user instead of
        # crashing the app.
        st.error(f"Error processing file: {e}")
        return None


def _cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors (0.0 if either is zero)."""
    denom = (a.dot(a) * b.dot(b)) ** 0.5
    if not denom:
        return 0.0
    return float(a.dot(b) / denom)


def main():
    """Render the app: upload, validation, ranking, and student search."""
    st.title("School Performance Analysis App")
    st.write("Upload a document containing student grades and marks to analyze their performance.")

    uploaded_file = st.file_uploader(
        "Upload Excel, CSV, or PDF file", type=["xlsx", "xls", "csv", "pdf"]
    )
    if not uploaded_file:
        return

    df = process_file(uploaded_file)
    if df is None:
        return

    st.subheader("Uploaded Data")
    st.write(df.head())

    # Ensure the required columns exist
    missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing_columns:
        st.error(f"Missing columns: {', '.join(missing_columns)}. Please check your data.")
        return

    if df.empty:
        # Nothing to rank or search; avoids an IndexError on the search path.
        st.warning("The uploaded file contains no student rows.")
        return

    # Convert marks columns to numeric (if not already numeric)
    for col in MARKS_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Calculate the Total Marks (sum of all subject columns)
    df['Total Marks'] = df[MARKS_COLUMNS].sum(axis=1)

    # Show the top 10 students based on Total Marks
    top_performers = df.sort_values(by="Total Marks", ascending=False).head(10)
    st.subheader("Top 10 Students Based on Total Marks")
    st.write(top_performers[["Name", "Class", "Total Marks"]])

    # Add embedding column (for searching students by name or details).
    # All rows are encoded in one batched call instead of one call per row.
    row_texts = [
        f"{name} {class_} {maths} {urdu} {english}"
        for name, class_, maths, urdu, english in zip(
            df["Name"], df["Class"], df["Maths"], df["Urdu"], df["English"]
        )
    ]
    df['Embedding'] = list(model.encode(row_texts))

    # Search functionality
    st.subheader("Search for a Student")
    search_query = st.text_input("Enter the student's name:").strip()
    if search_query:
        # Find the most similar student based on embeddings
        search_embedding = model.encode(search_query)
        df['Similarity'] = df['Embedding'].apply(
            lambda emb: _cosine_similarity(emb, search_embedding)
        )

        # Get the student with the highest similarity score
        result = df.sort_values(by="Similarity", ascending=False).iloc[0]
        st.write("Search Result:")
        st.write(result[["Name", "Class", "Maths", "Urdu", "English", "Total Marks"]])

    # Show the updated data with Total Marks column
    st.subheader("Updated Data with Total Marks")
    st.write(df)


if __name__ == "__main__":
    main()