Spaces:

AzizWazir
/

Students

Sleeping

App Files Files Community

Students / app.py

AzizWazir

Update app.py

8ad8470 verified 12 months ago

raw

history blame contribute delete

4.13 kB

	import streamlit as st
	import pandas as pd
	from sentence_transformers import SentenceTransformer

	# Load the pre-trained embedding model once and reuse it. Streamlit re-executes
	# this script from the top on every widget interaction, so an uncached
	# constructor call here would rebuild the model on each rerun.
	# NOTE(review): st.cache_resource needs Streamlit >= 1.18 - confirm the pinned version.
	@st.cache_resource
	def _load_model():
	    """Return the shared SentenceTransformer instance used for student search."""
	    return SentenceTransformer('all-MiniLM-L6-v2')


	model = _load_model()

	# Function to process uploaded files
	def process_file(uploaded_file):
	    """Load an uploaded Excel, CSV, or PDF file into a DataFrame.

	    Args:
	        uploaded_file: File-like object exposing a ``name`` attribute, as
	            passed in by ``main`` from ``st.file_uploader``.

	    Returns:
	        A DataFrame whose column labels are whitespace-stripped strings, or
	        ``None`` when the extension is unsupported or loading raises. Both
	        failure cases are reported to the user through ``st.error``.
	    """
	    try:
	        # Compare extensions case-insensitively so "GRADES.CSV" is accepted.
	        filename = uploaded_file.name.lower()
	        if filename.endswith(('.xlsx', '.xls')):
	            df = pd.read_excel(uploaded_file)
	        elif filename.endswith('.csv'):
	            df = pd.read_csv(uploaded_file)
	        elif filename.endswith('.pdf'):
	            from PyPDF2 import PdfReader
	            reader = PdfReader(uploaded_file)
	            # Join pages with a newline so the last row of one page cannot fuse
	            # with the first row of the next; tolerate pages that yield no text.
	            text = "\n".join(page.extract_text() or "" for page in reader.pages)
	            # Assuming a format with Name, Grade, Marks per line. Split from the
	            # right so multi-word names stay in the Name field, and skip lines
	            # that do not carry all three fields (blank lines, stray fragments).
	            # NOTE(review): main() requires Name/Class/Maths/Urdu/English, which
	            # this PDF layout never produces - confirm the intended PDF schema.
	            rows = []
	            for line in text.splitlines():
	                fields = line.strip().rsplit(None, 2)
	                if len(fields) == 3:
	                    rows.append(fields)
	            df = pd.DataFrame(rows, columns=["Name", "Grade", "Marks"])
	            # Convert marks column to numeric; unparsable values become NaN.
	            df["Marks"] = pd.to_numeric(df["Marks"], errors='coerce')
	        else:
	            st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
	            return None

	        # Clean column names. Going through str() keeps this working when a
	        # header cell is numeric, where the .str accessor would fail or give NaN.
	        df.columns = [str(label).strip() for label in df.columns]
	        st.write("DataFrame columns after cleaning:", df.columns)

	        return df

	    except Exception as e:
	        # UI boundary: surface any loader failure to the user instead of crashing.
	        st.error(f"Error processing file: {e}")
	        return None

	# Main app
	def main():
	    """Render the page: upload a grade sheet, rank students, and search them.

	    Loads the uploaded file through ``process_file``, requires the ``Name``,
	    ``Class``, ``Maths``, ``Urdu`` and ``English`` columns, adds a
	    ``Total Marks`` column, shows the ten highest totals, and, when a query
	    is entered, shows the student record most similar to it.
	    """
	    st.title("School Performance Analysis App")
	    st.write("Upload a document containing student grades and marks to analyze their performance.")

	    uploaded_file = st.file_uploader("Upload Excel, CSV, or PDF file", type=["xlsx", "xls", "csv", "pdf"])
	    if uploaded_file is None:
	        return

	    df = process_file(uploaded_file)
	    if df is None:
	        # process_file has already reported the problem via st.error.
	        return

	    st.subheader("Uploaded Data")
	    st.write(df.head())

	    # Ensure the required columns exist (excluding "S.No.", "Science", "History", "Islamiat", "Geography")
	    marks_columns = ["Maths", "Urdu", "English"]
	    required_columns = ["Name", "Class", *marks_columns]
	    missing_columns = [col for col in required_columns if col not in df.columns]
	    if missing_columns:
	        st.error(f"Missing columns: {', '.join(missing_columns)}. Please check your data.")
	        return

	    # Convert marks columns to numeric; unparsable values become NaN.
	    for col in marks_columns:
	        df[col] = pd.to_numeric(df[col], errors='coerce')

	    # Total Marks is the row-wise sum of the subject columns (NaN is skipped).
	    df['Total Marks'] = df[marks_columns].sum(axis=1)

	    # Show the top 10 students based on Total Marks
	    top_performers = df.sort_values(by="Total Marks", ascending=False).head(10)
	    st.subheader("Top 10 Students Based on Total Marks")
	    st.write(top_performers[["Name", "Class", "Total Marks"]])

	    # Search functionality
	    st.subheader("Search for a Student")
	    search_query = st.text_input("Enter the student's name:").strip()

	    if search_query:
	        if df.empty:
	            # Nothing to rank; iloc[0] below would raise on an empty table.
	            st.warning("No student records available to search.")
	        else:
	            # Embed only when a search is requested, in one batched call, and
	            # keep the vectors out of df so they are not shown in the table.
	            texts = [
	                " ".join(str(value) for value in record)
	                for record in df[required_columns].itertuples(index=False, name=None)
	            ]
	            student_vectors = model.encode(texts)
	            query_vector = model.encode(search_query)

	            # Cosine similarity between every student record and the query.
	            student_norms = (student_vectors ** 2).sum(axis=1) ** 0.5
	            query_norm = (query_vector @ query_vector) ** 0.5
	            df['Similarity'] = (student_vectors @ query_vector) / (student_norms * query_norm)

	            # Get the student with the highest similarity score
	            result = df.sort_values(by="Similarity", ascending=False).iloc[0]
	            st.write("Search Result:")
	            st.write(result[["Name", "Class", "Maths", "Urdu", "English", "Total Marks"]])

	    # Show the updated data with Total Marks column
	    st.subheader("Updated Data with Total Marks")
	    st.write(df)

	# Entry point: build the page only when this file is executed as a script.
	if __name__ == "__main__":
	    main()