Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,10 @@ def process_file(uploaded_file):
|
|
| 16 |
from PyPDF2 import PdfReader
|
| 17 |
reader = PdfReader(uploaded_file)
|
| 18 |
text = "".join(page.extract_text() for page in reader.pages)
|
|
|
|
| 19 |
df = pd.DataFrame([row.split() for row in text.splitlines()], columns=["Name", "Grade", "Marks"])
|
|
|
|
|
|
|
| 20 |
else:
|
| 21 |
st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
|
| 22 |
return None
|
|
@@ -30,7 +33,7 @@ def main():
|
|
| 30 |
st.title("School Performance Analysis App")
|
| 31 |
st.write("Upload a document containing student grades and marks to analyze their performance.")
|
| 32 |
|
| 33 |
-
uploaded_file = st.file_uploader("Upload
|
| 34 |
|
| 35 |
if uploaded_file:
|
| 36 |
df = process_file(uploaded_file)
|
|
@@ -38,23 +41,37 @@ def main():
|
|
| 38 |
st.subheader("Uploaded Data")
|
| 39 |
st.write(df.head())
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
df['
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
st.subheader("Top 10 Students")
|
| 47 |
-
st.write(
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
# Search functionality
|
| 50 |
st.subheader("Search for a Student")
|
| 51 |
search_query = st.text_input("Enter the student's name or details:")
|
|
|
|
| 52 |
if search_query:
|
|
|
|
| 53 |
search_embedding = model.encode(search_query)
|
| 54 |
df['Similarity'] = df['Embedding'].apply(lambda emb: (emb @ search_embedding) / (emb.dot(emb) ** 0.5))
|
|
|
|
| 55 |
result = df.sort_values(by="Similarity", ascending=False).iloc[0]
|
| 56 |
st.write("Search Result:")
|
| 57 |
-
st.write(result[["Name", "Grade", "Marks"]])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
if __name__ == "__main__":
|
| 60 |
main()
|
|
|
|
| 16 |
from PyPDF2 import PdfReader
|
| 17 |
reader = PdfReader(uploaded_file)
|
| 18 |
text = "".join(page.extract_text() for page in reader.pages)
|
| 19 |
+
# Assuming a format with Name, Grade, Marks per line
|
| 20 |
df = pd.DataFrame([row.split() for row in text.splitlines()], columns=["Name", "Grade", "Marks"])
|
| 21 |
+
# Convert marks column to numeric
|
| 22 |
+
df["Marks"] = pd.to_numeric(df["Marks"], errors='coerce')
|
| 23 |
else:
|
| 24 |
st.error("Unsupported file format. Please upload Excel, CSV, or PDF.")
|
| 25 |
return None
|
|
|
|
| 33 |
st.title("School Performance Analysis App")
|
| 34 |
st.write("Upload a document containing student grades and marks to analyze their performance.")
|
| 35 |
|
| 36 |
+
uploaded_file = st.file_uploader("Upload Excel, CSV, or PDF file", type=["xlsx", "xls", "csv", "pdf"])
|
| 37 |
|
| 38 |
if uploaded_file:
|
| 39 |
df = process_file(uploaded_file)
|
|
|
|
| 41 |
st.subheader("Uploaded Data")
|
| 42 |
st.write(df.head())
|
| 43 |
|
| 44 |
+
# Convert Marks to numeric (if not already numeric)
|
| 45 |
+
df['Marks'] = pd.to_numeric(df['Marks'], errors='coerce')
|
| 46 |
+
|
| 47 |
+
# Add a Total Marks column (sum of all marks if there are multiple subjects)
|
| 48 |
+
# Assuming "Marks" column represents the total of all subjects for simplicity.
|
| 49 |
+
df['Total Marks'] = df['Marks'].sum(axis=1)
|
| 50 |
|
| 51 |
+
# Show the top 10 students based on total marks
|
| 52 |
+
top_performers = df.sort_values(by="Total Marks", ascending=False).head(10)
|
| 53 |
+
st.subheader("Top 10 Students Based on Total Marks")
|
| 54 |
+
st.write(top_performers[["Name", "Grade", "Total Marks"]])
|
| 55 |
|
| 56 |
+
# Add embedding column (for searching students by name or details)
|
| 57 |
+
df['Embedding'] = df.apply(lambda row: model.encode(f"{row['Name']} {row['Grade']} {row['Marks']}"), axis=1)
|
| 58 |
+
|
| 59 |
# Search functionality
|
| 60 |
st.subheader("Search for a Student")
|
| 61 |
search_query = st.text_input("Enter the student's name or details:")
|
| 62 |
+
|
| 63 |
if search_query:
|
| 64 |
+
# Find the most similar student based on embeddings
|
| 65 |
search_embedding = model.encode(search_query)
|
| 66 |
df['Similarity'] = df['Embedding'].apply(lambda emb: (emb @ search_embedding) / (emb.dot(emb) ** 0.5))
|
| 67 |
+
# Get the student with the highest similarity score
|
| 68 |
result = df.sort_values(by="Similarity", ascending=False).iloc[0]
|
| 69 |
st.write("Search Result:")
|
| 70 |
+
st.write(result[["Name", "Grade", "Marks", "Total Marks"]])
|
| 71 |
+
|
| 72 |
+
# Show the updated data
|
| 73 |
+
st.subheader("Updated Data with Total Marks")
|
| 74 |
+
st.write(df)
|
| 75 |
|
| 76 |
if __name__ == "__main__":
|
| 77 |
main()
|