CertGen / app.py
kambris's picture
Update app.py
ca30333 verified
import pandas as pd
import gradio as gr
import io
import os
import tempfile
# Define which grades count as completed
COMPLETED_GRADES = {"GM", "RI", "RU", "RG", "RB", "RU+", "R1", "R2", "R3", "S"}
PROGRESS_GRADE = "MP"
def process_files(course_files, crn_file, session_name):
# Validate files exist
if not course_files or not crn_file:
return None, "Please upload both course roster files and a CRN mapping file."
# Validate file extensions
for file in course_files + [crn_file]:
_, ext = os.path.splitext(file.name)
if ext.lower() not in ['.xlsx', '.xls']:
return None, f"File {file.name} is not an Excel file. Please upload only Excel files."
try:
# Read CRN mapping (Course Name in A, CRN in E)
crn_map_df = pd.read_excel(crn_file.name, header=None)
# Convert CRNs to strings for consistent lookup
crn_map = {}
for idx, row in crn_map_df.iterrows():
if not pd.isna(row[4]) and not pd.isna(row[0]): # Ensure both CRN and course name exist
crn_map[str(row[4])] = str(row[0])
students = {}
for course_file in course_files:
try:
df = pd.read_excel(course_file.name, header=None)
# Get CRN from the appropriate cell and convert to string
crn = str(df.iloc[0, 2]) if not pd.isna(df.iloc[0, 2]) else "Unknown"
course_name = crn_map.get(crn, f"Unknown Course (CRN: {crn})")
row = 1 # Start at C2/G2
while row < df.shape[0]:
try:
# Read name, grade code, grade
name = df.iloc[row, 2] # C (names)
if pd.isna(name):
row += 1
continue
grade_code = str(df.iloc[row, 3]).strip().upper() if not pd.isna(df.iloc[row, 3]) else "" # D (grade code)
grade = str(df.iloc[row, 6]).strip().upper() if not pd.isna(df.iloc[row, 6]) else "" # G (grade)
# Convert "Last, First Middle" to "First Middle Last"
try:
if isinstance(name, str) and "," in name:
last, first = [x.strip() for x in str(name).split(",", 1)]
full_name = f"{first} {last}"
else:
full_name = str(name).strip()
except Exception:
full_name = str(name).strip()
if full_name not in students:
students[full_name] = {"progress": set(), "completed": set()}
# Parse the course name without CRN
course = course_name # Just the course name without the CRN
if grade == PROGRESS_GRADE:
students[full_name]["progress"].add(course)
elif grade in COMPLETED_GRADES:
students[full_name]["completed"].add(course)
except Exception as e:
print(f"Error processing row {row} in file {course_file.name}: {e}")
row += 1
except Exception as e:
print(f"Error processing file {course_file.name}: {e}")
continue
# Build summary DataFrame
summary = []
for full_name, data in students.items():
# Join with newline character for Excel
progress = "\n".join(sorted(data["progress"]))
completed = "\n".join(sorted(data["completed"]))
both_types = "Y" if data["progress"] and data["completed"] else ""
summary.append({
"Name": full_name,
"Session": session_name, # Add the session name here
"Progress_Courses": progress,
"Completed_Courses": completed,
"Both_Types": both_types
})
summary_df = pd.DataFrame(summary)
# Sort by student name
if not summary_df.empty:
summary_df = summary_df.sort_values("Name")
# Save to a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
temp_file.close()
with pd.ExcelWriter(temp_file.name, engine='openpyxl') as writer:
summary_df.to_excel(writer, index=False)
# Get the worksheet
worksheet = writer.sheets['Sheet1']
# Auto-adjust row heights to accommodate multiple lines and wrap text
for idx, row in summary_df.iterrows():
# Set row height based on number of newlines in progress and completed columns
progress_lines = row['Progress_Courses'].count('\n') + 1 if isinstance(row['Progress_Courses'], str) else 1
completed_lines = row['Completed_Courses'].count('\n') + 1 if isinstance(row['Completed_Courses'], str) else 1
max_lines = max(progress_lines, completed_lines)
# Set row height (approximately 15 points per line)
worksheet.row_dimensions[idx + 2].height = max(15 * max_lines, 15) # +2 because of header and 1-based indexing
# Enable text wrapping for these columns
for col_idx in [2, 3]: # Columns C and D (0-indexed would be 2 and 3)
for cell in worksheet[chr(65 + col_idx)]:
cell.alignment = cell.alignment.copy(wrapText=True)
# Auto-adjust column widths
for column in worksheet.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = (max_length + 2)
worksheet.column_dimensions[column_letter].width = min(adjusted_width, 50) # Cap width at 50
# Return the path to the temporary file
return temp_file.name, "Summary generated successfully!"
except Exception as e:
print(f"Error generating summary: {e}")
return None, f"Error: {str(e)}"
iface = gr.Interface(
fn=process_files,
inputs=[
gr.Files(label="Upload ALL Course Roster Files (.xlsx)", file_count="multiple"),
gr.File(label="Upload CRN-to-Course Mapping File (.xlsx)"),
gr.Textbox(label="Session (e.g., Fall 1, Spring 2)", placeholder="Enter session name")
],
outputs=[
gr.File(label="Download Student Summary Excel", file_types=[".xlsx"]),
gr.Textbox(label="Status/Error Messages")
],
title="Student Course Summary Generator",
description="Upload all course roster Excel files and a CRN-to-course mapping file. Get a summary Excel file per student."
)
if __name__ == "__main__":
iface.launch()