import pandas as pd import gradio as gr import io import os import tempfile # Define which grades count as completed COMPLETED_GRADES = {"GM", "RI", "RU", "RG", "RB", "RU+", "R1", "R2", "R3", "S"} PROGRESS_GRADE = "MP" def process_files(course_files, crn_file, session_name): # Validate files exist if not course_files or not crn_file: return None, "Please upload both course roster files and a CRN mapping file." # Validate file extensions for file in course_files + [crn_file]: _, ext = os.path.splitext(file.name) if ext.lower() not in ['.xlsx', '.xls']: return None, f"File {file.name} is not an Excel file. Please upload only Excel files." try: # Read CRN mapping (Course Name in A, CRN in E) crn_map_df = pd.read_excel(crn_file.name, header=None) # Convert CRNs to strings for consistent lookup crn_map = {} for idx, row in crn_map_df.iterrows(): if not pd.isna(row[4]) and not pd.isna(row[0]): # Ensure both CRN and course name exist crn_map[str(row[4])] = str(row[0]) students = {} for course_file in course_files: try: df = pd.read_excel(course_file.name, header=None) # Get CRN from the appropriate cell and convert to string crn = str(df.iloc[0, 2]) if not pd.isna(df.iloc[0, 2]) else "Unknown" course_name = crn_map.get(crn, f"Unknown Course (CRN: {crn})") row = 1 # Start at C2/G2 while row < df.shape[0]: try: # Read name, grade code, grade name = df.iloc[row, 2] # C (names) if pd.isna(name): row += 1 continue grade_code = str(df.iloc[row, 3]).strip().upper() if not pd.isna(df.iloc[row, 3]) else "" # D (grade code) grade = str(df.iloc[row, 6]).strip().upper() if not pd.isna(df.iloc[row, 6]) else "" # G (grade) # Convert "Last, First Middle" to "First Middle Last" try: if isinstance(name, str) and "," in name: last, first = [x.strip() for x in str(name).split(",", 1)] full_name = f"{first} {last}" else: full_name = str(name).strip() except Exception: full_name = str(name).strip() if full_name not in students: students[full_name] = {"progress": set(), "completed": set()} # Parse the course name without CRN course = course_name # Just the course name without the CRN if grade == PROGRESS_GRADE: students[full_name]["progress"].add(course) elif grade in COMPLETED_GRADES: students[full_name]["completed"].add(course) except Exception as e: print(f"Error processing row {row} in file {course_file.name}: {e}") row += 1 except Exception as e: print(f"Error processing file {course_file.name}: {e}") continue # Build summary DataFrame summary = [] for full_name, data in students.items(): # Join with newline character for Excel progress = "\n".join(sorted(data["progress"])) completed = "\n".join(sorted(data["completed"])) both_types = "Y" if data["progress"] and data["completed"] else "" summary.append({ "Name": full_name, "Session": session_name, # Add the session name here "Progress_Courses": progress, "Completed_Courses": completed, "Both_Types": both_types }) summary_df = pd.DataFrame(summary) # Sort by student name if not summary_df.empty: summary_df = summary_df.sort_values("Name") # Save to a temporary file temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") temp_file.close() with pd.ExcelWriter(temp_file.name, engine='openpyxl') as writer: summary_df.to_excel(writer, index=False) # Get the worksheet worksheet = writer.sheets['Sheet1'] # Auto-adjust row heights to accommodate multiple lines and wrap text for idx, row in summary_df.iterrows(): # Set row height based on number of newlines in progress and completed columns progress_lines = row['Progress_Courses'].count('\n') + 1 if isinstance(row['Progress_Courses'], str) else 1 completed_lines = row['Completed_Courses'].count('\n') + 1 if isinstance(row['Completed_Courses'], str) else 1 max_lines = max(progress_lines, completed_lines) # Set row height (approximately 15 points per line) worksheet.row_dimensions[idx + 2].height = max(15 * max_lines, 15) # +2 because of header and 1-based indexing # Enable text wrapping for these columns for col_idx in [2, 3]: # Columns C and D (0-indexed would be 2 and 3) for cell in worksheet[chr(65 + col_idx)]: cell.alignment = cell.alignment.copy(wrapText=True) # Auto-adjust column widths for column in worksheet.columns: max_length = 0 column_letter = column[0].column_letter for cell in column: try: if len(str(cell.value)) > max_length: max_length = len(str(cell.value)) except: pass adjusted_width = (max_length + 2) worksheet.column_dimensions[column_letter].width = min(adjusted_width, 50) # Cap width at 50 # Return the path to the temporary file return temp_file.name, "Summary generated successfully!" except Exception as e: print(f"Error generating summary: {e}") return None, f"Error: {str(e)}" iface = gr.Interface( fn=process_files, inputs=[ gr.Files(label="Upload ALL Course Roster Files (.xlsx)", file_count="multiple"), gr.File(label="Upload CRN-to-Course Mapping File (.xlsx)"), gr.Textbox(label="Session (e.g., Fall 1, Spring 2)", placeholder="Enter session name") ], outputs=[ gr.File(label="Download Student Summary Excel", file_types=[".xlsx"]), gr.Textbox(label="Status/Error Messages") ], title="Student Course Summary Generator", description="Upload all course roster Excel files and a CRN-to-course mapping file. Get a summary Excel file per student." ) if __name__ == "__main__": iface.launch()