import pandas as pd
import gradio as gr
import io
import os
import tempfile

# Define which grades count as completed
COMPLETED_GRADES = {"GM", "RI", "RU", "RG", "RB", "RU+", "R1", "R2", "R3", "S"}
PROGRESS_GRADE = "MP"

def process_files(course_files, crn_file, session_name):
    # Validate files exist
    if not course_files or not crn_file:
        return None, "Please upload both course roster files and a CRN mapping file."
    
    # Validate file extensions
    for file in course_files + [crn_file]:
        _, ext = os.path.splitext(file.name)
        if ext.lower() not in ['.xlsx', '.xls']:
            return None, f"File {file.name} is not an Excel file. Please upload only Excel files."
    
    try:
        # Read CRN mapping (Course Name in A, CRN in E)
        crn_map_df = pd.read_excel(crn_file.name, header=None)
        
        # Convert CRNs to strings for consistent lookup
        crn_map = {}
        for idx, row in crn_map_df.iterrows():
            if not pd.isna(row[4]) and not pd.isna(row[0]):  # Ensure both CRN and course name exist
                crn_map[str(row[4])] = str(row[0])
        
        students = {}
        for course_file in course_files:
            try:
                df = pd.read_excel(course_file.name, header=None)
                
                # Get CRN from the appropriate cell and convert to string
                crn = str(df.iloc[0, 2]) if not pd.isna(df.iloc[0, 2]) else "Unknown"
                course_name = crn_map.get(crn, f"Unknown Course (CRN: {crn})")
                
                row = 1  # Start at C2/G2
                while row < df.shape[0]:
                    try:
                        # Read name, grade code, grade
                        name = df.iloc[row, 2]  # C (names)
                        
                        if pd.isna(name):
                            row += 1
                            continue
                        
                        grade_code = str(df.iloc[row, 3]).strip().upper() if not pd.isna(df.iloc[row, 3]) else ""  # D (grade code)
                        grade = str(df.iloc[row, 6]).strip().upper() if not pd.isna(df.iloc[row, 6]) else ""  # G (grade)
                        
                        # Convert "Last, First Middle" to "First Middle Last"
                        try:
                            if isinstance(name, str) and "," in name:
                                last, first = [x.strip() for x in str(name).split(",", 1)]
                                full_name = f"{first} {last}"
                            else:
                                full_name = str(name).strip()
                        except Exception:
                            full_name = str(name).strip()
                        
                        if full_name not in students:
                            students[full_name] = {"progress": set(), "completed": set()}
                        
                        # Parse the course name without CRN
                        course = course_name  # Just the course name without the CRN
                        
                        if grade == PROGRESS_GRADE:
                            students[full_name]["progress"].add(course)
                        elif grade in COMPLETED_GRADES:
                            students[full_name]["completed"].add(course)
                    except Exception as e:
                        print(f"Error processing row {row} in file {course_file.name}: {e}")
                    
                    row += 1
            except Exception as e:
                print(f"Error processing file {course_file.name}: {e}")
                continue
        
        # Build summary DataFrame
        summary = []
        for full_name, data in students.items():
            # Join with newline character for Excel
            progress = "\n".join(sorted(data["progress"]))
            completed = "\n".join(sorted(data["completed"]))
            both_types = "Y" if data["progress"] and data["completed"] else ""
            summary.append({
                "Name": full_name,
                "Session": session_name,  # Add the session name here
                "Progress_Courses": progress,
                "Completed_Courses": completed,
                "Both_Types": both_types
            })
        
        summary_df = pd.DataFrame(summary)
        
        # Sort by student name
        if not summary_df.empty:
            summary_df = summary_df.sort_values("Name")
        
        # Save to a temporary file
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
        temp_file.close()
        
        with pd.ExcelWriter(temp_file.name, engine='openpyxl') as writer:
            summary_df.to_excel(writer, index=False)
            
            # Get the worksheet
            worksheet = writer.sheets['Sheet1']
            
            # Auto-adjust row heights to accommodate multiple lines and wrap text
            for idx, row in summary_df.iterrows():
                # Set row height based on number of newlines in progress and completed columns
                progress_lines = row['Progress_Courses'].count('\n') + 1 if isinstance(row['Progress_Courses'], str) else 1
                completed_lines = row['Completed_Courses'].count('\n') + 1 if isinstance(row['Completed_Courses'], str) else 1
                max_lines = max(progress_lines, completed_lines)
                
                # Set row height (approximately 15 points per line)
                worksheet.row_dimensions[idx + 2].height = max(15 * max_lines, 15)  # +2 because of header and 1-based indexing
            
            # Enable text wrapping for these columns
            for col_idx in [2, 3]:  # Columns C and D (0-indexed would be 2 and 3)
                for cell in worksheet[chr(65 + col_idx)]:
                    cell.alignment = cell.alignment.copy(wrapText=True)
            
            # Auto-adjust column widths
            for column in worksheet.columns:
                max_length = 0
                column_letter = column[0].column_letter
                for cell in column:
                    try:
                        if len(str(cell.value)) > max_length:
                            max_length = len(str(cell.value))
                    except:
                        pass
                adjusted_width = (max_length + 2)
                worksheet.column_dimensions[column_letter].width = min(adjusted_width, 50)  # Cap width at 50
        
        # Return the path to the temporary file
        return temp_file.name, "Summary generated successfully!"
    
    except Exception as e:
        print(f"Error generating summary: {e}")
        return None, f"Error: {str(e)}"

iface = gr.Interface(
    fn=process_files,
    inputs=[
        gr.Files(label="Upload ALL Course Roster Files (.xlsx)", file_count="multiple"),
        gr.File(label="Upload CRN-to-Course Mapping File (.xlsx)"),
        gr.Textbox(label="Session (e.g., Fall 1, Spring 2)", placeholder="Enter session name")
    ],
    outputs=[
        gr.File(label="Download Student Summary Excel", file_types=[".xlsx"]),
        gr.Textbox(label="Status/Error Messages")
    ],
    title="Student Course Summary Generator",
    description="Upload all course roster Excel files and a CRN-to-course mapping file. Get a summary Excel file per student."
)

if __name__ == "__main__":
    iface.launch()