import os
from typing import Optional, Tuple

import numpy as np
import pandas as pd

try:
    import gradio as gr
except ImportError:
    # Keep the data-processing functions importable (and testable) on machines
    # without the UI dependency; only building/launching the UI needs gradio.
    gr = None

# Column holding the applicant's institution; priority rounds group on it.
INSTITUTION_COLUMN = '機關名稱'
# Column added to the output holding the admission / waitlist rank.
ORDER_COLUMN = '錄取順序'
# Cell value marking an incomplete application; rows containing it are dropped.
MISSING_MARKER = '未填報-未填寫'
# Default location of the exported CSV (relative to the working directory).
DEFAULT_OUTPUT_CSV = "admitted_students_processed.csv"
# Number of "one applicant per institution" rounds run before the free fill.
PRIORITY_ROUNDS = 2


def _resolve_csv_source(file_path):
    """Return something ``pd.read_csv`` can open for *file_path*.

    Plain strings and path objects are returned untouched. Other objects
    (e.g. the temp-file wrapper some Gradio versions pass for uploads) are
    replaced by their ``.name`` attribute when they have one.
    """
    if isinstance(file_path, (str, os.PathLike)):
        return file_path
    return getattr(file_path, "name", file_path)


def _select_admitted(pool, seats):
    """Split *pool* into (admitted, remaining) rows for *seats* places.

    Runs ``PRIORITY_ROUNDS`` rounds that each take at most one not-yet-admitted
    row per institution (in the current row order of *pool*), then fills any
    seats still open with the next rows of the remaining pool.
    """
    chunks = []
    for _ in range(PRIORITY_ROUNDS):
        if seats <= 0:
            break
        picked = pool.groupby(INSTITUTION_COLUMN).head(1).index[:seats]
        chunks.append(pool.loc[picked])
        pool = pool.drop(picked)
        seats -= len(picked)

    if seats > 0:
        picked = pool.head(seats).index
        chunks.append(pool.loc[picked])
        pool = pool.drop(picked)

    admitted = pd.concat(chunks) if chunks else pool.iloc[0:0]
    return admitted, pool


def _with_order_labels(frame, prefix):
    """Return a copy of *frame* with ``ORDER_COLUMN`` set to prefix1..prefixN."""
    labelled = frame.copy()
    labelled[ORDER_COLUMN] = [f'{prefix}{rank}' for rank in range(1, len(labelled) + 1)]
    return labelled


def process_admissions(
    file_path,
    capacity=120,
    *,
    output_csv_path=DEFAULT_OUTPUT_CSV,
    random_state=None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """Load applicant data, run the admission lottery and export the result.

    Rows in which any cell equals ``'未填報-未填寫'`` are discarded. The
    remaining rows are shuffled, then seats are handed out in two rounds of
    "at most one applicant per institution", followed by a free fill of any
    seats still open. Admitted rows are labelled ``正取1..N`` and all other
    rows ``備取1..M`` in the ``錄取順序`` column.

    Args:
        file_path: Path to the CSV file, or an object exposing the path as
            ``.name`` (such as an uploaded-file wrapper).
        capacity: Maximum number of applicants to admit. Converted with
            ``int()``; negative values are treated as 0.
        output_csv_path: Where to write the resulting CSV (UTF-8 with BOM).
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the shuffle reproducible. ``None`` keeps it random.

    Returns:
        A tuple ``(final_df, csv_path)``. On any error, or when no valid rows
        remain after filtering, ``(empty DataFrame, None)`` is returned and a
        message is printed instead of raising.
    """
    try:
        # A negative capacity would otherwise turn into a negative slice bound
        # and admit "all but the last k" applicants.
        seats = max(int(capacity), 0)

        df = pd.read_csv(_resolve_csv_source(file_path))
        print("DataFrame loaded successfully.")

        if INSTITUTION_COLUMN not in df.columns:
            print(f"Error: required column '{INSTITUTION_COLUMN}' not found in the CSV.")
            return pd.DataFrame(), None

        # Filter out rows where any column contains the "not filled in" marker.
        df_filtered = df[~(df == MISSING_MARKER).any(axis=1)]
        print(f"DataFrame after filtering: {len(df_filtered)} rows remaining.")

        if df_filtered.empty:
            print("No valid entries found after filtering.")
            return pd.DataFrame(), None

        # Shuffle so that selection inside each priority group is random.
        shuffled = df_filtered.sample(frac=1, random_state=random_state).reset_index(drop=True)

        admitted, waitlist = _select_admitted(shuffled, seats)

        # NOTE(review): the rounds select by unique index label, so they can
        # never pick the same row twice; this call only removes admitted rows
        # whose *contents* are identical. That can leave seats unfilled when
        # the CSV holds identical rows. Kept for backward compatibility --
        # confirm whether de-duplicating before the lottery is the intent.
        admitted = admitted.drop_duplicates().reset_index(drop=True)

        admitted = _with_order_labels(admitted, '正取')
        waitlist = _with_order_labels(waitlist, '備取')

        # Skip empty frames: they add nothing and only affect dtype inference.
        parts = [frame for frame in (admitted, waitlist) if not frame.empty]
        final_df = pd.concat(parts).reset_index(drop=True)

        print("Final Student List with Admission Status:")
        print(final_df.head())
        print("\nFinal Student List Info:")
        # info() writes to stdout itself and returns None, so it is not wrapped in print().
        final_df.info()

        # utf-8-sig so spreadsheet applications detect the encoding of the CJK text.
        final_df.to_csv(output_csv_path, index=False, encoding='utf-8-sig')
        print(f"Processed data saved to {output_csv_path}")

        return final_df, output_csv_path

    except FileNotFoundError:
        print(f"Error: '{file_path}' not found. Please ensure the file is in the correct directory.")
        return pd.DataFrame(), None
    except Exception as e:
        # UI callback boundary: report the problem and return an empty result
        # rather than propagating the exception to the caller.
        print(f"An error occurred during processing: {e}")
        return pd.DataFrame(), None


def process_admissions_and_return_df_and_file(file_path, capacity):
    """Gradio callback: run ``process_admissions`` and pass its result through.

    Args:
        file_path: The uploaded CSV (path string or uploaded-file object).
        capacity: The maximum number of students to admit.

    Returns:
        tuple: ``(DataFrame, csv_path)`` exactly as returned by
        ``process_admissions``; ``csv_path`` is ``None`` if processing failed.
    """
    return process_admissions(file_path, capacity)


def build_interface():
    """Create the Gradio interface wired to the admission-processing callback."""
    return gr.Interface(
        fn=process_admissions_and_return_df_and_file,
        inputs=[
            gr.File(label="Upload CSV File"),
            gr.Number(label="Admission Capacity", value=120, precision=0),
        ],
        outputs=[
            # The table lists every valid applicant, admitted and waitlisted.
            gr.Dataframe(label="Admitted Students List"),
            gr.File(label="Download Admitted Students CSV"),
        ],
        title="Student Admission Processing",
        description="Upload a CSV file to process student admissions based on institutional priority and capacity, and get a list with admission order and a downloadable CSV.",
    )


if gr is not None:
    interface = build_interface()
    print("Gradio interface designed with capacity input.")
else:
    interface = None
    print("Gradio is not installed; the web interface is unavailable.")


if __name__ == "__main__":
    # Launch only when executed as a script/notebook cell, never on import.
    if interface is None:
        raise SystemExit("Gradio is required to launch the interface (pip install gradio).")
    print("Launching Gradio interface...")
    interface.launch(debug=True, share=False)