Spaces:
Sleeping
Sleeping
File size: 7,097 Bytes
78d5d5c 3ddbbfc 78d5d5c 9df17b8 78d5d5c 9df17b8 78d5d5c 3ddbbfc 78d5d5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import pandas as pd
import numpy as np
import gradio as gr
import os
def process_admissions(file_path, capacity=120):
    """
    Load student admission data, drop invalid rows, and assign an admission order.

    Selection runs on a randomly shuffled copy of the valid rows, in up to three
    rounds, stopping as soon as ``capacity`` is reached:
      1. one student from every institution ('機關名稱');
      2. one more student from every institution that still has applicants;
      3. the remaining students, in shuffled order.
    Admitted students are labelled '正取1', '正取2', ... and everyone else
    '備取1', '備取2', ... in a new '錄取順序' column. The combined list is also
    written to 'admitted_students_processed.csv' in the working directory.

    Args:
        file_path (str): The path to the CSV file containing the student data.
        capacity (int): The maximum number of students to admit. Integral floats
            (e.g. 120.0) are accepted; negative values are treated as 0.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: All valid students with their admission status
              ('正取' or '備取'), or an empty DataFrame if an error occurred or
              no valid data was found.
            - str: The file path to the saved CSV, or None if an error occurred
              or no valid data was found.
    """
    try:
        # Numeric UI widgets may deliver the capacity as a float; it is used as
        # a slice bound below, which requires a non-negative int.
        capacity = max(int(capacity), 0)

        df = pd.read_csv(file_path)
        print("DataFrame loaded successfully.")

        # Filter out rows where any column contains '未填報-未填寫'
        df_filtered = df[~(df == '未填報-未填寫').any(axis=1)].copy()
        print(f"DataFrame after filtering: {len(df_filtered)} rows remaining.")
        if df_filtered.empty:
            print("No valid entries found after filtering.")
            return pd.DataFrame(), None

        # Shuffle (unseeded, so every run draws a new order) to get a random
        # selection within each priority round.
        df_shuffled = df_filtered.sample(frac=1).reset_index(drop=True)

        admitted_parts = []
        admitted_count = 0
        remaining_df = df_shuffled.copy()

        # Rounds 1 and 2: one student per institution per round, up to capacity.
        for _ in range(2):
            open_slots = capacity - admitted_count
            if open_slots <= 0:
                break
            picked = remaining_df.groupby('機關名稱').head(1).index[:open_slots]
            admitted_parts.append(remaining_df.loc[picked])
            remaining_df = remaining_df.drop(picked)
            admitted_count += len(picked)

        # Round 3: fill whatever capacity is left regardless of institution.
        open_slots = capacity - admitted_count
        if open_slots > 0:
            picked = remaining_df.head(open_slots).index
            admitted_parts.append(remaining_df.loc[picked])
            remaining_df = remaining_df.drop(picked)

        # The rounds pick disjoint index labels, so no de-duplication is needed;
        # de-duplicating on row content would silently drop distinct applicants
        # whose data happens to be identical.
        selected = [part for part in admitted_parts if not part.empty]
        if selected:
            admitted_df = pd.concat(selected).reset_index(drop=True)
        else:
            admitted_df = df_shuffled.iloc[0:0].copy()

        # Assign '正取' to admitted students and '備取' to the rest; the column
        # is created on both frames even when one of them is empty.
        admitted_df['錄取順序'] = [f'正取{i}' for i in range(1, len(admitted_df) + 1)]
        remaining_df['錄取順序'] = [f'備取{i}' for i in range(1, len(remaining_df) + 1)]

        # Combine admitted and remaining students (at least one is non-empty
        # because df_filtered is non-empty).
        frames = [frame for frame in (admitted_df, remaining_df) if not frame.empty]
        final_df = pd.concat(frames).reset_index(drop=True)

        print("Final Student List with Admission Status:")
        print(final_df.head())
        print("\nFinal Student List Info:")
        final_df.info()  # info() prints by itself and returns None

        # Save the processed DataFrame to a CSV file
        output_csv_path = "admitted_students_processed.csv"
        final_df.to_csv(output_csv_path, index=False, encoding='utf-8-sig')
        print(f"Processed data saved to {output_csv_path}")
        return final_df, output_csv_path
    except FileNotFoundError:
        print(f"Error: '{file_path}' not found. Please ensure the file is in the correct directory.")
        return pd.DataFrame(), None
    except Exception as e:
        # Top-level boundary for the UI: report the problem and return the
        # documented "empty" result instead of propagating.
        print(f"An error occurred during processing: {e}")
        return pd.DataFrame(), None
# process_admissions (defined above) returns a tuple: (DataFrame, file_path).
# The wrapper below exposes that pair as the two outputs of the Gradio interface.
def process_admissions_and_return_df_and_file(file_path, capacity):
    """
    Run the admission processing and hand back both the table and the saved file.

    This is the callback wired into the Gradio interface; it delegates all work
    to ``process_admissions``.

    Args:
        file_path (str): The path to the uploaded CSV file.
        capacity (int): The maximum number of students to admit.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: The processed DataFrame (including 正取 and 備取).
            - str: The file path to the saved CSV, or None if processing failed.
    """
    processed = process_admissions(file_path, capacity)
    # Unpack explicitly so the two interface outputs are named at the call site.
    result_table, saved_csv_path = processed
    return result_table, saved_csv_path
# Build the Gradio interface, but only when the processing function exists in
# this scope (a leftover guard from running the steps as separate cells).
if 'process_admissions' not in locals():
    print("The 'process_admissions' function is not defined. Please ensure the data processing logic is defined in a previous step.")
else:
    interface = gr.Interface(
        fn=process_admissions_and_return_df_and_file,
        # Inputs: the uploaded CSV and the admission capacity (whole number).
        inputs=[
            gr.File(label="Upload CSV File"),
            gr.Number(label="Admission Capacity", value=120, precision=0),
        ],
        # Outputs: the full student table (admitted and waitlisted) and the
        # generated CSV offered for download.
        outputs=[
            gr.Dataframe(label="Admitted Students List"),
            gr.File(label="Download Admitted Students CSV"),
        ],
        title="Student Admission Processing",
        description="Upload a CSV file to process student admissions based on institutional priority and capacity, and get a list with admission order and a downloadable CSV.",
    )
    print("Gradio interface designed with capacity input.")

# Launch the app only if the interface was actually created above.
if 'interface' not in locals():
    print("Gradio interface not found. Please ensure the previous steps were executed successfully.")
else:
    print("Launching Gradio interface...")
    interface.launch(debug=True, share=False)