File size: 7,097 Bytes
78d5d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ddbbfc
 
 
78d5d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9df17b8
78d5d5c
9df17b8
78d5d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ddbbfc
78d5d5c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import pandas as pd
import numpy as np
import gradio as gr
import os

def _select_admitted(shuffled, seats, group_col='機關名稱'):
    """Split an already-shuffled applicant table into admitted and wait-listed rows.

    Seats are handed out in three rounds, each limited by the seats still free:
      1. the first row of every ``group_col`` group,
      2. the first remaining row of every ``group_col`` group,
      3. the remaining rows in their current order.

    Args:
        shuffled (pd.DataFrame): Applicants in the order they should be considered.
        seats (int): Non-negative number of seats to fill.
        group_col (str): Column whose values define the institution groups.

    Returns:
        tuple: ``(admitted, remaining)`` DataFrames. ``admitted`` keeps the
        selection order (round 1 picks first); ``remaining`` keeps the order
        of ``shuffled``.
    """
    picked = shuffled.index[:0]  # empty Index that matches the frame's index type
    pool = shuffled

    # Rounds 1 and 2: at most one (further) applicant per institution.
    for _ in range(2):
        seats_left = seats - len(picked)
        if seats_left <= 0:
            break
        round_picks = pool.groupby(group_col).head(1).index[:seats_left]
        picked = picked.append(round_picks)
        pool = pool.drop(round_picks)

    # Round 3: fill whatever is left regardless of institution.
    seats_left = seats - len(picked)
    if seats_left > 0:
        round_picks = pool.head(seats_left).index
        picked = picked.append(round_picks)
        pool = pool.drop(round_picks)

    # Copy so the caller can add columns without touching ``shuffled``.
    return shuffled.loc[picked], pool.copy()


def process_admissions(file_path, capacity=120, *, random_state=None,
                       output_path="admitted_students_processed.csv"):
    """
    Loads, filters, and ranks student admission data, then saves the result as CSV.

    Rows in which any column equals '未填報-未填寫' are discarded. The remaining
    rows are shuffled, seats are assigned with institutional priority (one per
    '機關名稱', then a second per '機關名稱', then anyone), and every row receives
    a '錄取順序' label: '正取N' for admitted rows, '備取N' for the rest.

    Args:
        file_path (str): The path to the CSV file containing the student data.
        capacity (int): The maximum number of students to admit. Values are
            converted with ``int()``; negative values are treated as 0.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so a draw
            can be reproduced. ``None`` (default) gives a fresh random order.
        output_path (str): Where the processed CSV is written.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: All valid students with their '錄取順序', admitted rows first,
                            or an empty DataFrame if an error occurred or no valid data.
            - str: The file path to the saved CSV, or None if an error occurred or no valid data.
    """
    # A negative capacity used as a slice bound would silently admit
    # "all but the last N" first-round picks, so clamp it to zero up front.
    try:
        seats = max(int(capacity), 0)
    except (TypeError, ValueError, OverflowError):
        print(f"An error occurred during processing: invalid capacity {capacity!r}")
        return pd.DataFrame(), None

    try:
        df = pd.read_csv(file_path)
        print("DataFrame loaded successfully.")

        # Filter out rows where any column contains '未填報-未填寫'
        df_filtered = df[~(df == '未填報-未填寫').any(axis=1)].copy()
        print(f"DataFrame after filtering: {len(df_filtered)} rows remaining.")

        if df_filtered.empty:
            print("No valid entries found after filtering.")
            return pd.DataFrame(), None

        # Shuffle so that selection within each priority round is random.
        df_shuffled = df_filtered.sample(frac=1, random_state=random_state).reset_index(drop=True)

        admitted_df, remaining_df = _select_admitted(df_shuffled, seats)

        # NOTE(review): drop_duplicates compares row *content*; the selection
        # itself never picks the same row twice. Kept to preserve behaviour for
        # inputs that contain byte-identical rows - confirm this is intended.
        admitted_df = admitted_df.drop_duplicates().reset_index(drop=True)

        # Assign '正取' status and order
        if not admitted_df.empty:
            admitted_df['錄取順序'] = [f'正取{rank}' for rank in range(1, len(admitted_df) + 1)]

        # Assign '備取' status; an empty list still creates the column when
        # nobody is left on the waiting list.
        remaining_df['錄取順序'] = [f'備取{rank}' for rank in range(1, len(remaining_df) + 1)]

        # Combine admitted and remaining students
        final_df = pd.concat([admitted_df, remaining_df]).reset_index(drop=True)
        print("Final Student List with Admission Status:")
        print(final_df.head())
        print("\nFinal Student List Info:")
        # info() writes its report itself and returns None, so it is not
        # wrapped in print().
        final_df.info()

        # utf-8-sig adds a BOM to the saved file.
        final_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        print(f"Processed data saved to {output_path}")

        return final_df, output_path

    except FileNotFoundError:
        print(f"Error: '{file_path}' not found. Please ensure the file is in the correct directory.")
        return pd.DataFrame(), None
    except Exception as e:
        # Top-level boundary for the UI callback: report and return the empty result.
        print(f"An error occurred during processing: {e}")
        return pd.DataFrame(), None

# Gradio callback wrapper around process_admissions, which returns a
# (DataFrame, file_path) tuple.

def process_admissions_and_return_df_and_file(file_path, capacity):
    """
    Run the admission processing for the UI and hand back its two results.

    Args:
        file_path (str): The path to the uploaded CSV file.
        capacity (int): The maximum number of students to admit.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: The processed DataFrame (including 正取 and 備取).
            - str: The file path to the saved CSV, or None if processing failed.
    """
    # Delegate all work; this function only forwards the pair unchanged.
    outcome = process_admissions(file_path, capacity)
    ranked_table, saved_csv = outcome
    return ranked_table, saved_csv


# Build the Gradio UI. Inputs: a CSV upload and a numeric capacity field
# (default 120). Outputs: the processed table and the saved CSV as a download.
# The former "'name' in locals()" guards were always true at module level
# (both names are defined above in this file), so they have been removed.
interface = gr.Interface(
    fn=process_admissions_and_return_df_and_file,
    inputs=[
        gr.File(label="Upload CSV File"),
        gr.Number(label="Admission Capacity", value=120, precision=0),
    ],
    outputs=[
        # The table holds every processed student, not only the admitted ones.
        gr.Dataframe(label="Admitted Students List"),
        gr.File(label="Download Admitted Students CSV"),
    ],
    title="Student Admission Processing",
    description="Upload a CSV file to process student admissions based on institutional priority and capacity, and get a list with admission order and a downloadable CSV."
)

print("Gradio interface designed with capacity input.")

# Start the app without a public share link.
print("Launching Gradio interface...")
interface.launch(debug=True, share=False)