import pandas as pd import re import io import gradio as gr def clean_excel(file): try: # Read the uploaded Excel file directly from the file object df_raw = pd.read_excel(file, skiprows=4) df_raw.columns = [ 'S.No', 'District', 'Institution Name', 'V Minorities Sanctioned', 'V Minorities Admitted', 'V NonMinorities Sanctioned', 'V NonMinorities Admitted', 'Course', 'Inter Minorities Sanctioned', 'Inter Minorities Admitted', 'Inter NonMinorities Sanctioned', 'Inter NonMinorities Admitted' ] df_raw = df_raw.iloc[:-1] # Remove summary/footer row df_raw['Institution Name'] = df_raw['Institution Name'].astype(str).apply( lambda x: re.sub(r'\([^)]*\)', '', x).replace('Boys', 'B').replace('Girls', 'G').strip() ) numeric_cols = [ 'V Minorities Sanctioned', 'V Minorities Admitted', 'V NonMinorities Sanctioned', 'V NonMinorities Admitted', 'Inter Minorities Sanctioned', 'Inter Minorities Admitted', 'Inter NonMinorities Sanctioned', 'Inter NonMinorities Admitted' ] for col in numeric_cols: df_raw[col] = pd.to_numeric(df_raw[col], errors='coerce').fillna(0).astype(int) df_v = df_raw[['S.No', 'District', 'Institution Name', 'V Minorities Sanctioned', 'V Minorities Admitted', 'V NonMinorities Sanctioned', 'V NonMinorities Admitted']].copy() df_v['Class'] = 'V' df_v['Sanctioned'] = df_v['V Minorities Sanctioned'] + df_v['V NonMinorities Sanctioned'] df_v['Admitted'] = df_v['V Minorities Admitted'] + df_v['V NonMinorities Admitted'] df_inter = df_raw[['S.No', 'District', 'Institution Name', 'Inter Minorities Sanctioned', 'Inter Minorities Admitted', 'Inter NonMinorities Sanctioned', 'Inter NonMinorities Admitted']].copy() df_inter['Class'] = 'Inter 1st Year' df_inter['Sanctioned'] = df_inter['Inter Minorities Sanctioned'] + df_inter['Inter NonMinorities Sanctioned'] df_inter['Admitted'] = df_inter['Inter Minorities Admitted'] + df_inter['Inter NonMinorities Admitted'] df_final = pd.concat([df_v, df_inter], ignore_index=True) df_final['Vacancies'] = df_final['Sanctioned'] - df_final['Admitted'] df_final = df_final[['S.No', 'District', 'Institution Name', 'Class', 'Sanctioned', 'Admitted', 'Vacancies']] output = io.StringIO() df_final.to_csv(output, index=False) output.seek(0) return gr.File.update(value=io.BytesIO(output.getvalue().encode()), filename="cleaned_data.csv") except Exception as e: return f"❌ Error: {e}" # Gradio Interface interface = gr.Interface( fn=clean_excel, inputs=gr.File(label="Upload Excel File (.xlsx)"), outputs=gr.File(label="Download Cleaned CSV"), title="📊 Clean Admission Excel Data", description="Upload your Excel file. Get a cleaned CSV ready to download." ) if __name__ == "__main__": interface.launch()