import pandas as pd import gradio as gr from rapidfuzz import process # NOTE: Adjust file_path for Hugging Face Spaces # Assuming you upload 'indaction air port 1-4.xlsx' directly to your Space's root directory. file_path = "indaction air port 1-4.xlsx" # Read the 'Orascom' sheet raw_orascom = pd.read_excel(file_path, sheet_name='Orascom', header=None) # Read the 'Sub Contractor' sheet raw_subcontractor = pd.read_excel(file_path, sheet_name='Sub Contractor', header=None) # Get header from Orascom sheet (assuming it's consistent) header = raw_orascom.iloc[1] needed_cols_list = [ "ID Number / HR Code", "NAME", "Company", "Phone no.", "Position", "Induction Date" ] # --- Process Orascom data --- df_orascom_temp = raw_orascom[2:].copy() df_orascom_temp.columns = header df_orascom_temp.columns = df_orascom_temp.columns.astype(str).str.replace('\n', ' ', regex=True).str.replace(r'\s+', ' ', regex=True).str.strip() df_orascom_final = df_orascom_temp[needed_cols_list] df_orascom_final = df_orascom_final.dropna(subset=["NAME"]) df_orascom_final["NAME"] = df_orascom_final["NAME"].astype(str).str.lower().str.strip() df_orascom_final["ID Number / HR Code"] = df_orascom_final["ID Number / HR Code"].astype(str).str.replace(".0","", regex=False).str.strip() # --- Process Sub Contractor data --- df_subcontractor_temp = raw_subcontractor[2:].copy() # Trim df_subcontractor to the same number of columns as the header df_subcontractor_temp = df_subcontractor_temp.iloc[:, :len(header)] df_subcontractor_temp.columns = header df_subcontractor_temp.columns = df_subcontractor_temp.columns.astype(str).str.replace('\n', ' ', regex=True).str.replace(r'\s+', ' ', regex=True).str.strip() df_subcontractor_final = df_subcontractor_temp[needed_cols_list] df_subcontractor_final = df_subcontractor_final.dropna(subset=["NAME"]) df_subcontractor_final["NAME"] = df_subcontractor_final["NAME"].astype(str).str.lower().str.strip() df_subcontractor_final["ID Number / HR Code"] = df_subcontractor_final["ID Number / HR Code"].astype(str).str.replace(".0","", regex=False).str.strip() def search_person(query, df_to_search): query = query.strip().lower() # ID search res_id = df_to_search[df_to_search["ID Number / HR Code"] == query] if not res_id.empty: return res_id # Name search names = df_to_search["NAME"].tolist() # Increased the score threshold to 85 for more precise name matching matches = process.extract(query, names) similar_people = [] for match, score, idx in matches: if score > 85: # Changed from 70 to 85 for stricter matching similar_people.append(df_to_search.iloc[idx]) if similar_people: return pd.DataFrame(similar_people).drop_duplicates(subset=["ID Number / HR Code"]) return None def chatbot(user_type_selection, search_query_input): if not user_type_selection: return "Please select if you are OC or Subcontractor.", None if not search_query_input: return "Please enter a Name or ID to search.", None df_to_use = None if user_type_selection == "OC": df_to_use = df_orascom_final elif user_type_selection == "Subcontractor": df_to_use = df_subcontractor_final else: return "Invalid user type selection.", None persons = search_person(search_query_input, df_to_use) if persons is None or persons.empty: return f"❌ This person did NOT take induction within {user_type_selection} data.", None return "✅ Found person(s).", persons demo = gr.Interface( fn=chatbot, inputs=[ gr.Radio(["OC", "Subcontractor"], label="Are you OC or Subcontractor?"), gr.Textbox(label="Enter Name or ID to search") ], outputs=[ gr.Textbox(label="Status/Message"), gr.Dataframe(label="Induction Details") ], title="Induction Status Chatbot" ) demo.launch()