Woolv7007's picture
Update app.py
aab0686 verified
import pandas as pd
import gradio as gr
from rapidfuzz import process
# NOTE: Adjust file_path for Hugging Face Spaces
# Assuming you upload 'indaction air port 1-4.xlsx' directly to your Space's root directory.
file_path = "indaction air port 1-4.xlsx"
# Read the 'Orascom' sheet
raw_orascom = pd.read_excel(file_path, sheet_name='Orascom', header=None)
# Read the 'Sub Contractor' sheet
raw_subcontractor = pd.read_excel(file_path, sheet_name='Sub Contractor', header=None)
# Get header from Orascom sheet (assuming it's consistent)
header = raw_orascom.iloc[1]
needed_cols_list = [
"ID Number / HR Code",
"NAME",
"Company",
"Phone no.",
"Position",
"Induction Date"
]
# --- Process Orascom data ---
df_orascom_temp = raw_orascom[2:].copy()
df_orascom_temp.columns = header
df_orascom_temp.columns = df_orascom_temp.columns.astype(str).str.replace('\n', ' ', regex=True).str.replace(r'\s+', ' ', regex=True).str.strip()
df_orascom_final = df_orascom_temp[needed_cols_list]
df_orascom_final = df_orascom_final.dropna(subset=["NAME"])
df_orascom_final["NAME"] = df_orascom_final["NAME"].astype(str).str.lower().str.strip()
df_orascom_final["ID Number / HR Code"] = df_orascom_final["ID Number / HR Code"].astype(str).str.replace(".0","", regex=False).str.strip()
# --- Process Sub Contractor data ---
df_subcontractor_temp = raw_subcontractor[2:].copy()
# Trim df_subcontractor to the same number of columns as the header
df_subcontractor_temp = df_subcontractor_temp.iloc[:, :len(header)]
df_subcontractor_temp.columns = header
df_subcontractor_temp.columns = df_subcontractor_temp.columns.astype(str).str.replace('\n', ' ', regex=True).str.replace(r'\s+', ' ', regex=True).str.strip()
df_subcontractor_final = df_subcontractor_temp[needed_cols_list]
df_subcontractor_final = df_subcontractor_final.dropna(subset=["NAME"])
df_subcontractor_final["NAME"] = df_subcontractor_final["NAME"].astype(str).str.lower().str.strip()
df_subcontractor_final["ID Number / HR Code"] = df_subcontractor_final["ID Number / HR Code"].astype(str).str.replace(".0","", regex=False).str.strip()
def search_person(query, df_to_search):
query = query.strip().lower()
# ID search
res_id = df_to_search[df_to_search["ID Number / HR Code"] == query]
if not res_id.empty:
return res_id
# Name search
names = df_to_search["NAME"].tolist()
# Increased the score threshold to 85 for more precise name matching
matches = process.extract(query, names)
similar_people = []
for match, score, idx in matches:
if score > 85: # Changed from 70 to 85 for stricter matching
similar_people.append(df_to_search.iloc[idx])
if similar_people:
return pd.DataFrame(similar_people).drop_duplicates(subset=["ID Number / HR Code"])
return None
def chatbot(user_type_selection, search_query_input):
if not user_type_selection:
return "Please select if you are OC or Subcontractor.", None
if not search_query_input:
return "Please enter a Name or ID to search.", None
df_to_use = None
if user_type_selection == "OC":
df_to_use = df_orascom_final
elif user_type_selection == "Subcontractor":
df_to_use = df_subcontractor_final
else:
return "Invalid user type selection.", None
persons = search_person(search_query_input, df_to_use)
if persons is None or persons.empty:
return f"❌ This person did NOT take induction within {user_type_selection} data.", None
return "✅ Found person(s).", persons
demo = gr.Interface(
fn=chatbot,
inputs=[
gr.Radio(["OC", "Subcontractor"], label="Are you OC or Subcontractor?"),
gr.Textbox(label="Enter Name or ID to search")
],
outputs=[
gr.Textbox(label="Status/Message"),
gr.Dataframe(label="Induction Details")
],
title="Induction Status Chatbot"
)
demo.launch()