Spaces:
Runtime error
Runtime error
File size: 6,931 Bytes
74d86cf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from openpyxl import load_workbook
from IPython.display import Markdown
import os
# Initialize empty DataFrame and TF-IDF matrix
data = pd.DataFrame()
tfidf_vectorizer = None
tfidf_matrix = None
# Helper: Load Data from File (Excel or CSV)
def load_file(file):
global data, tfidf_vectorizer, tfidf_matrix
try:
# Read file based on its extension
if file.endswith(".xlsx"):
# Read Excel file
workbook = load_workbook(filename=file, data_only=True)
sheet = workbook.active
headers = [cell.value if cell.value is not None else f"Unnamed Column {i}" for i, cell in enumerate(sheet[1])]
rows = sheet.iter_rows(min_row=2, values_only=True)
data = pd.DataFrame(rows, columns=headers)
# Extract hyperlinks from "Information" column if present
if "Information" in data.columns:
for i, row in enumerate(sheet.iter_rows(min_row=2)):
cell = row[data.columns.get_loc("Information")]
if cell.hyperlink:
data.at[i, "Information"] = cell.hyperlink.target
elif file.endswith(".csv"):
# Read CSV file
data = pd.read_csv(file)
else:
return "Unsupported file format. Please upload a .xlsx or .csv file."
# Initialize TF-IDF for employee search
tfidf_vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(2, 4))
tfidf_matrix = tfidf_vectorizer.fit_transform(data["Employee Name"].astype(str))
return "File loaded successfully!"
except Exception as e:
return f"Error loading file: {e}"
# Helper: Generate Report Dynamically
import os
def generate_dynamic_report(query):
if data.empty or tfidf_vectorizer is None:
return None
query_vec = tfidf_vectorizer.transform([query])
similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
best_match_idx = similarities.argmax()
if similarities[best_match_idx] > 0:
employee = data.iloc[best_match_idx]
employee_name = employee["Employee Name"]
report_file = f"{employee_name.replace(' ', '_')}_report.html"
# Generate HTML content dynamically
with open(report_file, "w") as file:
file.write(f"<html><head><title>{employee_name} Report</title></head><body>")
file.write(f"<h1>Employee Report for {employee_name}</h1>")
file.write("<hr>")
# Employee Details
file.write("<h2>Employee Details:</h2><ul>")
for column, value in employee.items():
if column == "Information" and isinstance(value, str):
if value.startswith("http") or value.startswith("file://"):
# Detect file type from hyperlink
file_type = os.path.splitext(value)[-1].lower()
if file_type == ".pdf":
label = "View PDF Document"
elif file_type in [".ppt", ".pptx"]:
label = "View PowerPoint Presentation"
elif file_type in [".doc", ".docx"]:
label = "View Word Document"
else:
label = "Download/View File"
value = f'<a href="{value}" target="_blank" style="color:blue;text-decoration:underline;">{label}</a>'
else:
# Add a clickable link for local paths
value = f'<a href="file:///{value}" target="_blank" style="color:blue;text-decoration:underline;">Download/View File</a>'
file.write(f"<li><strong>{column}:</strong> {value}</li>")
file.write("</ul>")
# Customer Insights
file.write("<h2>Customer Insights:</h2><ul>")
if "Experience" in data.columns:
experience = employee.get("Experience", "No experience details available.")
file.write(f"<li>Experience in the field: {experience}</li>")
if "Skills" in data.columns:
skills = employee.get("Skills", "No skills information available.")
file.write(f"<li>Key skills: {skills}</li>")
if "Projects" in data.columns:
projects = employee.get("Projects", "No projects listed.")
file.write(f"<li>Notable projects: {projects}</li>")
file.write("</ul>")
# Summary
file.write("<h2>Summary:</h2>")
file.write(f"<p>{employee_name} has shown notable contributions in the domain. Refer to the linked documents for more details.</p>")
file.write("<p>Thank you for using our Employee Dashboard!</p>")
file.write("</body></html>")
return report_file
else:
return None
# Helper: Search Employee
def search_employee(query):
if data.empty or tfidf_vectorizer is None:
return pd.DataFrame([{"Error": "No data available. Please upload a file first."}])
query_vec = tfidf_vectorizer.transform([query])
similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
best_match_idx = similarities.argmax()
if similarities[best_match_idx] > 0:
# Ensure output is a valid DataFrame with one row
employee = data.iloc[best_match_idx].to_frame().T # Convert Series to DataFrame
return employee
else:
return pd.DataFrame([{"Error": "No matching employee found."}])
# Gradio Interface
with gr.Blocks() as interface:
gr.Markdown("""
<h1 style="text-align: center;">Employee Dashboard</h1>
<p style="text-align: center;">Upload your Excel or CSV file to get started. Search employees, view metrics, and generate dynamic reports.</p>
""")
with gr.Row():
file_upload = gr.File(label="Upload Excel or CSV File", type="filepath")
upload_status = gr.Textbox(label="Upload Status", interactive=False)
upload_button = gr.Button("Upload")
upload_button.click(load_file, inputs=[file_upload], outputs=[upload_status])
with gr.Row():
search_query = gr.Textbox(label="Search Employee", placeholder="Type partial name (e.g., 'Aar')")
employee_details = gr.Dataframe(label="Employee Details", interactive=True)
report_output = gr.File(label="Download Report")
search_button = gr.Button("Search")
search_button.click(
search_employee,
inputs=[search_query],
outputs=[employee_details],
)
search_button.click(
generate_dynamic_report,
inputs=[search_query],
outputs=[report_output],
)
if __name__ == "__main__":
interface.launch(share=True) |