# NOTE: the three lines above in the original paste ("Spaces: Sleeping Sleeping")
# were Hugging Face Spaces UI text captured with the file — not part of the code.
# -*- coding: utf-8 -*-
"""Hugging Face Deployment.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/18VOpfepQVq0IUPYF25R0ltS0U6_T1M0u
"""
import json
import gspread
import requests
import tempfile
import os
import re
from docx import Document
import pdfplumber
from google.oauth2.service_account import Credentials
import gradio as gr
import openai
from datetime import datetime
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io

# ===============================
# CONFIGURATION
# ===============================

# OpenAI API key from Hugging Face secret
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Name of the master spreadsheet that results are appended to / synced from.
DESTINATION_SHEET_NAME = "Resume Shortlisting - Master"

# Google Sheets credentials from Hugging Face secret
# NOTE(review): the next four imports duplicate the ones above; kept as-is.
import json
import os
from google.oauth2.service_account import Credentials
import gspread

# Load the service-account JSON from the Hugging Face secret.
# Fail fast at startup if the secret is not configured.
gsheets_json = os.environ.get("GSHEETS_JSON")
if not gsheets_json:
    raise ValueError("GSHEETS_JSON environment variable not set.")

service_account_info = json.loads(gsheets_json)

# Convert literal \n into actual newlines in the private key
# (secrets stored as single-line strings escape the key's newlines).
if "private_key" in service_account_info:
    service_account_info["private_key"] = service_account_info["private_key"].replace("\\n", "\n")

GOOGLE_SHEET_CREDENTIALS = service_account_info

# Create the module-level Google Sheets client; `creds` is also reused by
# the Drive API calls further down.
creds = Credentials.from_service_account_info(GOOGLE_SHEET_CREDENTIALS,
    scopes=[
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ])
client = gspread.authorize(creds)
# ===============================
# GOOGLE SHEET CLIENT
# ===============================
def get_gsheet_client():
    """Build and return a fresh authorized gspread client from the service-account info."""
    sheet_scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]
    service_creds = Credentials.from_service_account_info(
        GOOGLE_SHEET_CREDENTIALS, scopes=sheet_scopes
    )
    return gspread.authorize(service_creds)
# ===============================
# TEXT EXTRACTION HELPERS
# ===============================
def extract_text_from_docx(path):
    """Extract all non-blank paragraph text from a .docx file.

    Returns the paragraphs joined by newlines, or "" (after logging) if the
    file cannot be opened or parsed.
    """
    try:
        document = Document(path)
        lines = [para.text for para in document.paragraphs if para.text.strip()]
        return "\n".join(lines)
    except Exception as err:
        print(f"[DOCX ERROR] {err}")
        return ""
def extract_text_from_pdf(path):
    """Extract text from every page of a PDF, joined by newlines.

    Pages with no extractable text contribute an empty string. Returns ""
    (after logging) if the file cannot be opened or parsed.
    """
    try:
        with pdfplumber.open(path) as pdf:
            page_texts = [page.extract_text() or "" for page in pdf.pages]
            return "\n".join(page_texts)
    except Exception as err:
        print(f"[PDF ERROR] {err}")
        return ""
def extract_text_from_jd(file_path):
    """Extract job-description text from a .pdf or .docx file.

    Delegates to the dedicated extractors so error handling is consistent:
    an unreadable file yields "" (logged) instead of raising, matching
    extract_text_from_pdf / extract_text_from_docx.

    Returns:
        The extracted text, or None when the extension is unsupported.
        The extension check is case-insensitive.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    if ext == ".docx":
        return extract_text_from_docx(file_path)
    return None
# ===============================
# RESUME DOWNLOAD & EXTRACTION
# ===============================
def download_file_from_drive(file_id):
    """Download a Google Docs file as DOCX using the Drive API."""
    drive = build('drive', 'v3', credentials=creds)
    export_request = drive.files().export_media(
        fileId=file_id,
        mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    )
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, export_request)
    finished = False
    # Pull the export down chunk by chunk until the API reports completion.
    while not finished:
        _, finished = downloader.next_chunk()
    buffer.seek(0)
    return buffer.read()
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
def download_and_extract_resumes_from_sheet(source_sheet_name):
    """
    Download resumes linked in a Google Sheet and extract their text.

    Each row must provide a "Resume Link" (a Google Docs/Drive URL) and a
    "Unique ID". Each linked file is exported as DOCX via the Drive API and
    its text extracted with extract_text_from_docx.

    Returns:
        A list of (unique_id, resume_text) tuples. Rows with missing data,
        unparseable links, failed downloads, or empty documents are skipped
        with a console warning/error.
    """
    client = get_gsheet_client()
    sheet = client.open(source_sheet_name).get_worksheet(0)
    data = sheet.get_all_records()

    resume_column_name = "Resume Link"
    unique_id_column = "Unique ID"
    resume_entries = []

    # Create Drive API client (reuses the module-level service-account creds)
    drive_service = build('drive', 'v3', credentials=creds)

    for row in data:
        link = row.get(resume_column_name)
        unique_id = row.get(unique_id_column)
        if not link or not unique_id:
            continue

        # Extract the Drive file ID from a ".../d/<id>/..." style link
        match = re.search(r"/d/([a-zA-Z0-9_-]+)", link)
        file_id = match.group(1) if match else None
        if not file_id:
            print(f"[WARN] Could not extract file ID from link: {link}")
            continue

        try:
            # Export the document as DOCX
            request = drive_service.files().export_media(
                fileId=file_id,
                mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            )
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
            fh.seek(0)

            # python-docx needs a real path; write a temp file and always
            # remove it, even if extraction fails (previously leaked on error).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
                tmp_file.write(fh.read())
                tmp_path = tmp_file.name
            try:
                text = extract_text_from_docx(tmp_path)
            finally:
                os.remove(tmp_path)

            if text.strip():
                # str() guards against numeric IDs coming back from the sheet,
                # mirroring the str(...).strip() comparison used by the sync.
                resume_entries.append((str(unique_id).strip(), text.strip()))
            else:
                print(f"[WARN] No text extracted for resume ID {unique_id}")
        except Exception as e:
            print(f"[ERROR] Could not download {unique_id}: {e}")
            continue

    return resume_entries
# ===============================
# OPENAI EVALUATION
# ===============================
def evaluate_resumes(resume_entries, job_description):
    """Score each resume against the job description with the OpenAI chat API.

    Args:
        resume_entries: list of (unique_id, resume_text) tuples.
        job_description: plain-text job description.

    Returns:
        dict with keys "shortlisted" (score >= 90), "potential" (65-89) and
        "rejected" (< 65, or any API/parse error); each value is a list of
        (unique_id, ai_feedback) tuples.
    """
    results = {"shortlisted": [], "potential": [], "rejected": []}
    # The system message is invariant — build it once, outside the loop.
    system_message = {
        "role": "system",
        "content": (
            "You are an AI hiring assistant. Given a job description and resume, return:\n"
            "1. Candidate name.\n2. A match score (0-100%).\n3. A short explanation (2-4 lines)."
        )
    }
    for unique_id, resume in resume_entries:
        try:
            messages = [
                system_message,
                {
                    "role": "user",
                    "content": f"Job Description:\n{job_description}\n\nResume:\n{resume}"
                }
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0,
                max_tokens=500
            )
            content = response["choices"][0]["message"]["content"]
            # The first "NN%" in the reply is taken as the match score.
            score_match = re.search(r"(\d{1,3})%", content)
            score = int(score_match.group(1)) if score_match else 0
            status = "shortlisted" if score >= 90 else "potential" if score >= 65 else "rejected"
            results[status].append((unique_id, content))
        except Exception as e:
            # Any failure demotes the candidate to "rejected" with the error recorded.
            results["rejected"].append((unique_id, f"[ERROR] {e}"))
    return results
# ===============================
# WRITE TO MASTER SHEET
# ===============================
def extract_name_and_score(text):
    """Parse AI feedback text into (candidate_name, score_string).

    The name is everything up to the first newline ("Unknown" if there is
    none); the score is the first "NN%" figure in the text ("0" if absent).
    """
    first_line = re.search(r"(?i)^(.+?)\n", text)
    candidate_name = first_line.group(1).strip() if first_line else "Unknown"
    percent = re.search(r"(\d{1,3})%", text)
    score_string = percent.group(1) if percent else "0"
    return candidate_name, score_string
def write_to_sheet(sheet, candidates, status, job_title, recruiter_name):
    """Append one row per candidate to the given worksheet.

    Column order: Unique ID, Candidate Name, Vacancy Position, Score, Status,
    Date, Timestamp, AI Feedback, Line Manager Feedback (blank), Recruiter,
    Resume (blank). The two blank columns are filled later by the sync step.
    """
    for candidate_id, feedback in candidates:
        candidate_name, match_score = extract_name_and_score(feedback)
        stamp = datetime.now()
        row = [
            candidate_id,                        # Unique ID
            candidate_name,                      # Candidate Name
            job_title,                           # Vacancy Position
            match_score,                         # Score
            status,                              # Status
            stamp.strftime("%Y-%m-%d"),          # Date
            stamp.strftime("%Y-%m-%d %H:%M:%S"), # Timestamp
            feedback,                            # AI Feedback
            "",                                  # Line Manager Feedback (empty)
            recruiter_name,                      # Recruiter
            ""                                   # Resume (empty)
        ]
        sheet.append_row(row)
| # =============================== | |
| # MAIN JOB PROCESS | |
| # =============================== | |
| def process_job_description(file_path, recruiter_name, source_sheet_name): | |
| try: | |
| jd_text = extract_text_from_jd(file_path) | |
| if not jd_text or not jd_text.strip(): | |
| return "β Could not extract job description text.", "" | |
| job_title = os.path.splitext(os.path.basename(file_path))[0] | |
| resume_texts = download_and_extract_resumes_from_sheet(source_sheet_name) | |
| if not resume_texts: | |
| return "β No resumes extracted.", "" | |
| results = evaluate_resumes(resume_texts, jd_text) | |
| client = get_gsheet_client() | |
| dest_ws = client.open(DESTINATION_SHEET_NAME).sheet1 | |
| write_to_sheet(dest_ws, results["shortlisted"], "shortlisted", job_title, recruiter_name) | |
| write_to_sheet(dest_ws, results["potential"], "potential", job_title, recruiter_name) | |
| write_to_sheet(dest_ws, results["rejected"], "not selected", job_title, recruiter_name) | |
| summary = ( | |
| f"β Shortlisting Complete!\n\n" | |
| f"Shortlisted: {len(results['shortlisted'])}\n" | |
| f"Potential: {len(results['potential'])}\n" | |
| f"Rejected: {len(results['rejected'])}\n" | |
| ) | |
| details = "\n\n".join([f"{c}\n{f}" for c, f in results["shortlisted"] + results["potential"] + results["rejected"]]) | |
| return summary, details | |
| except Exception as e: | |
| import traceback | |
| print("[ERROR] process_job_description failed:") | |
| traceback.print_exc() # This will print the actual stack trace in Hugging Face logs | |
| return f"β Error: {str(e)}", "" | |
# ===============================
# SYNC FEEDBACK & RESUMES (NEW)
# ===============================
def sync_feedback_and_resumes():
    """Back-fill "Line Manager Feedback" and "Resume" in the master sheet.

    For each master row still missing feedback or a resume link, look up the
    matching "Unique ID" in that position's "<position> - (Responses)" sheet
    and copy the missing values over.

    Returns a human-readable status string for the Gradio output box.
    """
    try:
        client = get_gsheet_client()
        master_ws = client.open(DESTINATION_SHEET_NAME).sheet1
        master_data = master_ws.get_all_records()
        updates = 0

        # Column indices of the two cells we may write back.
        feedback_col = master_ws.find("Line Manager Feedback").col
        resume_col = master_ws.find("Resume").col

        # get_all_records() skips the header row, so sheet rows start at 2.
        for i, row in enumerate(master_data, start=2):
            unique_id = row.get("Unique ID")
            position = row.get("Vacancy Position")
            existing_feedback = row.get("Line Manager Feedback", "")
            existing_resume = row.get("Resume", "")

            # Skip rows that are already fully populated.
            if (existing_feedback and existing_feedback.strip()) and (existing_resume and existing_resume.strip()):
                continue
            if not unique_id or not position:
                continue

            try:
                folder_name = position
                response_sheet_name = f"{folder_name} - (Responses)"
                source_ws = client.open(response_sheet_name).get_worksheet(0)
                source_data = source_ws.get_all_records()

                for src_row in source_data:
                    if str(src_row.get("Unique ID")).strip() == str(unique_id).strip():
                        feedback = src_row.get("Line Manager Feedback", "")
                        resume_link = src_row.get("Please upload your resume", "")
                        if (not existing_feedback and feedback) or (not existing_resume and resume_link):
                            if feedback:
                                master_ws.update_cell(i, feedback_col, feedback)
                            if resume_link:
                                master_ws.update_cell(i, resume_col, resume_link)
                            updates += 1
                        break
            except Exception as e:
                # FIX: the old code tested isinstance(e, requests.models.Response)
                # to build an HTTP-style message, but a Response object is never
                # raised as an exception, so that branch was unreachable — str(e)
                # is the correct (and previously effective) formatting.
                print(f"[WARN] Error accessing response sheet for {position}: {e}")
                continue

        return f"β Sync complete! {updates} records updated."
    except Exception as e:
        return f"β Error during sync: {e}"
# ===============================
# GRADIO INTERFACE
# ===============================
with gr.Blocks() as demo:
    gr.Markdown("## π Resume Intelligence & Feedback Sync Tool")

    # Inputs for the shortlisting run.
    recruiter_name_input = gr.Textbox(label="Recruiter's Name", placeholder="Enter your name")
    source_sheet_name_input = gr.Textbox(label="Source Sheet Name", placeholder="e.g., Key Account Manager - JBS Consulting (Responses)")
    jd_file = gr.File(label="Upload Job Description (PDF/DOCX)", type="filepath")

    with gr.Row():
        submit_btn = gr.Button("π Run Resume Shortlisting")
        sync_btn = gr.Button("π Sync Feedback & Resumes")

    # Both actions write into the same summary box; only the shortlisting
    # run also fills the details box.
    output_summary = gr.Textbox(label="Summary", lines=4)
    output_details = gr.Textbox(label="Evaluation Details", lines=20)

    submit_btn.click(
        fn=process_job_description,
        inputs=[jd_file, recruiter_name_input, source_sheet_name_input],
        outputs=[output_summary, output_details]
    )
    sync_btn.click(
        fn=sync_feedback_and_resumes,
        inputs=[],
        outputs=[output_summary]
    )

# Launch the app for Hugging Face Spaces (PORT env var overrides the default 7860).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))