# Feedalytics_2.0 / app.py
# NOTE: the following lines are Hugging Face file-page residue from the copy,
# preserved here as comments so the file remains valid Python:
# KhanOmerKhan's picture — Update app.py — 1d97694 verified
# -*- coding: utf-8 -*-
"""Hugging Face Deployment.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/18VOpfepQVq0IUPYF25R0ltS0U6_T1M0u
"""
import json
import gspread
import requests
import tempfile
import os
import re
from docx import Document
import pdfplumber
from google.oauth2.service_account import Credentials
import gradio as gr
import openai
from datetime import datetime
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
# ===============================
# CONFIGURATION
# ===============================
# OpenAI API key from Hugging Face secret
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Spreadsheet that receives shortlisting results (written by write_to_sheet,
# read back by sync_feedback_and_resumes).
DESTINATION_SHEET_NAME = "Resume Shortlisting - Master"
# Google Sheets credentials from Hugging Face secret
# NOTE(review): these four imports duplicate the ones at the top of the file;
# harmless but redundant.
import json
import os
from google.oauth2.service_account import Credentials
import gspread
# Load the JSON from Hugging Face secret
gsheets_json = os.environ.get("GSHEETS_JSON")
if not gsheets_json:
    # Fail fast at import time: nothing in this app works without credentials.
    raise ValueError("GSHEETS_JSON environment variable not set.")
service_account_info = json.loads(gsheets_json)
# Convert literal \n into actual newlines in the private key
# (secret stores often escape the key's newlines).
if "private_key" in service_account_info:
    service_account_info["private_key"] = service_account_info["private_key"].replace("\\n", "\n")
GOOGLE_SHEET_CREDENTIALS = service_account_info
# Create Google Sheets client.
# Both scopes are needed: Sheets for cell access, Drive for opening
# spreadsheets by name and exporting resume documents.
creds = Credentials.from_service_account_info(GOOGLE_SHEET_CREDENTIALS,
scopes=[
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
])
client = gspread.authorize(creds)
# ===============================
# GOOGLE SHEET CLIENT
# ===============================
def get_gsheet_client():
    """Return a fresh gspread client authorized with the module's service-account info."""
    scoped_creds = Credentials.from_service_account_info(
        GOOGLE_SHEET_CREDENTIALS,
        scopes=[
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ],
    )
    return gspread.authorize(scoped_creds)
# ===============================
# TEXT EXTRACTION HELPERS
# ===============================
def extract_text_from_docx(path):
    """Return the non-empty paragraph text of a .docx file, newline-joined.

    Logs and returns "" on any failure instead of raising.
    """
    try:
        document = Document(path)
        lines = [para.text for para in document.paragraphs if para.text.strip()]
        return "\n".join(lines)
    except Exception as exc:
        print(f"[DOCX ERROR] {exc}")
        return ""
def extract_text_from_pdf(path):
    """Return the text of every page of a PDF, newline-joined.

    Pages with no extractable text contribute an empty string.
    Logs and returns "" on any failure instead of raising.
    """
    try:
        with pdfplumber.open(path) as pdf:
            page_texts = (page.extract_text() or "" for page in pdf.pages)
            return "\n".join(page_texts)
    except Exception as exc:
        print(f"[PDF ERROR] {exc}")
        return ""
def extract_text_from_jd(file_path):
    """Extract the job-description text from a PDF or DOCX file.

    Returns the extracted text, "" when extraction fails, or None for an
    unsupported file extension.
    """
    ext = os.path.splitext(file_path)[1].lower()
    # Delegate to the shared extractors instead of duplicating their logic.
    # This also makes parse failures consistent with resume handling: they
    # are logged and returned as "" (which the caller reports as an error)
    # rather than raising out of this function.
    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    if ext == ".docx":
        return extract_text_from_docx(file_path)
    return None
# ===============================
# RESUME DOWNLOAD & EXTRACTION
# ===============================
def download_file_from_drive(file_id):
    """Download a Google Docs file as DOCX bytes using the Drive API export."""
    drive = build('drive', 'v3', credentials=creds)
    export_request = drive.files().export_media(
        fileId=file_id,
        mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    )
    buffer = io.BytesIO()
    chunked_download = MediaIoBaseDownload(buffer, export_request)
    finished = False
    # Pull chunks until the downloader reports completion.
    while not finished:
        _, finished = chunked_download.next_chunk()
    buffer.seek(0)
    return buffer.read()
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
def download_and_extract_resumes_from_sheet(source_sheet_name):
    """
    Downloads resumes from a Google Sheet and extracts text from Google Docs or Drive files.
    Returns a list of tuples: (unique_id, resume_text)
    """
    client = get_gsheet_client()
    sheet = client.open(source_sheet_name).get_worksheet(0)
    data = sheet.get_all_records()
    # Column headers expected in the source (responses) sheet.
    resume_column_name = "Resume Link"
    unique_id_column = "Unique ID"
    resume_entries = []
    # Create Drive API client (reuses the module-level service-account creds).
    drive_service = build('drive', 'v3', credentials=creds)
    for row in data:
        link = row.get(resume_column_name)
        unique_id = row.get(unique_id_column)
        # Skip rows missing either the link or the ID.
        if not link or not unique_id:
            continue
        # Extract file ID from link (the ".../d/<id>/..." segment of a Drive URL).
        match = re.search(r"/d/([a-zA-Z0-9_-]+)", link)
        file_id = match.group(1) if match else None
        if not file_id:
            print(f"[WARN] Could not extract file ID from link: {link}")
            continue
        try:
            # Try exporting as DOCX first
            # NOTE(review): export_media only works for native Google Docs;
            # plain uploaded PDF/DOCX files would need files().get_media —
            # confirm what the form actually stores.
            request = drive_service.files().export_media(fileId=file_id, mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            fh.seek(0)
            # Persist to a temp .docx (delete=False so it survives the `with`)
            # because python-docx opens files by path; removed right after use.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
                tmp_file.write(fh.read())
                tmp_path = tmp_file.name
            text = extract_text_from_docx(tmp_path)
            os.remove(tmp_path)
            if text.strip():
                # NOTE(review): unique_id.strip() assumes the sheet cell is a
                # string; get_all_records can return ints — confirm the data.
                resume_entries.append((unique_id.strip(), text.strip()))
            else:
                print(f"[WARN] No text extracted for resume ID {unique_id}")
        except Exception as e:
            print(f"[ERROR] Could not download {unique_id}: {e}")
            continue
    return resume_entries
# ===============================
# OPENAI EVALUATION
# ===============================
def evaluate_resumes(resume_entries, job_description):
    """Score each resume against the job description via the OpenAI chat API.

    Returns a dict with keys "shortlisted" (score >= 90), "potential"
    (65-89) and "rejected" (below 65, or any entry whose API call failed),
    each mapping to a list of (unique_id, feedback_text) tuples.
    """
    results = {"shortlisted": [], "potential": [], "rejected": []}
    system_instructions = (
        "You are an AI hiring assistant. Given a job description and resume, return:\n"
        "1. Candidate name.\n2. A match score (0-100%).\n3. A short explanation (2-4 lines)."
    )
    for unique_id, resume in resume_entries:
        try:
            messages = [
                {"role": "system", "content": system_instructions},
                {"role": "user", "content": f"Job Description:\n{job_description}\n\nResume:\n{resume}"},
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0,
                max_tokens=500
            )
            content = response["choices"][0]["message"]["content"]
            # The first "NN%" occurrence in the reply is treated as the score.
            score_match = re.search(r"(\d{1,3})%", content)
            score = int(score_match.group(1)) if score_match else 0
            if score >= 90:
                bucket = "shortlisted"
            elif score >= 65:
                bucket = "potential"
            else:
                bucket = "rejected"
            results[bucket].append((unique_id, content))
        except Exception as exc:
            # Failed evaluations are recorded as rejections with the error text.
            results["rejected"].append((unique_id, f"[ERROR] {exc}"))
    return results
# ===============================
# WRITE TO MASTER SHEET
# ===============================
def extract_name_and_score(text):
    """Parse the candidate name (first line) and match score ("NN%") from AI feedback.

    Returns a (name, score) tuple of strings, falling back to
    ("Unknown", "0") for whichever part is missing.
    """
    first_line = re.search(r"(?i)^(.+?)\n", text)
    percent = re.search(r"(\d{1,3})%", text)
    name = first_line.group(1).strip() if first_line else "Unknown"
    score = percent.group(1) if percent else "0"
    return name, score
def write_to_sheet(sheet, candidates, status, job_title, recruiter_name):
    """Append one row per candidate to the master worksheet.

    Column order: Unique ID, Candidate Name, Vacancy Position, Score,
    Status, Date, Timestamp, AI Feedback, Line Manager Feedback (blank),
    Recruiter, Resume (blank). The two blank columns are back-filled
    later by the sync step.
    """
    for unique_id, feedback in candidates:
        candidate_name, score = extract_name_and_score(feedback)
        stamp = datetime.now()
        row = [
            unique_id,
            candidate_name,
            job_title,
            score,
            status,
            stamp.strftime("%Y-%m-%d"),
            stamp.strftime("%Y-%m-%d %H:%M:%S"),
            feedback,
            "",
            recruiter_name,
            "",
        ]
        sheet.append_row(row)
# ===============================
# MAIN JOB PROCESS
# ===============================
def process_job_description(file_path, recruiter_name, source_sheet_name):
    """End-to-end pipeline: extract the JD text, evaluate every resume from
    the source sheet against it, and append the results to the master sheet.

    Returns a (summary, details) tuple of strings for the Gradio outputs;
    on any failure returns an error message and an empty details string.
    """
    try:
        jd_text = extract_text_from_jd(file_path)
        if not jd_text or not jd_text.strip():
            return "❌ Could not extract job description text.", ""
        # The uploaded file's base name (without extension) doubles as the job title.
        job_title = os.path.splitext(os.path.basename(file_path))[0]
        resume_texts = download_and_extract_resumes_from_sheet(source_sheet_name)
        if not resume_texts:
            return "❌ No resumes extracted.", ""
        results = evaluate_resumes(resume_texts, jd_text)
        client = get_gsheet_client()
        dest_ws = client.open(DESTINATION_SHEET_NAME).sheet1
        # Persist each bucket; "rejected" is written with the label "not selected".
        write_to_sheet(dest_ws, results["shortlisted"], "shortlisted", job_title, recruiter_name)
        write_to_sheet(dest_ws, results["potential"], "potential", job_title, recruiter_name)
        write_to_sheet(dest_ws, results["rejected"], "not selected", job_title, recruiter_name)
        summary = (
            f"βœ… Shortlisting Complete!\n\n"
            f"Shortlisted: {len(results['shortlisted'])}\n"
            f"Potential: {len(results['potential'])}\n"
            f"Rejected: {len(results['rejected'])}\n"
        )
        # Flatten all buckets into one "id\nfeedback" blob per candidate.
        details = "\n\n".join([f"{c}\n{f}" for c, f in results["shortlisted"] + results["potential"] + results["rejected"]])
        return summary, details
    except Exception as e:
        import traceback
        print("[ERROR] process_job_description failed:")
        traceback.print_exc()  # This will print the actual stack trace in Hugging Face logs
        return f"❌ Error: {str(e)}", ""
# ===============================
# SYNC FEEDBACK & RESUMES (NEW)
# ===============================
def sync_feedback_and_resumes():
    """Back-fill the "Line Manager Feedback" and "Resume" columns of the
    master sheet from each position's "<Position> - (Responses)" sheet.

    Rows that already have both values are skipped. Matching is done on
    the "Unique ID" column. Returns a status string for the Gradio
    summary box.
    """
    try:
        client = get_gsheet_client()
        master_ws = client.open(DESTINATION_SHEET_NAME).sheet1
        master_data = master_ws.get_all_records()
        updates = 0
        # Resolve target column indices once, by header name.
        feedback_col = master_ws.find("Line Manager Feedback").col
        resume_col = master_ws.find("Resume").col
        # get_all_records skips the header row, so sheet rows start at 2.
        for i, row in enumerate(master_data, start=2):
            unique_id = row.get("Unique ID")
            position = row.get("Vacancy Position")
            existing_feedback = row.get("Line Manager Feedback", "")
            existing_resume = row.get("Resume", "")
            # Nothing to do when both cells are already populated.
            if (existing_feedback and existing_feedback.strip()) and (existing_resume and existing_resume.strip()):
                continue
            if not unique_id or not position:
                continue
            try:
                # The responses sheet is named after the vacancy position.
                response_sheet_name = f"{position} - (Responses)"
                source_ws = client.open(response_sheet_name).get_worksheet(0)
                source_data = source_ws.get_all_records()
                for src_row in source_data:
                    if str(src_row.get("Unique ID")).strip() == str(unique_id).strip():
                        feedback = src_row.get("Line Manager Feedback", "")
                        resume_link = src_row.get("Please upload your resume", "")
                        if (not existing_feedback and feedback) or (not existing_resume and resume_link):
                            if feedback:
                                master_ws.update_cell(i, feedback_col, feedback)
                            if resume_link:
                                master_ws.update_cell(i, resume_col, resume_link)
                            updates += 1
                        # First matching Unique ID wins; stop scanning this sheet.
                        break
            except Exception as e:
                # Fix: the old `isinstance(e, requests.models.Response)` branch
                # was dead code — a Response is not an exception and can never
                # be caught here, so e.status_code/e.url were unreachable.
                # str(e) covers gspread APIError and everything else.
                print(f"[WARN] Error accessing response sheet for {position}: {e}")
                continue
        return f"βœ… Sync complete! {updates} records updated."
    except Exception as e:
        return f"❌ Error during sync: {e}"
# ===============================
# GRADIO INTERFACE
# ===============================
# Two actions share the Summary box: shortlisting (also fills Details)
# and feedback/resume sync (summary only).
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“„ Resume Intelligence & Feedback Sync Tool")
    # Inputs for the shortlisting run.
    recruiter_name_input = gr.Textbox(label="Recruiter's Name", placeholder="Enter your name")
    source_sheet_name_input = gr.Textbox(label="Source Sheet Name", placeholder="e.g., Key Account Manager - JBS Consulting (Responses)")
    jd_file = gr.File(label="Upload Job Description (PDF/DOCX)", type="filepath")
    with gr.Row():
        submit_btn = gr.Button("πŸš€ Run Resume Shortlisting")
        sync_btn = gr.Button("πŸ” Sync Feedback & Resumes")
    output_summary = gr.Textbox(label="Summary", lines=4)
    output_details = gr.Textbox(label="Evaluation Details", lines=20)
    submit_btn.click(
        fn=process_job_description,
        inputs=[jd_file, recruiter_name_input, source_sheet_name_input],
        outputs=[output_summary, output_details]
    )
    # Sync takes no inputs and only updates the summary box.
    sync_btn.click(
        fn=sync_feedback_and_resumes,
        inputs=[],
        outputs=[output_summary]
    )
# Launch the app for Hugging Face Spaces
if __name__ == "__main__":
    # Bind to all interfaces; Spaces injects the port via $PORT (default 7860).
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))