# Feedalytics_2.0 / app.py
# NOTE: the following lines are Hugging Face file-page residue from the copy,
# preserved here as comments so the file remains valid Python:
# KhanOmerKhan's picture — Update app.py — 1d97694 verified
# -*- coding: utf-8 -*-
"""Hugging Face Deployment.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/18VOpfepQVq0IUPYF25R0ltS0U6_T1M0u
"""
import json
import gspread
import requests
import tempfile
import os
import re
from docx import Document
import pdfplumber
from google.oauth2.service_account import Credentials
import gradio as gr
import openai
from datetime import datetime
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
# ===============================
# CONFIGURATION
# ===============================
# OpenAI API key from Hugging Face secret
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Spreadsheet that receives shortlisting results (written by write_to_sheet,
# read back by sync_feedback_and_resumes).
DESTINATION_SHEET_NAME = "Resume Shortlisting - Master"
# Google Sheets credentials from Hugging Face secret
# NOTE(review): these four imports duplicate the ones at the top of the file;
# harmless but redundant.
import json
import os
from google.oauth2.service_account import Credentials
import gspread
# Load the JSON from Hugging Face secret
gsheets_json = os.environ.get("GSHEETS_JSON")
if not gsheets_json:
    # Fail fast at import time: nothing in this app works without credentials.
    raise ValueError("GSHEETS_JSON environment variable not set.")
service_account_info = json.loads(gsheets_json)
# Convert literal \n into actual newlines in the private key
# (secret stores often escape the key's newlines).
if "private_key" in service_account_info:
    service_account_info["private_key"] = service_account_info["private_key"].replace("\\n", "\n")
GOOGLE_SHEET_CREDENTIALS = service_account_info
# Create Google Sheets client.
# Both scopes are needed: Sheets for cell access, Drive for opening
# spreadsheets by name and exporting resume documents.
creds = Credentials.from_service_account_info(GOOGLE_SHEET_CREDENTIALS,
scopes=[
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
])
client = gspread.authorize(creds)
# ===============================
# GOOGLE SHEET CLIENT
# ===============================
def get_gsheet_client():
    """Return a fresh gspread client authorized with the module's service-account info."""
    scoped_creds = Credentials.from_service_account_info(
        GOOGLE_SHEET_CREDENTIALS,
        scopes=[
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ],
    )
    return gspread.authorize(scoped_creds)
# ===============================
# TEXT EXTRACTION HELPERS
# ===============================
def extract_text_from_docx(path):
    """Return the non-empty paragraph text of a .docx file, newline-joined.

    Logs and returns "" on any failure instead of raising.
    """
    try:
        document = Document(path)
        lines = [para.text for para in document.paragraphs if para.text.strip()]
        return "\n".join(lines)
    except Exception as exc:
        print(f"[DOCX ERROR] {exc}")
        return ""
def extract_text_from_pdf(path):
    """Return the text of every page of a PDF, newline-joined.

    Pages with no extractable text contribute an empty string.
    Logs and returns "" on any failure instead of raising.
    """
    try:
        with pdfplumber.open(path) as pdf:
            page_texts = (page.extract_text() or "" for page in pdf.pages)
            return "\n".join(page_texts)
    except Exception as exc:
        print(f"[PDF ERROR] {exc}")
        return ""
def extract_text_from_jd(file_path):
    """Extract the job-description text from a PDF or DOCX file.

    Returns the extracted text, "" when extraction fails, or None for an
    unsupported file extension.
    """
    ext = os.path.splitext(file_path)[1].lower()
    # Delegate to the shared extractors instead of duplicating their logic.
    # This also makes parse failures consistent with resume handling: they
    # are logged and returned as "" (which the caller reports as an error)
    # rather than raising out of this function.
    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    if ext == ".docx":
        return extract_text_from_docx(file_path)
    return None
# ===============================
# RESUME DOWNLOAD & EXTRACTION
# ===============================
def download_file_from_drive(file_id):
    """Download a Google Docs file as DOCX bytes using the Drive API export."""
    drive = build('drive', 'v3', credentials=creds)
    export_request = drive.files().export_media(
        fileId=file_id,
        mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    )
    buffer = io.BytesIO()
    chunked_download = MediaIoBaseDownload(buffer, export_request)
    finished = False
    # Pull chunks until the downloader reports completion.
    while not finished:
        _, finished = chunked_download.next_chunk()
    buffer.seek(0)
    return buffer.read()
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io
def download_and_extract_resumes_from_sheet(source_sheet_name):
    """
    Downloads resumes from a Google Sheet and extracts text from Google Docs or Drive files.
    Returns a list of tuples: (unique_id, resume_text)
    """
    client = get_gsheet_client()
    sheet = client.open(source_sheet_name).get_worksheet(0)
    data = sheet.get_all_records()
    # Column headers expected in the source (responses) sheet.
    resume_column_name = "Resume Link"
    unique_id_column = "Unique ID"
    resume_entries = []
    # Create Drive API client (reuses the module-level service-account creds).
    drive_service = build('drive', 'v3', credentials=creds)
    for row in data:
        link = row.get(resume_column_name)
        unique_id = row.get(unique_id_column)
        # Skip rows missing either the link or the ID.
        if not link or not unique_id:
            continue
        # Extract file ID from link (the ".../d/<id>/..." segment of a Drive URL).
        match = re.search(r"/d/([a-zA-Z0-9_-]+)", link)
        file_id = match.group(1) if match else None
        if not file_id:
            print(f"[WARN] Could not extract file ID from link: {link}")
            continue
        try:
            # Try exporting as DOCX first
            # NOTE(review): export_media only works for native Google Docs;
            # plain uploaded PDF/DOCX files would need files().get_media —
            # confirm what the form actually stores.
            request = drive_service.files().export_media(fileId=file_id, mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
            fh.seek(0)
            # Persist to a temp .docx (delete=False so it survives the `with`)
            # because python-docx opens files by path; removed right after use.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
                tmp_file.write(fh.read())
                tmp_path = tmp_file.name
            text = extract_text_from_docx(tmp_path)
            os.remove(tmp_path)
            if text.strip():
                # NOTE(review): unique_id.strip() assumes the sheet cell is a
                # string; get_all_records can return ints — confirm the data.
                resume_entries.append((unique_id.strip(), text.strip()))
            else:
                print(f"[WARN] No text extracted for resume ID {unique_id}")
        except Exception as e:
            print(f"[ERROR] Could not download {unique_id}: {e}")
            continue
    return resume_entries
# ===============================
# OPENAI EVALUATION
# ===============================
def evaluate_resumes(resume_entries, job_description):
    """Score each resume against the job description via the OpenAI chat API.

    Returns a dict with keys "shortlisted" (score >= 90), "potential"
    (65-89) and "rejected" (below 65, or any entry whose API call failed),
    each mapping to a list of (unique_id, feedback_text) tuples.
    """
    results = {"shortlisted": [], "potential": [], "rejected": []}
    system_instructions = (
        "You are an AI hiring assistant. Given a job description and resume, return:\n"
        "1. Candidate name.\n2. A match score (0-100%).\n3. A short explanation (2-4 lines)."
    )
    for unique_id, resume in resume_entries:
        try:
            messages = [
                {"role": "system", "content": system_instructions},
                {"role": "user", "content": f"Job Description:\n{job_description}\n\nResume:\n{resume}"},
            ]
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0,
                max_tokens=500
            )
            content = response["choices"][0]["message"]["content"]
            # The first "NN%" occurrence in the reply is treated as the score.
            score_match = re.search(r"(\d{1,3})%", content)
            score = int(score_match.group(1)) if score_match else 0
            if score >= 90:
                bucket = "shortlisted"
            elif score >= 65:
                bucket = "potential"
            else:
                bucket = "rejected"
            results[bucket].append((unique_id, content))
        except Exception as exc:
            # Failed evaluations are recorded as rejections with the error text.
            results["rejected"].append((unique_id, f"[ERROR] {exc}"))
    return results
# ===============================
# WRITE TO MASTER SHEET
# ===============================
def extract_name_and_score(text):
    """Parse the candidate name (first line) and match score ("NN%") from AI feedback.

    Returns a (name, score) tuple of strings, falling back to
    ("Unknown", "0") for whichever part is missing.
    """
    first_line = re.search(r"(?i)^(.+?)\n", text)
    percent = re.search(r"(\d{1,3})%", text)
    name = first_line.group(1).strip() if first_line else "Unknown"
    score = percent.group(1) if percent else "0"
    return name, score
def write_to_sheet(sheet, candidates, status, job_title, recruiter_name):
    """Append one row per candidate to the master worksheet.

    Column order: Unique ID, Candidate Name, Vacancy Position, Score,
    Status, Date, Timestamp, AI Feedback, Line Manager Feedback (blank),
    Recruiter, Resume (blank). The two blank columns are back-filled
    later by the sync step.
    """
    for unique_id, feedback in candidates:
        candidate_name, score = extract_name_and_score(feedback)
        stamp = datetime.now()
        row = [
            unique_id,
            candidate_name,
            job_title,
            score,
            status,
            stamp.strftime("%Y-%m-%d"),
            stamp.strftime("%Y-%m-%d %H:%M:%S"),
            feedback,
            "",
            recruiter_name,
            "",
        ]
        sheet.append_row(row)
# ===============================
# MAIN JOB PROCESS
# ===============================
def process_job_description(file_path, recruiter_name, source_sheet_name):
    """End-to-end pipeline: extract the JD text, evaluate every resume from
    the source sheet against it, and append the results to the master sheet.

    Returns a (summary, details) tuple of strings for the Gradio outputs;
    on any failure returns an error message and an empty details string.
    """
    try:
        jd_text = extract_text_from_jd(file_path)
        if not jd_text or not jd_text.strip():
            return "❌ Could not extract job description text.", ""
        # The uploaded file's base name (without extension) doubles as the job title.
        job_title = os.path.splitext(os.path.basename(file_path))[0]
        resume_texts = download_and_extract_resumes_from_sheet(source_sheet_name)
        if not resume_texts:
            return "❌ No resumes extracted.", ""
        results = evaluate_resumes(resume_texts, jd_text)
        client = get_gsheet_client()
        dest_ws = client.open(DESTINATION_SHEET_NAME).sheet1
        # Persist each bucket; "rejected" is written with the label "not selected".
        write_to_sheet(dest_ws, results["shortlisted"], "shortlisted", job_title, recruiter_name)
        write_to_sheet(dest_ws, results["potential"], "potential", job_title, recruiter_name)
        write_to_sheet(dest_ws, results["rejected"], "not selected", job_title, recruiter_name)
        summary = (
            f"βœ… Shortlisting Complete!\n\n"
            f"Shortlisted: {len(results['shortlisted'])}\n"
            f"Potential: {len(results['potential'])}\n"
            f"Rejected: {len(results['rejected'])}\n"
        )
        # Flatten all buckets into one "id\nfeedback" blob per candidate.
        details = "\n\n".join([f"{c}\n{f}" for c, f in results["shortlisted"] + results["potential"] + results["rejected"]])
        return summary, details
    except Exception as e:
        import traceback
        print("[ERROR] process_job_description failed:")
        traceback.print_exc()  # This will print the actual stack trace in Hugging Face logs
        return f"❌ Error: {str(e)}", ""
# ===============================
# SYNC FEEDBACK & RESUMES (NEW)
# ===============================
def sync_feedback_and_resumes():
    """Back-fill the "Line Manager Feedback" and "Resume" columns of the
    master sheet from each position's "<Position> - (Responses)" sheet.

    Rows that already have both values are skipped. Matching is done on
    the "Unique ID" column. Returns a status string for the Gradio
    summary box.
    """
    try:
        client = get_gsheet_client()
        master_ws = client.open(DESTINATION_SHEET_NAME).sheet1
        master_data = master_ws.get_all_records()
        updates = 0
        # Resolve target column indices once, by header name.
        feedback_col = master_ws.find("Line Manager Feedback").col
        resume_col = master_ws.find("Resume").col
        # get_all_records skips the header row, so sheet rows start at 2.
        for i, row in enumerate(master_data, start=2):
            unique_id = row.get("Unique ID")
            position = row.get("Vacancy Position")
            existing_feedback = row.get("Line Manager Feedback", "")
            existing_resume = row.get("Resume", "")
            # Nothing to do when both cells are already populated.
            if (existing_feedback and existing_feedback.strip()) and (existing_resume and existing_resume.strip()):
                continue
            if not unique_id or not position:
                continue
            try:
                # The responses sheet is named after the vacancy position.
                response_sheet_name = f"{position} - (Responses)"
                source_ws = client.open(response_sheet_name).get_worksheet(0)
                source_data = source_ws.get_all_records()
                for src_row in source_data:
                    if str(src_row.get("Unique ID")).strip() == str(unique_id).strip():
                        feedback = src_row.get("Line Manager Feedback", "")
                        resume_link = src_row.get("Please upload your resume", "")
                        if (not existing_feedback and feedback) or (not existing_resume and resume_link):
                            if feedback:
                                master_ws.update_cell(i, feedback_col, feedback)
                            if resume_link:
                                master_ws.update_cell(i, resume_col, resume_link)
                            updates += 1
                        # First matching Unique ID wins; stop scanning this sheet.
                        break
            except Exception as e:
                # Fix: the old `isinstance(e, requests.models.Response)` branch
                # was dead code — a Response is not an exception and can never
                # be caught here, so e.status_code/e.url were unreachable.
                # str(e) covers gspread APIError and everything else.
                print(f"[WARN] Error accessing response sheet for {position}: {e}")
                continue
        return f"βœ… Sync complete! {updates} records updated."
    except Exception as e:
        return f"❌ Error during sync: {e}"
# ===============================
# GRADIO INTERFACE
# ===============================
# Two actions share the Summary box: shortlisting (also fills Details)
# and feedback/resume sync (summary only).
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“„ Resume Intelligence & Feedback Sync Tool")
    # Inputs for the shortlisting run.
    recruiter_name_input = gr.Textbox(label="Recruiter's Name", placeholder="Enter your name")
    source_sheet_name_input = gr.Textbox(label="Source Sheet Name", placeholder="e.g., Key Account Manager - JBS Consulting (Responses)")
    jd_file = gr.File(label="Upload Job Description (PDF/DOCX)", type="filepath")
    with gr.Row():
        submit_btn = gr.Button("πŸš€ Run Resume Shortlisting")
        sync_btn = gr.Button("πŸ” Sync Feedback & Resumes")
    output_summary = gr.Textbox(label="Summary", lines=4)
    output_details = gr.Textbox(label="Evaluation Details", lines=20)
    submit_btn.click(
        fn=process_job_description,
        inputs=[jd_file, recruiter_name_input, source_sheet_name_input],
        outputs=[output_summary, output_details]
    )
    # Sync takes no inputs and only updates the summary box.
    sync_btn.click(
        fn=sync_feedback_and_resumes,
        inputs=[],
        outputs=[output_summary]
    )
# Launch the app for Hugging Face Spaces
if __name__ == "__main__":
    # Bind to all interfaces; Spaces injects the port via $PORT (default 7860).
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))