Spaces:

vernon1224
/

resume-screener

Running

App Files Files Community

resume-screener / app.py

vernon1224

Update app.py

a438728 verified 8 months ago

raw

history blame contribute delete

8.59 kB

	# PDFs
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings as HFE
	from langchain.schema import Document

	# Groq
	from langchain_groq import ChatGroq
	from langchain_core.messages import HumanMessage
	from langchain_community.chat_message_histories import ChatMessageHistory
	from langchain_core.chat_history import BaseChatMessageHistory
	from langchain_core.runnables.history import RunnableWithMessageHistory
	from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
	from groq import Groq

	# Expanded Queries
	import ast

	# Cross Encoder
	from sentence_transformers import CrossEncoder

	# BM25
	from rank_bm25 import BM25Okapi
	import numpy as np

	# Gradio
	import gradio as gr

	# The Groq API key is not read at module level; screen_resumes() receives it
	# as its first argument instead.
	# GROQ_API = userdata.get('GROQ_API')
	# Model id of the sentence-embedding model used for the resume documents.
	embed_model = "sentence-transformers/all-MiniLM-L6-v2"
	# Cross-encoder re-ranker; it is instantiated here, i.e. when the module is imported.
	cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

	# Chat prompt for the final screening answer. In order it contains: a fixed
	# system instruction, the optional prior conversation ("history"), the
	# retrieved resume text ("context") and the user's question ("question").
	prompt = ChatPromptTemplate.from_messages(
	[
	("system", """
	You are a helpful HR assistant specializing in the resume screening phase.
	Your goal is to identify the best, most suitable, or highest-potential
	candidates whose qualifications align well with the provided job title
	and job description. If a question or request falls outside the scope
	of resume screening and candidate alignment,
	please respond with 'I don't know'.
	"""),
	MessagesPlaceholder(variable_name="history", optional=True),
	("system", "Context: {context}"),
	("human", "{question}"),
	]
	)

	# Prompt that asks the model for 3 alternative search queries, to be returned
	# as a Python list literal. It takes the "job_description" and "user_query"
	# variables; screen_resumes() parses the reply before running retrieval.
	query_expansion_prompt = ChatPromptTemplate([
	("system", """
	You are an expert HR assistant. Given a job description and a user query,
	generate 3 alternative, diverse search queries that capture different
	aspects of what makes a great candidate for this role. Each query should
	focus on a different facet (e.g., skills, leadership, hands-on experience,
	certifications, unique achievements).

	If the job description is empty, generate a general job description for the role
	mentioned in the user query and then create the 3 alternative search queries based on that.

	Return ONLY the generated queries as a Python list of strings. Do not include
	any other explanatory text or formatting.
	"""),
	("human", "Job Description: {job_description}\nUser Query: {user_query}")
	])

	# Template for the LLM-as-a-judge request. llm_judge_groq() fills the
	# {job_description}, {user_query} and {system_answer} fields with str.format().
	JUDGE_PROMPT = """
	You are an expert recruiter. Given the job description, the user query, and the system's answer, rate:

	Faithfulness: Does the answer accurately reflect the resume(s) provided? (1-5)
	Relevance: Does the answer address the job requirements and user query? (1-5)

	Provide your feedback as follows:

	Faithfulness: <score>
	Relevance: <score>
	Justification: <brief explanation>

	Job Description:
	{job_description}

	User Query:
	{user_query}

	System Answer:
	{system_answer}
	"""

	def load_single_pdf(path):
	    """Load one PDF and return it as a single LangChain ``Document``.

	    Every page is read with ``PyPDFLoader`` and the page texts are joined
	    with newlines, so one resume file becomes exactly one document.

	    Args:
	        path: Filesystem path of the PDF to read.

	    Returns:
	        A ``Document`` whose ``page_content`` is the text of all pages and
	        whose ``metadata["source"]`` records ``path``, so a retrieved
	        resume can be traced back to the file it came from.
	    """
	    pages = PyPDFLoader(path).load()
	    full_text = "\n".join(page.page_content for page in pages)
	    return Document(page_content=full_text, metadata={"source": path})

	def chunks_embed(chunks, model_name):
	    """Embed the documents and store them in a new FAISS index.

	    Args:
	        chunks: Documents to index.
	        model_name: Name of the Hugging Face embedding model to use.

	    Returns:
	        The FAISS vector store built from ``chunks``.
	    """
	    embedding_fn = HFE(model_name=model_name)
	    index = FAISS.from_documents(chunks, embedding_fn)
	    print(f"Created FAISS Index with {len(chunks)} documents.")
	    return index

	def search_docs_mmr(db, query, k, fetch_k, lambda_mult):
	    """Retrieve documents for ``query`` with Maximum Marginal Relevance (MMR).

	    Args:
	        db: Vector store exposing ``max_marginal_relevance_search``.
	        query: Search text.
	        k: Number of documents to return.
	        fetch_k: Size of the candidate pool that MMR selects from.
	        lambda_mult: Relevance/diversity trade-off forwarded to the store.

	    Returns:
	        The documents returned by the store, or ``[]`` when ``db`` is falsy.
	    """
	    if not db:
	        print("Error: No document database available")
	        return []

	    # k and fetch_k are distinct knobs and must be forwarded separately:
	    # sending fetch_k as k returns the whole candidate pool and silently
	    # ignores the number of results the caller asked for.
	    return db.max_marginal_relevance_search(
	        query, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult
	    )

	def combine_results(results):
	    """Concatenate the text of the retrieved documents into one context string.

	    Args:
	        results: Iterable of documents exposing ``page_content``.

	    Returns:
	        Every document's ``page_content`` followed by a newline, in input
	        order; an empty string when ``results`` is empty.
	    """
	    # A single join avoids rebuilding the growing string on every iteration.
	    return "".join(doc.page_content + "\n" for doc in results)

	# Step 1: build the tokenized corpus that BM25 is fitted on.
	def prepare_bm25_corpus(docs):
	    """Return one token list per document: lowercased, split on whitespace."""
	    lowered_texts = (doc.page_content.lower() for doc in docs)
	    return [text.split() for text in lowered_texts]

	# Step 2: fit a BM25 model on the tokenized documents.
	def init_bm25(docs):
	    """Build a ``BM25Okapi`` index over the tokenized form of ``docs``."""
	    return BM25Okapi(prepare_bm25_corpus(docs))

	# Step 3: rank the documents against a query with BM25.
	def bm25_search(bm25, query, docs, top_k=10):
	    """Return the ``top_k`` best-scoring documents for ``query``.

	    Args:
	        bm25: Object exposing ``get_scores(tokens)`` with one score per document.
	        query: Search text; lowercased and split on whitespace before scoring.
	        docs: Documents in the same order as the scores.
	        top_k: Maximum number of hits to return.

	    Returns:
	        A ``(documents, scores)`` pair of lists, ordered by descending score.
	    """
	    scores = bm25.get_scores(query.lower().split())
	    best_first = np.argsort(scores)[::-1]
	    picked = best_first[:top_k]
	    hits = [docs[idx] for idx in picked]
	    hit_scores = [scores[idx] for idx in picked]
	    return hits, hit_scores

	# Hybrid merge of the semantic and BM25 result lists.
	def hybrid_merge(semantic_results, bm25_results):
	    """Union the two result lists, dropping documents with repeated text.

	    Semantic hits come first, followed by BM25 hits whose ``page_content``
	    has not been seen yet; the first occurrence of each text is the one kept.
	    """
	    first_by_text = {}
	    for doc in semantic_results + bm25_results:
	        # setdefault keeps the earliest document for a given text.
	        first_by_text.setdefault(doc.page_content, doc)
	    return list(first_by_text.values())

	def llm_judge_groq(api_key, job_description, user_query, system_answer):
	    """Ask a Groq-hosted model to grade the screening answer.

	    Args:
	        api_key: Groq API key used to create the client.
	        job_description: Job description shown to the judge.
	        user_query: The user's original question.
	        system_answer: The answer produced by the screening chain.

	    Returns:
	        The text content of the first completion choice.
	    """
	    filled_prompt = JUDGE_PROMPT.format(
	        system_answer=system_answer,
	        user_query=user_query,
	        job_description=job_description,
	    )
	    messages = [{"role": "user", "content": filled_prompt}]
	    response = Groq(api_key=api_key).chat.completions.create(
	        model="deepseek-r1-distill-llama-70b",
	        messages=messages,
	        max_tokens=512,
	    )
	    first_choice = response.choices[0]
	    return first_choice.message.content

	def _parse_expanded_queries(raw_reply, fallback_query):
	    """Turn the model's query-expansion reply into a non-empty list of strings."""
	    if not isinstance(raw_reply, str):
	        return [fallback_query]
	    # Replies are often wrapped in prose or code fences; keep only the
	    # outermost [...] span before evaluating it as a Python literal.
	    start = raw_reply.find("[")
	    end = raw_reply.rfind("]")
	    candidate = raw_reply[start:end + 1] if 0 <= start < end else raw_reply
	    try:
	        parsed = ast.literal_eval(candidate.strip())
	    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	        return [fallback_query]
	    if not isinstance(parsed, (list, tuple)):
	        return [fallback_query]
	    queries = [q.strip() for q in parsed if isinstance(q, str) and q.strip()]
	    return queries or [fallback_query]


	def screen_resumes(api_key, job_description, user_query, files):
	    """Screen uploaded resumes against a job description and a user query.

	    Pipeline: load each PDF as one document, index the documents in FAISS
	    and BM25, expand the query with the LLM, retrieve with MMR and BM25 for
	    every expanded query, merge and de-duplicate the hits, re-rank them
	    with the module-level cross-encoder, answer from the top 5 documents,
	    and finally have a second model grade the answer.

	    Args:
	        api_key: Groq API key used for both the chat model and the judge.
	        job_description: Job description text (may be empty).
	        user_query: The user's question about the candidates.
	        files: Uploaded resume PDFs; items may be objects with a ``name``
	            attribute holding the path, or plain path strings.

	    Returns:
	        A ``(system_output, context, judge_feedback)`` tuple of strings.
	        When no files are supplied, a short notice is returned in the first
	        slot and the other two are empty.
	    """
	    # Nothing uploaded arrives as None or an empty list; answer with a
	    # notice instead of failing while iterating over the files.
	    if not files:
	        return "Please upload at least one resume PDF.", "", ""

	    # Model and prompt setup (inside the function, using the caller's API key)
	    model = ChatGroq(model="llama-3.1-8b-instant", api_key=api_key)

	    # The store is created per call, so chat history does not carry over
	    # from one request to the next.
	    history = {}

	    def get_session_history(session_id: str):
	        if session_id not in history:
	            history[session_id] = ChatMessageHistory()
	        return history[session_id]

	    chain = prompt | model
	    with_message_history = RunnableWithMessageHistory(
	        chain,
	        get_session_history,
	        input_messages_key="question",
	        history_messages_key="history",
	    )

	    # Load and index the resumes. embed_model and cross_encoder are the
	    # module-level values, so the re-ranker is not reloaded on every request.
	    resume_paths = [getattr(file, "name", file) for file in files]
	    chunks = [load_single_pdf(path) for path in resume_paths]
	    embeds = chunks_embed(chunks, embed_model)
	    bm25 = init_bm25(chunks)

	    # Query expansion; fall back to the user's own query when the reply
	    # cannot be parsed into a list of strings.
	    prompt_value = query_expansion_prompt.invoke({
	        "job_description": job_description,
	        "user_query": user_query,
	    })
	    expanded_queries_response = model.invoke(prompt_value.messages)
	    expanded_queries = _parse_expanded_queries(
	        expanded_queries_response.content, user_query
	    )

	    # Hybrid retrieval
	    all_semantic = []
	    all_bm25 = []
	    for q in expanded_queries:
	        all_semantic.extend(search_docs_mmr(embeds, q, 10, 100, 0.7))
	        bm25_docs, _ = bm25_search(bm25, q, chunks, top_k=10)
	        all_bm25.extend(bm25_docs)
	    unique_results_list = hybrid_merge(all_semantic, all_bm25)

	    # Cross-encoder re-ranking against the original user query
	    pairs = [(user_query, doc.page_content) for doc in unique_results_list]
	    scores = cross_encoder.predict(pairs)
	    ranked = sorted(zip(scores, unique_results_list), key=lambda x: x[0], reverse=True)
	    ranked_top_n = [doc for _, doc in ranked[:5]]
	    context = "\n\n".join(doc.page_content for doc in ranked_top_n)

	    # LLM final reasoning
	    inputs = {
	        "context": context,
	        "question": user_query,
	    }
	    config = {"configurable": {"session_id": "GradioSession"}}
	    response = with_message_history.invoke(inputs, config=config)
	    system_output = response.content

	    # LLM-as-a-judge evaluation
	    judge_feedback = llm_judge_groq(api_key, job_description, user_query, system_output)

	    return system_output, context, judge_feedback

	# Gradio UI. The four inputs map positionally onto
	# screen_resumes(api_key, job_description, user_query, files), and the three
	# output boxes receive its returned tuple in order.
	demo = gr.Interface(
	fn=screen_resumes,
	inputs=[
	gr.Textbox(label="Groq API Key", type="password", lines=1, placeholder="sk..."),
	gr.Textbox(lines=4, label="Job Description"),
	gr.Textbox(lines=2, label="User Query"),
	gr.File(file_count="multiple", label="Upload Resume PDFs")
	],
	outputs=[
	gr.Textbox(label="Screening Result (LLM Output)"),
	gr.Textbox(label="Top Ranked Resumes (Raw Text)"),
	gr.Textbox(label="LLM-as-a-Judge Evaluation (DeepSeek)")
	],
	title="Resume Screening Assistant (Hybrid + LLM-as-a-Judge)",
	description="Enter your Groq API key, upload resumes, enter a job description and query, get the best candidates with explanations, and see an automated evaluation."
	)

	# Starts the app as soon as this module is executed.
	# NOTE(review): share=True requests a public share link - confirm that is
	# intended for this deployment.
	demo.launch(share=True)