vernon1224 committed on
Commit
9db3523
·
verified ·
1 Parent(s): 9bd5ec5

Add Application and Requirements files

Browse files
Files changed (2) hide show
  1. app.py +250 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Standard library ---
import ast  # parse the LLM's "Python list of strings" query-expansion output

# --- Third-party ---
import gradio as gr
import numpy as np
from rank_bm25 import BM25Okapi
from sentence_transformers import CrossEncoder

# PDFs / vector store
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings as HFE
from langchain.schema import Document

# Groq / LangChain chat plumbing
from groq import Groq
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# BUG FIX: `from google.colab import userdata` removed — google.colab is not
# installed outside Colab (it is absent from requirements.txt, so the import
# crashed the app on Hugging Face Spaces), and the only use of `userdata`
# is the commented-out GROQ_API lookup below.
# from google.colab import userdata
29
+
30
# GROQ_API = userdata.get('GROQ_API')  # Colab-only secret lookup; the key is now entered in the UI instead
# Name of the sentence-transformers model used for dense (FAISS) embeddings.
embed_model = "sentence-transformers/all-MiniLM-L6-v2"
# Cross-encoder used to re-rank retrieved resume chunks against the user query.
# NOTE(review): both of these are re-created inside screen_resumes as well,
# shadowing these globals — confirm whether the module-level copies are needed.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
33
+
34
# System prompt for the final answering chain: constrains the assistant to the
# resume-screening domain, injects prior turns via {history} and the retrieved
# resume text via {context}; the user question fills {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
        You are a helpful HR assistant specializing in the resume screening phase.
        Your goal is to identify the best, most suitable, or highest-potential
        candidates whose qualifications align well with the provided job title
        and job description. If a question or request falls outside the scope
        of resume screening and candidate alignment,
        please respond with 'I don't know'.
        """),
        # Optional so the very first call (no history yet) still renders.
        MessagesPlaceholder(variable_name="history", optional=True),
        ("system", "Context: {context}"),
        ("human", "{question}"),
    ]
)
49
+
50
# Prompt that asks the LLM to expand the user's query into 3 diverse search
# queries; the model is instructed to reply with a bare Python list of strings,
# which screen_resumes parses with ast.literal_eval.
query_expansion_prompt = ChatPromptTemplate([
    ("system", """
    You are an expert HR assistant. Given a job description and a user query,
    generate 3 alternative, diverse search queries that capture different
    aspects of what makes a great candidate for this role. Each query should
    focus on a different facet (e.g., skills, leadership, hands-on experience,
    certifications, unique achievements).

    If the job description is empty, generate a general job description for the role
    mentioned in the user query and then create the 3 alternative search queries based on that.

    Return ONLY the generated queries as a Python list of strings. Do not include
    any other explanatory text or formatting.
    """),
    ("human", "Job Description: {job_description}\nUser Query: {user_query}")
])
66
+
67
# Template for the LLM-as-a-judge evaluation pass. Filled via str.format with
# job_description / user_query / system_answer in llm_judge_groq.
JUDGE_PROMPT = """
You are an expert recruiter. Given the job description, the user query, and the system's answer, rate:

Faithfulness: Does the answer accurately reflect the resume(s) provided? (1-5)
Relevance: Does the answer address the job requirements and user query? (1-5)

Provide your feedback as follows:

Faithfulness: <score>
Relevance: <score>
Justification: <brief explanation>

Job Description:
{job_description}

User Query:
{user_query}

System Answer:
{system_answer}
"""
88
+
89
def load_single_pdf(path):
    """Load one PDF and return it as a single Document (all pages joined by newlines)."""
    page_docs = PyPDFLoader(path).load()
    merged_text = "\n".join(page.page_content for page in page_docs)
    return Document(page_content=merged_text)
94
+
95
def chunks_embed(chunks, model_name):
    """Create embeds for doc chunks and store in FAISS.

    Embeds every chunk with the named HuggingFace model and builds a
    FAISS index over them; returns the index.
    """
    index = FAISS.from_documents(chunks, HFE(model_name=model_name))
    print(f"Created FAISS Index with {len(chunks)} documents.")
    return index
102
+
103
def search_docs_mmr(db, query, k, fetch_k, lambda_mult):
    """
    Retrieve the docs most similar to *query* using Maximum Marginal Relevance.

    Args:
        db: FAISS vector store (falsy when no documents are loaded).
        query: search string.
        k: number of documents to return.
        fetch_k: number of candidates to fetch before MMR diversification.
        lambda_mult: relevance/diversity trade-off in [0, 1] (1 = pure relevance).

    Returns:
        List of matching documents; empty list when no database is available.
    """
    if not db:
        print("Error: No document database available")
        return []

    # BUG FIX: the original call passed k=fetch_k and never forwarded fetch_k,
    # so the `k` parameter was silently ignored (fetch_k results were returned)
    # and MMR had no wider candidate pool to diversify over.
    docs = db.max_marginal_relevance_search(
        query, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult
    )
    return docs
116
+
117
def combine_results(results):
    """Concatenate retrieved docs' text (each newline-terminated) into one context string."""
    return "".join(f"{doc.page_content}\n" for doc in results)
123
+
124
# 1. Prepare corpus for BM25
def prepare_bm25_corpus(docs):
    """Tokenize each doc for BM25: lower-case + whitespace split (naive, could improve)."""
    corpus = []
    for doc in docs:
        corpus.append(doc.page_content.lower().split())
    return corpus
128
+
129
# 2. Initialize BM25
def init_bm25(docs):
    """Build and return a BM25Okapi index over the tokenized documents."""
    return BM25Okapi(prepare_bm25_corpus(docs))
133
+
134
# 3. BM25 Search
def bm25_search(bm25, query, docs, top_k=10):
    """Score docs against the query with BM25; return (top_k docs, their scores), best first."""
    tokens = query.lower().split()
    scores = bm25.get_scores(tokens)
    best = np.argsort(scores)[::-1][:top_k]
    top_docs = [docs[i] for i in best]
    top_scores = [scores[i] for i in best]
    return top_docs, top_scores
140
+
141
+ # Hybrid Merge Functino
142
def hybrid_merge(semantic_results, bm25_results):
    """Union of both result lists, de-duplicated by page text; semantic hits keep priority."""
    merged, seen_texts = [], set()
    for doc in semantic_results + bm25_results:
        text = doc.page_content
        if text in seen_texts:
            continue
        seen_texts.add(text)
        merged.append(doc)
    return merged
151
+
152
def llm_judge_groq(api_key, job_description, user_query, system_answer):
    """Ask a DeepSeek judge model (via the Groq API) to rate the system's answer.

    Fills JUDGE_PROMPT with the given fields and returns the judge's raw
    feedback text (Faithfulness / Relevance scores + justification).
    """
    filled_prompt = JUDGE_PROMPT.format(
        job_description=job_description,
        user_query=user_query,
        system_answer=system_answer,
    )
    response = Groq(api_key=api_key).chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[{"role": "user", "content": filled_prompt}],
        max_tokens=512,
    )
    return response.choices[0].message.content
165
+
166
def screen_resumes(api_key, job_description, user_query, files):
    """
    End-to-end resume screening pipeline for the Gradio UI.

    Steps: load PDFs -> LLM query expansion -> hybrid retrieval
    (FAISS MMR + BM25) -> cross-encoder re-ranking -> LLM answer
    with chat history -> LLM-as-a-judge evaluation.

    Args:
        api_key: Groq API key entered by the user.
        job_description: free-text job description (may be empty).
        user_query: the screening question to answer.
        files: uploaded PDF file objects from gr.File.

    Returns:
        (system_output, context, judge_feedback) strings for the three outputs.
    """
    # Re-created per call so each request is self-contained; these shadow the
    # module-level globals of the same names.
    embed_model = "sentence-transformers/all-MiniLM-L6-v2"
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

    # Model and prompt setup (inside function, using user API key)
    model = ChatGroq(model="llama-3.1-8b-instant", api_key=api_key)
    history = {}

    def get_session_history(session_id: str):
        # Lazily create one chat history per session id.
        if session_id not in history:
            history[session_id] = ChatMessageHistory()
        return history[session_id]

    # BUG FIX: removed the original dead assignment that wrapped the bare
    # `model` in RunnableWithMessageHistory and was immediately overwritten.
    chain = prompt | model
    with_message_history = RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="question",
        history_messages_key="history",
    )

    # Load and process resumes (one Document per PDF)
    resume_paths = [file.name for file in files]
    chunks = [load_single_pdf(path) for path in resume_paths]
    embeds = chunks_embed(chunks, embed_model)
    bm25 = init_bm25(chunks)

    # Query Expansion: ask the LLM for 3 alternative search queries.
    prompt_value = query_expansion_prompt.invoke({
        "job_description": job_description,
        "user_query": user_query,
    })
    expanded_queries_response = model.invoke(prompt_value.messages)
    try:
        expanded_queries = ast.literal_eval(expanded_queries_response.content)
    except (ValueError, SyntaxError):
        # ROBUSTNESS: the LLM does not always return a clean Python list;
        # fall back to the raw user query instead of crashing the request.
        expanded_queries = [user_query]

    # Hybrid Retrieval over every expanded query
    all_semantic = []
    all_bm25 = []
    for q in expanded_queries:
        semantic_docs = search_docs_mmr(embeds, q, 10, 100, 0.7)
        bm25_docs, _ = bm25_search(bm25, q, chunks, top_k=10)
        all_semantic.extend(semantic_docs)
        all_bm25.extend(bm25_docs)
    unique_results_list = hybrid_merge(all_semantic, all_bm25)

    # Cross-encoder Re-ranking: score (query, doc) pairs and keep the top 5.
    pairs = [(user_query, doc.page_content) for doc in unique_results_list]
    scores = cross_encoder.predict(pairs)
    ranked = sorted(zip(scores, unique_results_list), key=lambda x: x[0], reverse=True)
    top_n = min(5, len(ranked))
    ranked_top_n = [doc for score, doc in ranked[:top_n]]
    context = "\n\n".join([doc.page_content for doc in ranked_top_n])

    # LLM Final Reasoning with chat history
    inputs = {
        "context": context,
        "question": user_query,
    }
    config = {"configurable": {"session_id": "GradioSession"}}
    response = with_message_history.invoke(inputs, config=config)
    system_output = response.content

    # LLM-as-a-Judge Evaluation
    judge_feedback = llm_judge_groq(api_key, job_description, user_query, system_output)

    return system_output, context, judge_feedback
232
+
233
# Gradio UI: wires screen_resumes to four inputs and three text outputs.
demo = gr.Interface(
    fn=screen_resumes,
    inputs=[
        gr.Textbox(label="Groq API Key", type="password", lines=1, placeholder="sk..."),
        gr.Textbox(lines=4, label="Job Description"),
        gr.Textbox(lines=2, label="User Query"),
        gr.File(file_count="multiple", label="Upload Resume PDFs")
    ],
    outputs=[
        gr.Textbox(label="Screening Result (LLM Output)"),
        gr.Textbox(label="Top Ranked Resumes (Raw Text)"),
        gr.Textbox(label="LLM-as-a-Judge Evaluation (DeepSeek)")
    ],
    title="Resume Screening Assistant (Hybrid + LLM-as-a-Judge)",
    description="Enter your Groq API key, upload resumes, enter a job description and query, get the best candidates with explanations, and see an automated evaluation."
)

# share=True requests a public Gradio tunnel link (useful in notebooks;
# NOTE(review): on Hugging Face Spaces it is ignored with a warning).
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ langchain-community
4
+ langchain-huggingface
5
+ langchain-groq
6
+ faiss-cpu
7
+ pypdf
8
+ torch
9
+ sentence-transformers
10
+ rank_bm25
11
+ groq