vernon1224 committed on
Commit
9db3523
·
verified ·
1 Parent(s): 9bd5ec5

Add Application and Requirements files

Browse files
Files changed (2) hide show
  1. app.py +250 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Standard library ---
import ast  # parse the LLM's "Python list of strings" query-expansion output

# --- Third-party ---
import gradio as gr
import numpy as np
from rank_bm25 import BM25Okapi
from sentence_transformers import CrossEncoder

# PDFs / vector store
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings as HFE
from langchain.schema import Document

# Groq / LangChain chat plumbing
from groq import Groq
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# BUG FIX: `from google.colab import userdata` removed — google.colab is not
# installed outside Colab (it is absent from requirements.txt, so the import
# crashed the app on Hugging Face Spaces), and the only use of `userdata`
# is the commented-out GROQ_API lookup below.
# from google.colab import userdata
29
+
30
# GROQ_API = userdata.get('GROQ_API')  # Colab-only secret lookup; the key is now entered in the UI instead
# Name of the sentence-transformers model used for dense (FAISS) embeddings.
embed_model = "sentence-transformers/all-MiniLM-L6-v2"
# Cross-encoder used to re-rank retrieved resume chunks against the user query.
# NOTE(review): both of these are re-created inside screen_resumes as well,
# shadowing these globals — confirm whether the module-level copies are needed.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
33
+
34
# System prompt for the final answering chain: constrains the assistant to the
# resume-screening domain, injects prior turns via {history} and the retrieved
# resume text via {context}; the user question fills {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
        You are a helpful HR assistant specializing in the resume screening phase.
        Your goal is to identify the best, most suitable, or highest-potential
        candidates whose qualifications align well with the provided job title
        and job description. If a question or request falls outside the scope
        of resume screening and candidate alignment,
        please respond with 'I don't know'.
        """),
        # Optional so the very first call (no history yet) still renders.
        MessagesPlaceholder(variable_name="history", optional=True),
        ("system", "Context: {context}"),
        ("human", "{question}"),
    ]
)
49
+
50
# Prompt that asks the LLM to expand the user's query into 3 diverse search
# queries; the model is instructed to reply with a bare Python list of strings,
# which screen_resumes parses with ast.literal_eval.
query_expansion_prompt = ChatPromptTemplate([
    ("system", """
    You are an expert HR assistant. Given a job description and a user query,
    generate 3 alternative, diverse search queries that capture different
    aspects of what makes a great candidate for this role. Each query should
    focus on a different facet (e.g., skills, leadership, hands-on experience,
    certifications, unique achievements).

    If the job description is empty, generate a general job description for the role
    mentioned in the user query and then create the 3 alternative search queries based on that.

    Return ONLY the generated queries as a Python list of strings. Do not include
    any other explanatory text or formatting.
    """),
    ("human", "Job Description: {job_description}\nUser Query: {user_query}")
])
66
+
67
# Template for the LLM-as-a-judge evaluation pass. Filled via str.format with
# job_description / user_query / system_answer in llm_judge_groq.
JUDGE_PROMPT = """
You are an expert recruiter. Given the job description, the user query, and the system's answer, rate:

Faithfulness: Does the answer accurately reflect the resume(s) provided? (1-5)
Relevance: Does the answer address the job requirements and user query? (1-5)

Provide your feedback as follows:

Faithfulness: <score>
Relevance: <score>
Justification: <brief explanation>

Job Description:
{job_description}

User Query:
{user_query}

System Answer:
{system_answer}
"""
88
+
89
def load_single_pdf(path):
    """Load one PDF and return it as a single Document (all pages joined by newlines)."""
    page_docs = PyPDFLoader(path).load()
    merged_text = "\n".join(page.page_content for page in page_docs)
    return Document(page_content=merged_text)
94
+
95
def chunks_embed(chunks, model_name):
    """Create embeds for doc chunks and store in FAISS.

    Embeds every chunk with the named HuggingFace model and builds a
    FAISS index over them; returns the index.
    """
    index = FAISS.from_documents(chunks, HFE(model_name=model_name))
    print(f"Created FAISS Index with {len(chunks)} documents.")
    return index
102
+
103
def search_docs_mmr(db, query, k, fetch_k, lambda_mult):
    """
    Retrieve the docs most similar to *query* using Maximum Marginal Relevance.

    Args:
        db: FAISS vector store (falsy when no documents are loaded).
        query: search string.
        k: number of documents to return.
        fetch_k: number of candidates to fetch before MMR diversification.
        lambda_mult: relevance/diversity trade-off in [0, 1] (1 = pure relevance).

    Returns:
        List of matching documents; empty list when no database is available.
    """
    if not db:
        print("Error: No document database available")
        return []

    # BUG FIX: the original call passed k=fetch_k and never forwarded fetch_k,
    # so the `k` parameter was silently ignored (fetch_k results were returned)
    # and MMR had no wider candidate pool to diversify over.
    docs = db.max_marginal_relevance_search(
        query, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult
    )
    return docs
116
+
117
def combine_results(results):
    """Concatenate retrieved docs' text (each newline-terminated) into one context string."""
    return "".join(f"{doc.page_content}\n" for doc in results)
123
+
124
# 1. Prepare corpus for BM25
def prepare_bm25_corpus(docs):
    """Tokenize each doc for BM25: lower-case + whitespace split (naive, could improve)."""
    corpus = []
    for doc in docs:
        corpus.append(doc.page_content.lower().split())
    return corpus
128
+
129
# 2. Initialize BM25
def init_bm25(docs):
    """Build and return a BM25Okapi index over the tokenized documents."""
    return BM25Okapi(prepare_bm25_corpus(docs))
133
+
134
# 3. BM25 Search
def bm25_search(bm25, query, docs, top_k=10):
    """Score docs against the query with BM25; return (top_k docs, their scores), best first."""
    tokens = query.lower().split()
    scores = bm25.get_scores(tokens)
    best = np.argsort(scores)[::-1][:top_k]
    top_docs = [docs[i] for i in best]
    top_scores = [scores[i] for i in best]
    return top_docs, top_scores
140
+
141
+ # Hybrid Merge Functino
142
def hybrid_merge(semantic_results, bm25_results):
    """Union of both result lists, de-duplicated by page text; semantic hits keep priority."""
    merged, seen_texts = [], set()
    for doc in semantic_results + bm25_results:
        text = doc.page_content
        if text in seen_texts:
            continue
        seen_texts.add(text)
        merged.append(doc)
    return merged
151
+
152
def llm_judge_groq(api_key, job_description, user_query, system_answer):
    """Ask a DeepSeek judge model (via the Groq API) to rate the system's answer.

    Fills JUDGE_PROMPT with the given fields and returns the judge's raw
    feedback text (Faithfulness / Relevance scores + justification).
    """
    filled_prompt = JUDGE_PROMPT.format(
        job_description=job_description,
        user_query=user_query,
        system_answer=system_answer,
    )
    response = Groq(api_key=api_key).chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[{"role": "user", "content": filled_prompt}],
        max_tokens=512,
    )
    return response.choices[0].message.content
165
+
166
def screen_resumes(api_key, job_description, user_query, files):
    """
    End-to-end resume screening pipeline for the Gradio UI.

    Steps: load PDFs -> LLM query expansion -> hybrid retrieval
    (FAISS MMR + BM25) -> cross-encoder re-ranking -> LLM answer
    with chat history -> LLM-as-a-judge evaluation.

    Args:
        api_key: Groq API key entered by the user.
        job_description: free-text job description (may be empty).
        user_query: the screening question to answer.
        files: uploaded PDF file objects from gr.File.

    Returns:
        (system_output, context, judge_feedback) strings for the three outputs.
    """
    # Re-created per call so each request is self-contained; these shadow the
    # module-level globals of the same names.
    embed_model = "sentence-transformers/all-MiniLM-L6-v2"
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

    # Model and prompt setup (inside function, using user API key)
    model = ChatGroq(model="llama-3.1-8b-instant", api_key=api_key)
    history = {}

    def get_session_history(session_id: str):
        # Lazily create one chat history per session id.
        if session_id not in history:
            history[session_id] = ChatMessageHistory()
        return history[session_id]

    # BUG FIX: removed the original dead assignment that wrapped the bare
    # `model` in RunnableWithMessageHistory and was immediately overwritten.
    chain = prompt | model
    with_message_history = RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="question",
        history_messages_key="history",
    )

    # Load and process resumes (one Document per PDF)
    resume_paths = [file.name for file in files]
    chunks = [load_single_pdf(path) for path in resume_paths]
    embeds = chunks_embed(chunks, embed_model)
    bm25 = init_bm25(chunks)

    # Query Expansion: ask the LLM for 3 alternative search queries.
    prompt_value = query_expansion_prompt.invoke({
        "job_description": job_description,
        "user_query": user_query,
    })
    expanded_queries_response = model.invoke(prompt_value.messages)
    try:
        expanded_queries = ast.literal_eval(expanded_queries_response.content)
    except (ValueError, SyntaxError):
        # ROBUSTNESS: the LLM does not always return a clean Python list;
        # fall back to the raw user query instead of crashing the request.
        expanded_queries = [user_query]

    # Hybrid Retrieval over every expanded query
    all_semantic = []
    all_bm25 = []
    for q in expanded_queries:
        semantic_docs = search_docs_mmr(embeds, q, 10, 100, 0.7)
        bm25_docs, _ = bm25_search(bm25, q, chunks, top_k=10)
        all_semantic.extend(semantic_docs)
        all_bm25.extend(bm25_docs)
    unique_results_list = hybrid_merge(all_semantic, all_bm25)

    # Cross-encoder Re-ranking: score (query, doc) pairs and keep the top 5.
    pairs = [(user_query, doc.page_content) for doc in unique_results_list]
    scores = cross_encoder.predict(pairs)
    ranked = sorted(zip(scores, unique_results_list), key=lambda x: x[0], reverse=True)
    top_n = min(5, len(ranked))
    ranked_top_n = [doc for score, doc in ranked[:top_n]]
    context = "\n\n".join([doc.page_content for doc in ranked_top_n])

    # LLM Final Reasoning with chat history
    inputs = {
        "context": context,
        "question": user_query,
    }
    config = {"configurable": {"session_id": "GradioSession"}}
    response = with_message_history.invoke(inputs, config=config)
    system_output = response.content

    # LLM-as-a-Judge Evaluation
    judge_feedback = llm_judge_groq(api_key, job_description, user_query, system_output)

    return system_output, context, judge_feedback
232
+
233
# Gradio UI: wires screen_resumes to four inputs and three text outputs.
demo = gr.Interface(
    fn=screen_resumes,
    inputs=[
        gr.Textbox(label="Groq API Key", type="password", lines=1, placeholder="sk..."),
        gr.Textbox(lines=4, label="Job Description"),
        gr.Textbox(lines=2, label="User Query"),
        gr.File(file_count="multiple", label="Upload Resume PDFs")
    ],
    outputs=[
        gr.Textbox(label="Screening Result (LLM Output)"),
        gr.Textbox(label="Top Ranked Resumes (Raw Text)"),
        gr.Textbox(label="LLM-as-a-Judge Evaluation (DeepSeek)")
    ],
    title="Resume Screening Assistant (Hybrid + LLM-as-a-Judge)",
    description="Enter your Groq API key, upload resumes, enter a job description and query, get the best candidates with explanations, and see an automated evaluation."
)

# share=True requests a public Gradio tunnel link (useful in notebooks;
# NOTE(review): on Hugging Face Spaces it is ignored with a warning).
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ langchain-community
4
+ langchain-huggingface
5
+ langchain-groq
6
+ faiss-cpu
7
+ pypdf
8
+ torch
9
+ sentence-transformers
10
+ rank_bm25
11
+ groq