telcom committed on
Commit
f7b57d9
Β·
verified Β·
1 Parent(s): bebc177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -29
app.py CHANGED
@@ -1,29 +1,43 @@
1
  import gradio as gr
2
  import torch
3
  import spaces
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
8
  import PyPDF2
9
  from docx import Document
10
 
 
11
  class ResumeRAG:
12
  def __init__(self):
13
  self.has_cuda = torch.cuda.is_available()
14
  self.device = "cuda" if self.has_cuda else "cpu"
15
  print(f"Using device: {self.device}")
16
 
 
17
  self.embeddings = HuggingFaceEmbeddings(
18
  model_name="sentence-transformers/all-MiniLM-L6-v2",
19
  model_kwargs={"device": self.device},
20
  )
21
 
 
 
 
 
 
 
 
22
  model_name = "mistralai/Mistral-7B-Instruct-v0.2"
23
 
24
  if not self.has_cuda:
25
- raise RuntimeError("GPU not available. Set Space hardware to GPU or use the CPU fallback option.")
 
 
26
 
 
27
  quantization_config = BitsAndBytesConfig(
28
  load_in_4bit=True,
29
  bnb_4bit_compute_dtype=torch.float16,
@@ -31,43 +45,58 @@ class ResumeRAG:
31
  bnb_4bit_quant_type="nf4",
32
  )
33
 
34
- print("Loading model...")
35
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
36
  self.model = AutoModelForCausalLM.from_pretrained(
37
  model_name,
38
  quantization_config=quantization_config,
39
- device_map="auto",
40
- trust_remote_code=True,
41
  )
42
 
43
- self.vector_store = None
44
- self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 
45
 
46
  def extract_text_from_pdf(self, file_path: str) -> str:
47
- with open(file_path, "rb") as f:
48
- reader = PyPDF2.PdfReader(f)
49
- return "".join([(p.extract_text() or "") for p in reader.pages])
 
 
 
50
 
51
  def extract_text_from_docx(self, file_path: str) -> str:
52
- doc = Document(file_path)
53
- return "\n".join([p.text for p in doc.paragraphs])
 
 
 
54
 
55
  def process_resume(self, file) -> str:
56
  if file is None:
57
  return "Please upload a resume file."
58
 
59
  file_path = file.name
60
- if file_path.endswith(".pdf"):
61
  text = self.extract_text_from_pdf(file_path)
62
- elif file_path.endswith(".docx"):
63
  text = self.extract_text_from_docx(file_path)
64
  else:
65
  return "Unsupported file format. Please upload PDF or DOCX."
66
 
 
 
 
67
  if not text.strip():
68
  return "No text could be extracted from the resume."
69
 
70
  chunks = self.text_splitter.split_text(text)
 
 
 
71
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
72
  return f"βœ… Resume processed successfully! Extracted {len(chunks)} text chunks."
73
 
@@ -79,10 +108,14 @@ Context:
79
 
80
  Question: {question}
81
 
82
- Answer only from the context. If missing, say it is not in the resume. [/INST]"""
83
 
84
- # IMPORTANT: do NOT push inputs to self.device when device_map="auto"
85
  inputs = self.tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
86
  with torch.no_grad():
87
  outputs = self.model.generate(
88
  **inputs,
@@ -94,47 +127,108 @@ Answer only from the context. If missing, say it is not in the resume. [/INST]""
94
  )
95
 
96
  text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
97
- return text.split("[/INST]")[-1].strip()
 
 
 
 
98
 
99
  def query(self, question: str):
100
  if self.vector_store is None:
101
  return "Please upload a resume first.", ""
 
102
  if not question.strip():
103
  return "Please enter a question.", ""
104
 
105
  docs = self.vector_store.similarity_search(question, k=3)
106
  context = "\n\n".join([d.page_content for d in docs])
 
107
  answer = self.generate_answer(question, context)
108
 
109
- torch.cuda.empty_cache()
 
 
110
  return answer, context
111
 
 
 
112
  rag_system = ResumeRAG()
113
 
114
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
115
- gr.Markdown("# πŸ“„ Resume RAG Q&A System")
 
 
 
 
 
 
 
116
 
117
  with gr.Row():
118
  with gr.Column(scale=1):
119
- file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
120
- upload_btn = gr.Button("Process Resume", variant="primary")
 
 
 
 
121
  upload_status = gr.Textbox(label="Status", interactive=False)
122
 
 
 
 
 
 
 
 
 
 
 
 
123
  with gr.Column(scale=2):
124
- question_input = gr.Textbox(label="Your Question", lines=2)
125
- submit_btn = gr.Button("Get Answer", variant="primary")
126
- answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
127
  with gr.Accordion("πŸ“š Retrieved Context", open=False):
128
- context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)
 
 
 
 
129
 
130
- # Wrap the callback so Spaces sees a GPU-decorated function
131
  @spaces.GPU
132
  def query_gpu(q):
133
  return rag_system.query(q)
134
 
135
- upload_btn.click(rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
136
- submit_btn.click(query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
137
- question_input.submit(query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  if __name__ == "__main__":
140
- demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import spaces
4
+
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
+
10
  import PyPDF2
11
  from docx import Document
12
 
13
+
14
  class ResumeRAG:
15
  def __init__(self):
16
  self.has_cuda = torch.cuda.is_available()
17
  self.device = "cuda" if self.has_cuda else "cpu"
18
  print(f"Using device: {self.device}")
19
 
20
+ # Embeddings (small + fast)
21
  self.embeddings = HuggingFaceEmbeddings(
22
  model_name="sentence-transformers/all-MiniLM-L6-v2",
23
  model_kwargs={"device": self.device},
24
  )
25
 
26
+ self.text_splitter = RecursiveCharacterTextSplitter(
27
+ chunk_size=500,
28
+ chunk_overlap=50
29
+ )
30
+
31
+ self.vector_store = None
32
+
33
  model_name = "mistralai/Mistral-7B-Instruct-v0.2"
34
 
35
  if not self.has_cuda:
36
+ raise RuntimeError(
37
+ "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
38
+ )
39
 
40
+ # 4-bit quantization for GPU efficiency
41
  quantization_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
  bnb_4bit_compute_dtype=torch.float16,
 
45
  bnb_4bit_quant_type="nf4",
46
  )
47
 
48
+ print("Loading tokenizer...")
49
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
50
+
51
+ print("Loading model...")
52
  self.model = AutoModelForCausalLM.from_pretrained(
53
  model_name,
54
  quantization_config=quantization_config,
55
+ device_map="auto", # important for Spaces
56
+ trust_remote_code=True
57
  )
58
 
59
+ # Ensure pad token exists
60
+ if self.tokenizer.pad_token_id is None:
61
+ self.tokenizer.pad_token = self.tokenizer.eos_token
62
 
63
  def extract_text_from_pdf(self, file_path: str) -> str:
64
+ try:
65
+ with open(file_path, "rb") as f:
66
+ reader = PyPDF2.PdfReader(f)
67
+ return "".join([(p.extract_text() or "") for p in reader.pages])
68
+ except Exception as e:
69
+ return f"Error reading PDF: {e}"
70
 
71
  def extract_text_from_docx(self, file_path: str) -> str:
72
+ try:
73
+ doc = Document(file_path)
74
+ return "\n".join([p.text for p in doc.paragraphs])
75
+ except Exception as e:
76
+ return f"Error reading DOCX: {e}"
77
 
78
  def process_resume(self, file) -> str:
79
  if file is None:
80
  return "Please upload a resume file."
81
 
82
  file_path = file.name
83
+ if file_path.lower().endswith(".pdf"):
84
  text = self.extract_text_from_pdf(file_path)
85
+ elif file_path.lower().endswith(".docx"):
86
  text = self.extract_text_from_docx(file_path)
87
  else:
88
  return "Unsupported file format. Please upload PDF or DOCX."
89
 
90
+ if text.startswith("Error"):
91
+ return text
92
+
93
  if not text.strip():
94
  return "No text could be extracted from the resume."
95
 
96
  chunks = self.text_splitter.split_text(text)
97
+ if not chunks:
98
+ return "No text chunks could be created from the resume."
99
+
100
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
101
  return f"βœ… Resume processed successfully! Extracted {len(chunks)} text chunks."
102
 
 
108
 
109
  Question: {question}
110
 
111
+ Answer only from the context. If the answer is not in the context, say it is not in the resume. [/INST]"""
112
 
 
113
  inputs = self.tokenizer(prompt, return_tensors="pt")
114
+
115
+ # FIX: move inputs onto the SAME device as the model's embedding weights
116
+ target_device = self.model.get_input_embeddings().weight.device
117
+ inputs = {k: v.to(target_device) for k, v in inputs.items()}
118
+
119
  with torch.no_grad():
120
  outputs = self.model.generate(
121
  **inputs,
 
127
  )
128
 
129
  text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
130
+
131
+ # If the full prompt is included, return only the last segment
132
+ if "[/INST]" in text:
133
+ return text.split("[/INST]")[-1].strip()
134
+ return text.strip()
135
 
136
  def query(self, question: str):
137
  if self.vector_store is None:
138
  return "Please upload a resume first.", ""
139
+
140
  if not question.strip():
141
  return "Please enter a question.", ""
142
 
143
  docs = self.vector_store.similarity_search(question, k=3)
144
  context = "\n\n".join([d.page_content for d in docs])
145
+
146
  answer = self.generate_answer(question, context)
147
 
148
+ if torch.cuda.is_available():
149
+ torch.cuda.empty_cache()
150
+
151
  return answer, context
152
 
153
+
154
print("Initializing Resume RAG System...")
rag_system = ResumeRAG()

# Build the Gradio UI: upload column on the left, Q&A column on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search

        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2,
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
            answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)

            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)

    # GPU-decorated handler so ZeroGPU/Spaces allocates a GPU per request.
    @spaces.GPU
    def query_gpu(q):
        return rag_system.query(q)

    # Wire events: button click for upload, click + Enter for questions.
    upload_btn.click(fn=rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
    submit_btn.click(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
    question_input.submit(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])

if __name__ == "__main__":
    demo.launch(share=True)