# ResumeQA / app.py
# (header lines from the Hugging Face file viewer — "telcom's picture /
#  Update app.py / 36c4581 verified" — converted to a comment so the file parses)
import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import PyPDF2
from docx import Document
class ResumeRAG:
    """Retrieval-augmented Q&A over a single uploaded resume.

    Embeds resume chunks with a small sentence-transformer, indexes them in
    FAISS, and answers questions with a 4-bit quantized Mistral-7B-Instruct.
    GPU-only: __init__ raises RuntimeError when no CUDA device is present.
    """

    def __init__(self):
        # The LLM path below is GPU-only; record device up front.
        self.has_cuda = torch.cuda.is_available()
        self.device = "cuda" if self.has_cuda else "cpu"
        print(f"Using device: {self.device}")

        # Embeddings (small + fast) used solely for FAISS retrieval.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": self.device},
        )
        # 500-char chunks with 50-char overlap keep retrieval granular while
        # preserving context across chunk boundaries.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )
        # Populated by process_resume(); None until a resume is indexed.
        self.vector_store = None

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        if not self.has_cuda:
            raise RuntimeError(
                "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
            )

        # 4-bit NF4 quantization keeps the 7B model within Space GPU memory.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        print("Loading model...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",  # let HF place layers (important for Spaces)
            trust_remote_code=True,
        )
        # Mistral ships without a pad token; generate() needs one.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Return the concatenated page text of a PDF.

        On failure returns an "Error reading PDF: ..." string (the sentinel
        process_resume() checks for) rather than raising.
        """
        try:
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # FIX: join pages with a newline so the last word of one page
                # is not fused to the first word of the next.
                return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as e:
            return f"Error reading PDF: {e}"

    def extract_text_from_docx(self, file_path: str) -> str:
        """Return all paragraph text of a DOCX, or an "Error ..." sentinel string."""
        try:
            doc = Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs)
        except Exception as e:
            return f"Error reading DOCX: {e}"

    def process_resume(self, file) -> str:
        """Extract, chunk, embed, and index an uploaded resume.

        Args:
            file: Either a path string or a Gradio upload object with a
                ``.name`` attribute pointing at the temp file.

        Returns:
            A human-readable status message for the UI.
        """
        if file is None:
            return "Please upload a resume file."
        # FIX: depending on the Gradio File component configuration the
        # handler may receive a plain path string instead of a tempfile-like
        # object; accept both.
        file_path = file if isinstance(file, str) else file.name
        lower_path = file_path.lower()
        if lower_path.endswith(".pdf"):
            text = self.extract_text_from_pdf(file_path)
        elif lower_path.endswith(".docx"):
            text = self.extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload PDF or DOCX."
        # Extractors signal failure via an "Error ..." prefix (see above).
        if text.startswith("Error"):
            return text
        if not text.strip():
            return "No text could be extracted from the resume."
        chunks = self.text_splitter.split_text(text)
        if not chunks:
            return "No text chunks could be created from the resume."
        self.vector_store = FAISS.from_texts(chunks, self.embeddings)
        # FIX: repaired mojibake ("βœ…" -> "✅") in the success message.
        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks."

    def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer to `question` grounded in `context` with the LLM."""
        prompt = f"""[INST] You are a helpful assistant analyzing a resume.
Context:
{context}
Question: {question}
Answer only from the context. If the answer is not in the context, say it is not in the resume. [/INST]"""
        inputs = self.tokenizer(prompt, return_tensors="pt")
        # Move inputs onto the SAME device as the model's embedding weights
        # (with device_map="auto" the model may be sharded across devices).
        target_device = self.model.get_input_embeddings().weight.device
        inputs = {k: v.to(target_device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # decode() may echo the full prompt; keep only the answer segment.
        if "[/INST]" in text:
            return text.split("[/INST]")[-1].strip()
        return text.strip()

    def query(self, question: str):
        """Answer a question about the indexed resume.

        Returns:
            (answer, context) strings; both components are UI-ready messages
            when no resume is indexed or the question is blank.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""
        if not question.strip():
            return "Please enter a question.", ""
        # Top-3 chunks by embedding similarity form the grounding context.
        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join(d.page_content for d in docs)
        answer = self.generate_answer(question, context)
        # Free cached activations between requests to limit GPU memory growth.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return answer, context
print("Initializing Resume RAG System...")
# Built at import time so the heavy model load happens once, at startup.
rag_system = ResumeRAG()

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # FIX: repaired mojibake emojis throughout the UI strings
    # ("πŸ“„" -> "📄", "πŸ“€" -> "📤", "πŸ’¬" -> "💬", "πŸ“š" -> "📚").
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search
        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )
    with gr.Row():
        # Left column: upload + processing status + usage hints.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(
                label="Upload PDF or DOCX",
                file_types=[".pdf", ".docx"],
            )
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)
            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )
        # Right column: question box, answer, and retrieved-context accordion.
        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2,
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
            answer_output = gr.Textbox(
                label="Answer",
                lines=8,
                interactive=False,
            )
            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(
                    label="Relevant Resume Sections",
                    lines=6,
                    interactive=False,
                )

    # GPU-decorated handler: on ZeroGPU Spaces the decorator attaches a GPU
    # for the duration of each call (no-op on always-on GPU hardware).
    @spaces.GPU
    def query_gpu(q):
        return rag_system.query(q)

    upload_btn.click(
        fn=rag_system.process_resume,
        inputs=[file_input],
        outputs=[upload_status],
    )
    submit_btn.click(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output],
    )
    # Pressing Enter in the question box triggers the same handler.
    question_input.submit(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output],
    )

if __name__ == "__main__":
    # NOTE(review): share=True is ignored on Hugging Face Spaces (already
    # public); it only matters for local runs.
    demo.launch(share=True)