telcom committed on
Commit
f7b57d9
Β·
verified Β·
1 Parent(s): bebc177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -29
app.py CHANGED
@@ -1,29 +1,43 @@
1
  import gradio as gr
2
  import torch
3
  import spaces
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
8
  import PyPDF2
9
  from docx import Document
10
 
 
11
  class ResumeRAG:
12
  def __init__(self):
13
  self.has_cuda = torch.cuda.is_available()
14
  self.device = "cuda" if self.has_cuda else "cpu"
15
  print(f"Using device: {self.device}")
16
 
 
17
  self.embeddings = HuggingFaceEmbeddings(
18
  model_name="sentence-transformers/all-MiniLM-L6-v2",
19
  model_kwargs={"device": self.device},
20
  )
21
 
 
 
 
 
 
 
 
22
  model_name = "mistralai/Mistral-7B-Instruct-v0.2"
23
 
24
  if not self.has_cuda:
25
- raise RuntimeError("GPU not available. Set Space hardware to GPU or use the CPU fallback option.")
 
 
26
 
 
27
  quantization_config = BitsAndBytesConfig(
28
  load_in_4bit=True,
29
  bnb_4bit_compute_dtype=torch.float16,
@@ -31,43 +45,58 @@ class ResumeRAG:
31
  bnb_4bit_quant_type="nf4",
32
  )
33
 
34
- print("Loading model...")
35
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
36
  self.model = AutoModelForCausalLM.from_pretrained(
37
  model_name,
38
  quantization_config=quantization_config,
39
- device_map="auto",
40
- trust_remote_code=True,
41
  )
42
 
43
- self.vector_store = None
44
- self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 
45
 
46
  def extract_text_from_pdf(self, file_path: str) -> str:
47
- with open(file_path, "rb") as f:
48
- reader = PyPDF2.PdfReader(f)
49
- return "".join([(p.extract_text() or "") for p in reader.pages])
 
 
 
50
 
51
  def extract_text_from_docx(self, file_path: str) -> str:
52
- doc = Document(file_path)
53
- return "\n".join([p.text for p in doc.paragraphs])
 
 
 
54
 
55
  def process_resume(self, file) -> str:
56
  if file is None:
57
  return "Please upload a resume file."
58
 
59
  file_path = file.name
60
- if file_path.endswith(".pdf"):
61
  text = self.extract_text_from_pdf(file_path)
62
- elif file_path.endswith(".docx"):
63
  text = self.extract_text_from_docx(file_path)
64
  else:
65
  return "Unsupported file format. Please upload PDF or DOCX."
66
 
 
 
 
67
  if not text.strip():
68
  return "No text could be extracted from the resume."
69
 
70
  chunks = self.text_splitter.split_text(text)
 
 
 
71
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
72
  return f"βœ… Resume processed successfully! Extracted {len(chunks)} text chunks."
73
 
@@ -79,10 +108,14 @@ Context:
79
 
80
  Question: {question}
81
 
82
- Answer only from the context. If missing, say it is not in the resume. [/INST]"""
83
 
84
- # IMPORTANT: do NOT push inputs to self.device when device_map="auto"
85
  inputs = self.tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
86
  with torch.no_grad():
87
  outputs = self.model.generate(
88
  **inputs,
@@ -94,47 +127,108 @@ Answer only from the context. If missing, say it is not in the resume. [/INST]""
94
  )
95
 
96
  text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
97
- return text.split("[/INST]")[-1].strip()
 
 
 
 
98
 
99
  def query(self, question: str):
100
  if self.vector_store is None:
101
  return "Please upload a resume first.", ""
 
102
  if not question.strip():
103
  return "Please enter a question.", ""
104
 
105
  docs = self.vector_store.similarity_search(question, k=3)
106
  context = "\n\n".join([d.page_content for d in docs])
 
107
  answer = self.generate_answer(question, context)
108
 
109
- torch.cuda.empty_cache()
 
 
110
  return answer, context
111
 
 
 
112
  rag_system = ResumeRAG()
113
 
114
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
115
- gr.Markdown("# πŸ“„ Resume RAG Q&A System")
 
 
 
 
 
 
 
116
 
117
  with gr.Row():
118
  with gr.Column(scale=1):
119
- file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
120
- upload_btn = gr.Button("Process Resume", variant="primary")
 
 
 
 
121
  upload_status = gr.Textbox(label="Status", interactive=False)
122
 
 
 
 
 
 
 
 
 
 
 
 
123
  with gr.Column(scale=2):
124
- question_input = gr.Textbox(label="Your Question", lines=2)
125
- submit_btn = gr.Button("Get Answer", variant="primary")
126
- answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
127
  with gr.Accordion("πŸ“š Retrieved Context", open=False):
128
- context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)
 
 
 
 
129
 
130
- # Wrap the callback so Spaces sees a GPU-decorated function
131
  @spaces.GPU
132
  def query_gpu(q):
133
  return rag_system.query(q)
134
 
135
- upload_btn.click(rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
136
- submit_btn.click(query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
137
- question_input.submit(query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  if __name__ == "__main__":
140
- demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import spaces
4
+
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
+
10
  import PyPDF2
11
  from docx import Document
12
 
13
+
14
  class ResumeRAG:
15
  def __init__(self):
16
  self.has_cuda = torch.cuda.is_available()
17
  self.device = "cuda" if self.has_cuda else "cpu"
18
  print(f"Using device: {self.device}")
19
 
20
+ # Embeddings (small + fast)
21
  self.embeddings = HuggingFaceEmbeddings(
22
  model_name="sentence-transformers/all-MiniLM-L6-v2",
23
  model_kwargs={"device": self.device},
24
  )
25
 
26
+ self.text_splitter = RecursiveCharacterTextSplitter(
27
+ chunk_size=500,
28
+ chunk_overlap=50
29
+ )
30
+
31
+ self.vector_store = None
32
+
33
  model_name = "mistralai/Mistral-7B-Instruct-v0.2"
34
 
35
  if not self.has_cuda:
36
+ raise RuntimeError(
37
+ "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
38
+ )
39
 
40
+ # 4-bit quantization for GPU efficiency
41
  quantization_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
  bnb_4bit_compute_dtype=torch.float16,
 
45
  bnb_4bit_quant_type="nf4",
46
  )
47
 
48
+ print("Loading tokenizer...")
49
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
50
+
51
+ print("Loading model...")
52
  self.model = AutoModelForCausalLM.from_pretrained(
53
  model_name,
54
  quantization_config=quantization_config,
55
+ device_map="auto", # important for Spaces
56
+ trust_remote_code=True
57
  )
58
 
59
+ # Ensure pad token exists
60
+ if self.tokenizer.pad_token_id is None:
61
+ self.tokenizer.pad_token = self.tokenizer.eos_token
62
 
63
  def extract_text_from_pdf(self, file_path: str) -> str:
64
+ try:
65
+ with open(file_path, "rb") as f:
66
+ reader = PyPDF2.PdfReader(f)
67
+ return "".join([(p.extract_text() or "") for p in reader.pages])
68
+ except Exception as e:
69
+ return f"Error reading PDF: {e}"
70
 
71
  def extract_text_from_docx(self, file_path: str) -> str:
72
+ try:
73
+ doc = Document(file_path)
74
+ return "\n".join([p.text for p in doc.paragraphs])
75
+ except Exception as e:
76
+ return f"Error reading DOCX: {e}"
77
 
78
  def process_resume(self, file) -> str:
79
  if file is None:
80
  return "Please upload a resume file."
81
 
82
  file_path = file.name
83
+ if file_path.lower().endswith(".pdf"):
84
  text = self.extract_text_from_pdf(file_path)
85
+ elif file_path.lower().endswith(".docx"):
86
  text = self.extract_text_from_docx(file_path)
87
  else:
88
  return "Unsupported file format. Please upload PDF or DOCX."
89
 
90
+ if text.startswith("Error"):
91
+ return text
92
+
93
  if not text.strip():
94
  return "No text could be extracted from the resume."
95
 
96
  chunks = self.text_splitter.split_text(text)
97
+ if not chunks:
98
+ return "No text chunks could be created from the resume."
99
+
100
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
101
  return f"βœ… Resume processed successfully! Extracted {len(chunks)} text chunks."
102
 
 
108
 
109
  Question: {question}
110
 
111
+ Answer only from the context. If the answer is not in the context, say it is not in the resume. [/INST]"""
112
 
 
113
  inputs = self.tokenizer(prompt, return_tensors="pt")
114
+
115
+ # FIX: move inputs onto the SAME device as the model's embedding weights
116
+ target_device = self.model.get_input_embeddings().weight.device
117
+ inputs = {k: v.to(target_device) for k, v in inputs.items()}
118
+
119
  with torch.no_grad():
120
  outputs = self.model.generate(
121
  **inputs,
 
127
  )
128
 
129
  text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
130
+
131
+ # If the full prompt is included, return only the last segment
132
+ if "[/INST]" in text:
133
+ return text.split("[/INST]")[-1].strip()
134
+ return text.strip()
135
 
136
  def query(self, question: str):
137
  if self.vector_store is None:
138
  return "Please upload a resume first.", ""
139
+
140
  if not question.strip():
141
  return "Please enter a question.", ""
142
 
143
  docs = self.vector_store.similarity_search(question, k=3)
144
  context = "\n\n".join([d.page_content for d in docs])
145
+
146
  answer = self.generate_answer(question, context)
147
 
148
+ if torch.cuda.is_available():
149
+ torch.cuda.empty_cache()
150
+
151
  return answer, context
152
 
153
+
154
print("Initializing Resume RAG System...")
rag_system = ResumeRAG()

# Build the Gradio UI: upload column on the left, Q&A column on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search

        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2,
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
            answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)

            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)

    # GPU-decorated handler so ZeroGPU/Spaces allocates a GPU per request.
    @spaces.GPU
    def query_gpu(q):
        return rag_system.query(q)

    # Wire events: button click for upload, click + Enter for questions.
    upload_btn.click(fn=rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
    submit_btn.click(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
    question_input.submit(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])

if __name__ == "__main__":
    demo.launch(share=True)