telcom committed on
Commit
bebc177
·
verified ·
1 Parent(s): 9d229d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -151
app.py CHANGED
@@ -1,114 +1,88 @@
1
  import gradio as gr
2
  import torch
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
- from sentence_transformers import SentenceTransformer
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  import PyPDF2
9
  from docx import Document
10
- import numpy as np
11
- from typing import List, Tuple
12
- import gc
13
 
14
class ResumeRAG:
    """RAG Q&A over a single resume.

    Extracts text from a PDF/DOCX upload, chunks and indexes it in FAISS
    using MiniLM sentence embeddings, then answers questions with a 4-bit
    quantized Mistral-7B-Instruct model.
    """

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Initialize embedding model (lightweight)
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': self.device}
        )

        # Initialize LLM with 4-bit quantization for GPU efficiency
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"

        print("Loading model...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True
        )

        self.vector_store = None  # built lazily by process_resume()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50
        )

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Extract text from a PDF file; returns an "Error ..." string on failure."""
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page in pdf_reader.pages:
                    # BUG FIX: PyPDF2's extract_text() returns None for pages
                    # with no extractable text; "text += None" raised TypeError.
                    text += page.extract_text() or ""
                return text
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    def extract_text_from_docx(self, file_path: str) -> str:
        """Extract text from a DOCX file; returns an "Error ..." string on failure."""
        try:
            doc = Document(file_path)
            text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
            return text
        except Exception as e:
            return f"Error reading DOCX: {str(e)}"

    def process_resume(self, file) -> str:
        """Process an uploaded resume and (re)build the vector store.

        Returns a user-facing status string in every case (success or error).
        """
        if file is None:
            return "Please upload a resume file."

        # Extract text based on file type
        file_path = file.name
        if file_path.endswith('.pdf'):
            text = self.extract_text_from_pdf(file_path)
        elif file_path.endswith('.docx'):
            text = self.extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload PDF or DOCX."

        # The extract_* helpers report failures as "Error ..." strings.
        if text.startswith("Error"):
            return text

        # Split text into chunks
        chunks = self.text_splitter.split_text(text)

        if not chunks:
            return "No text could be extracted from the resume."

        # Create vector store
        self.vector_store = FAISS.from_texts(chunks, self.embeddings)

        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks. You can now ask questions."

    def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer with the LLM grounded in retrieved resume context."""
        prompt = f"""[INST] You are a helpful assistant analyzing a resume. Use the following context to answer the question accurately and concisely.

Context from resume:
{context}

Question: {question}

Provide a clear, specific answer based only on the information in the context. If the information is not in the context, say so. [/INST]"""

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=256,  # NOTE(review): original value hidden by diff hunk boundary — confirm
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response (text after the [/INST] tag).
        answer = answer.split("[/INST]")[-1].strip()

        return answer

    def query(self, question: str) -> Tuple[str, str]:
        """Answer a question about the indexed resume.

        Returns:
            (answer, retrieved_context); both carry user-facing messages when
            no resume has been indexed yet or the question is empty.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""

        if not question.strip():
            return "Please enter a question.", ""

        # Retrieve the 3 most similar chunks as grounding context.
        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join([doc.page_content for doc in docs])

        # Generate answer
        answer = self.generate_answer(question, context)

        # Clear cache to manage GPU memory between requests.
        if self.device == "cuda":
            torch.cuda.empty_cache()

        return answer, context
148
 
149
# Build the singleton RAG backend up front so the expensive model load
# happens once at import time rather than on the first request.
print("Initializing Resume RAG System...")
rag_system = ResumeRAG()

# Assemble the Gradio UI: upload column on the left, Q&A column on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("""
    # 📄 Resume RAG Q&A System
    ### Powered by Mistral-7B + FAISS Vector Search

    Upload your resume and ask questions about experience, skills, education, and more!
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)
            gr.Markdown("""
            ---
            **Example Questions:**
            - What programming languages does the candidate know?
            - Summarize the work experience
            - What is the candidate's education background?
            - List all technical skills
            """)

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2,
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
            answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)

            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)

    # Wire events: the button processes the upload; both the submit button and
    # pressing Enter in the question box run the same query callback.
    upload_btn.click(fn=rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
    submit_btn.click(fn=rag_system.query, inputs=[question_input], outputs=[answer_output, context_output])
    question_input.submit(fn=rag_system.query, inputs=[question_input], outputs=[answer_output, context_output])

if __name__ == "__main__":
    demo.launch(share=False)
 
1
  import gradio as gr
2
  import torch
3
+ import spaces
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  import PyPDF2
9
  from docx import Document
 
 
 
10
 
11
class ResumeRAG:
    """RAG Q&A over a single resume.

    Extracts text from a PDF/DOCX upload, chunks and indexes it in FAISS
    using MiniLM sentence embeddings, then answers questions with a 4-bit
    quantized Mistral-7B-Instruct model (GPU required by bitsandbytes).
    """

    def __init__(self):
        self.has_cuda = torch.cuda.is_available()
        self.device = "cuda" if self.has_cuda else "cpu"
        print(f"Using device: {self.device}")

        # BUG FIX: fail fast BEFORE loading any models. Previously the
        # embedding model was downloaded and loaded first, only to throw
        # this same error afterwards on CPU-only hardware.
        if not self.has_cuda:
            raise RuntimeError("GPU not available. Set Space hardware to GPU or use the CPU fallback option.")

        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": self.device},
        )

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"

        # NF4 double quantization keeps the 7B model within a single-GPU budget.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        print("Loading model...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
        )

        self.vector_store = None  # built on first successful resume upload
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Concatenate the text of every PDF page ('' for image-only pages)."""
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            return "".join([(p.extract_text() or "") for p in reader.pages])

    def extract_text_from_docx(self, file_path: str) -> str:
        """Join all paragraph texts of a DOCX file with newlines."""
        doc = Document(file_path)
        return "\n".join([p.text for p in doc.paragraphs])

    def process_resume(self, file) -> str:
        """Extract, chunk, and index an uploaded resume.

        Returns a user-facing status string in every case (success or error).
        """
        if file is None:
            return "Please upload a resume file."

        file_path = file.name
        if file_path.endswith(".pdf"):
            text = self.extract_text_from_pdf(file_path)
        elif file_path.endswith(".docx"):
            text = self.extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload PDF or DOCX."

        if not text.strip():
            return "No text could be extracted from the resume."

        chunks = self.text_splitter.split_text(text)
        self.vector_store = FAISS.from_texts(chunks, self.embeddings)
        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks."

    def generate_answer(self, question: str, context: str) -> str:
        """Run the LLM on the retrieved context and return only its reply."""
        prompt = f"""[INST] You are a helpful assistant analyzing a resume.

Context:
{context}

Question: {question}

Answer only from the context. If missing, say it is not in the resume. [/INST]"""

        # BUG FIX(review): with device_map="auto" the weights are dispatched to
        # the GPU but the tokenized inputs were left on the CPU, which can make
        # generate() fail with a device mismatch. HF big-model-inference docs
        # recommend placing inputs on model.device; accelerate hooks tolerate
        # this in either case — confirm on the target Space hardware.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=256,  # NOTE(review): original value hidden by diff hunk boundary — confirm
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Everything after the closing [/INST] tag is the model's answer.
        return text.split("[/INST]")[-1].strip()

    def query(self, question: str):
        """Answer a question about the indexed resume.

        Returns:
            (answer, retrieved_context); both carry user-facing messages when
            no resume has been indexed yet or the question is empty.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""

        if not question.strip():
            return "Please enter a question.", ""

        # Retrieve the 3 most similar chunks as grounding context.
        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join([d.page_content for d in docs])

        answer = self.generate_answer(question, context)

        # BUG FIX: guard the cache flush (old code did); keeps query() safe on
        # CPU-only torch builds where torch.cuda.empty_cache() raises.
        if self.has_cuda:
            torch.cuda.empty_cache()

        return answer, context
111
 
 
 
112
# Single backend instance shared by every Gradio session; models load once
# at import time.
rag_system = ResumeRAG()

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# 📄 Resume RAG Q&A System")

    with gr.Row():
        # Left column: resume upload and processing status.
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"])
            upload_btn = gr.Button("Process Resume", variant="primary")
            upload_status = gr.Textbox(label="Status", interactive=False)

        # Right column: question box, answer, and the retrieved chunks.
        with gr.Column(scale=2):
            question_input = gr.Textbox(label="Your Question", lines=2)
            submit_btn = gr.Button("Get Answer", variant="primary")
            answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(label="Relevant Resume Sections", lines=6, interactive=False)

    # Wrap the callback so Spaces sees a GPU-decorated function.
    # NOTE(review): process_resume also touches the CUDA embedding model but is
    # wired below without the decorator — confirm it works on ZeroGPU hardware.
    @spaces.GPU
    def query_gpu(q):
        return rag_system.query(q)

    # The button and pressing Enter in the question box share one callback.
    upload_btn.click(fn=rag_system.process_resume, inputs=[file_input], outputs=[upload_status])
    submit_btn.click(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])
    question_input.submit(fn=query_gpu, inputs=[question_input], outputs=[answer_output, context_output])

if __name__ == "__main__":
    demo.launch()