telcom committed
Commit 7bc81e6 · verified · 1 Parent(s): 8627025

Update app.py

Files changed (1): app.py +215 -350
app.py CHANGED
@@ -1,358 +1,223 @@
- import os
- import re
  import gradio as gr
- import numpy as np
- import faiss
- import requests
-
- from pypdf import PdfReader
  from docx import Document
- from fastembed import TextEmbedding
- from llama_cpp import Llama
-
-
- # -------------------------
- # Config
- # -------------------------
- EMBED_MODEL = os.getenv("EMBED_MODEL_ID", "BAAI/bge-small-en-v1.5")
-
- TOP_K = int(os.getenv("TOP_K", "5"))
- CHUNK_CHARS = int(os.getenv("CHUNK_CHARS", "1400"))
- CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "250"))
-
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "260"))
- TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
-
- # GGUF model path and optional public download URL
- MODEL_PATH = os.getenv("GGUF_MODEL_PATH", "models/model.gguf")
- MODEL_URL = os.getenv("GGUF_MODEL_URL", "")  # optional, public direct link to a .gguf
-
- # GPU layers: -1 means "as many as possible"
- N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "-1"))
- N_CTX = int(os.getenv("N_CTX", "4096"))
-
-
- # -------------------------
- # Helpers: file -> text
- # -------------------------
- def _clean_text(s: str) -> str:
-     s = s.replace("\x00", " ")
-     s = re.sub(r"[ \t]+", " ", s)
-     s = re.sub(r"\n{3,}", "\n\n", s)
-     return s.strip()
-
-
- def extract_text_from_pdf(path: str) -> str:
-     reader = PdfReader(path)
-     parts = []
-     for page in reader.pages:
-         txt = page.extract_text() or ""
-         if txt.strip():
-             parts.append(txt)
-     return _clean_text("\n\n".join(parts))
-
-
- def extract_text_from_docx(path: str) -> str:
-     doc = Document(path)
-     parts = []
-     for p in doc.paragraphs:
-         t = (p.text or "").strip()
-         if t:
-             parts.append(t)
-     return _clean_text("\n".join(parts))
-
-
- def extract_resume_text(file_path: str) -> str:
-     lower = file_path.lower()
-     if lower.endswith(".pdf"):
-         return extract_text_from_pdf(file_path)
-     if lower.endswith(".docx"):
-         return extract_text_from_docx(file_path)
-     raise ValueError("Unsupported file type. Please upload a PDF or DOCX.")
-
-
- # -------------------------
- # Chunking
- # -------------------------
- def chunk_text(text: str, chunk_chars: int = CHUNK_CHARS, overlap: int = CHUNK_OVERLAP):
-     text = text.strip()
-     if not text:
-         return []
-     chunks = []
-     start = 0
-     n = len(text)
-     while start < n:
-         end = min(start + chunk_chars, n)
-         chunk = text[start:end].strip()
-         if chunk:
-             chunks.append(chunk)
-         if end == n:
-             break
-         start = max(0, end - overlap)
-     return chunks
-
-
- # -------------------------
- # Vector store (FAISS)
- # -------------------------
- def normalize(v: np.ndarray) -> np.ndarray:
-     norm = np.linalg.norm(v, axis=1, keepdims=True) + 1e-12
-     return v / norm
-
-
- def build_faiss_index(embeddings: np.ndarray):
-     embeddings = normalize(embeddings.astype("float32"))
-     dim = embeddings.shape[1]
-     index = faiss.IndexFlatIP(dim)
-     index.add(embeddings)
-     return index
-
-
- def retrieve(query: str, embedder: TextEmbedding, index, chunks, top_k: int = TOP_K):
-     q_vec = list(embedder.embed([query]))[0]
-     q_emb = np.array(q_vec, dtype="float32")[None, :]
-     q_emb = normalize(q_emb)
-
-     scores, ids = index.search(q_emb, top_k)
-     hits = []
-     for score, idx in zip(scores[0], ids[0]):
-         if idx == -1:
-             continue
-         hits.append({"score": float(score), "chunk": chunks[int(idx)], "id": int(idx)})
-     return hits
-
-
- def format_sources(hits):
-     lines = []
-     for i, h in enumerate(hits, start=1):
-         snippet = re.sub(r"\s+", " ", h["chunk"].strip())
-         if len(snippet) > 220:
-             snippet = snippet[:220] + "..."
-         lines.append(f"- Source {i} (score {h['score']:.3f}): {snippet}")
-     return "\n".join(lines)
-
-
- # -------------------------
- # Local LLM (llama.cpp)
- # -------------------------
- _LLM = None
-
- def ensure_model_file():
-     os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)
-     if os.path.exists(MODEL_PATH) and os.path.getsize(MODEL_PATH) > 10_000_000:
-         return
 
-     if not MODEL_URL:
-         raise RuntimeError(
-             "GGUF model file not found. Set GGUF_MODEL_PATH to an existing .gguf in the repo, "
-             "or provide GGUF_MODEL_URL (public direct link to a .gguf)."
          )
-
-     # Download the model once
-     with requests.get(MODEL_URL, stream=True, timeout=120) as r:
-         r.raise_for_status()
-         with open(MODEL_PATH, "wb") as f:
-             for chunk in r.iter_content(chunk_size=1024 * 1024):
-                 if chunk:
-                     f.write(chunk)
-
- def get_llm():
-     global _LLM
-     if _LLM is not None:
-         return _LLM
-
-     ensure_model_file()
-
-     # If CUDA build is present, n_gpu_layers=-1 will push as much as possible to GPU
-     _LLM = Llama(
-         model_path=MODEL_PATH,
-         n_ctx=N_CTX,
-         n_threads=max(2, os.cpu_count() or 4),
-         n_gpu_layers=N_GPU_LAYERS,
-         verbose=False,
-     )
-     return _LLM
-
-
- def answer_with_llm(question: str, hits: list):
-     llm = get_llm()
-
-     sources_text = "\n\n".join([f"[Source {i+1}]\n{h['chunk']}" for i, h in enumerate(hits)])
-
-     system = (
-         "You are a resume assistant.\n"
-         "Answer ONLY using the provided SOURCES.\n"
-         "If the answer is not explicitly supported by the SOURCES, say: "
-         "'I cannot find that in the uploaded resume.'\n"
-         "Do not invent roles, dates, skills, employers, or achievements.\n"
-         "Keep it concise and professional.\n"
-     )
-
-     prompt = (
-         f"{system}\n\n"
-         f"SOURCES:\n{sources_text}\n\n"
-         f"QUESTION:\n{question}\n\n"
-         f"ANSWER:"
      )
-
-     out = llm(
-         prompt,
-         max_tokens=MAX_NEW_TOKENS,
-         temperature=TEMPERATURE,
-         top_p=0.9,
-         repeat_penalty=1.05,
-         stop=["\n\nQUESTION:", "\n\nSOURCES:"],
      )
-
-     text = out["choices"][0]["text"].strip()
-     return text
-
-
- # -------------------------
- # App state
- # -------------------------
- class AppState:
-     def __init__(self):
-         self.embedder = None
-         self.index = None
-         self.chunks = []
-         self.ready = False
-
- STATE = AppState()
-
-
- # -------------------------
- # UI helpers
- # -------------------------
- def status_badge(is_ready: bool, msg: str):
-     color = "#22c55e" if is_ready else "#ef4444"
-     label = "READY" if is_ready else "NOT READY"
-     return f"""
-     <div style="display:flex;align-items:center;gap:10px;padding:10px 12px;border-radius:12px;
-                 border:1px solid rgba(255,255,255,0.14);background:rgba(0,0,0,0.18);">
-       <div style="width:12px;height:12px;border-radius:999px;background:{color};"></div>
-       <div style="font-weight:900;letter-spacing:0.6px;">{label}</div>
-       <div style="opacity:0.92;">{msg}</div>
-     </div>
-     """
-
-
- CSS = """
- :root { color-scheme: dark; }
- .gradio-container { background: #070b14 !important; color: #f8fafc !important; }
- .gr-box, .block, .wrap, .panel { background: #0b1220 !important; border: 1px solid rgba(255,255,255,0.14) !important; }
- label, .md, .prose { color: #f8fafc !important; }
- textarea, input[type="text"] { background: #050814 !important; color: #f8fafc !important; border: 1px solid rgba(255,255,255,0.18) !important; }
- button.primary { background: #60a5fa !important; color: #061018 !important; font-weight: 900 !important; border: none !important; }
- button.secondary { background: transparent !important; color: #f8fafc !important; border: 1px solid rgba(255,255,255,0.18) !important; }
- footer { display:none !important; }
- """
-
-
- # -------------------------
- # Callbacks (messages format)
- # -------------------------
- def on_build(file_obj):
-     STATE.embedder = None
-     STATE.index = None
-     STATE.chunks = []
-     STATE.ready = False
-
-     if file_obj is None:
-         return status_badge(False, "Upload a PDF or DOCX to begin."), gr.update(interactive=False), []
-
-     try:
-         text = extract_resume_text(file_obj.name)
-     except Exception:
-         return status_badge(False, "Could not read this file. Try a DOCX or a text-based PDF."), gr.update(interactive=False), []
-
-     if not text.strip():
-         return status_badge(False, "No extractable text found (scanned PDF). Upload a DOCX instead."), gr.update(interactive=False), []
-
-     chunks = chunk_text(text)
-     if not chunks:
-         return status_badge(False, "Could not chunk the resume. Try DOCX."), gr.update(interactive=False), []
-
-     try:
-         embedder = TextEmbedding(model_name=EMBED_MODEL)
-         vecs = np.array(list(embedder.embed(chunks)), dtype="float32")
-         index = build_faiss_index(vecs)
-     except Exception:
-         return status_badge(False, "Embedding/indexing failed. Try again or use DOCX."), gr.update(interactive=False), []
-
-     STATE.embedder = embedder
-     STATE.index = index
-     STATE.chunks = chunks
-     STATE.ready = True
-
-     # Warm up LLM lazily later, do not block UI
-     return status_badge(True, "Resume loaded. Ask your question below."), gr.update(interactive=True), []
-
-
- def on_ask(question, history):
-     history = history or []
-     q = (question or "").strip()
-     if not q:
-         return history
-
-     if not STATE.ready:
-         history.append({"role": "user", "content": q})
-         history.append({"role": "assistant", "content": "Please upload your resume first (PDF or DOCX)."})
-         return history
-
-     hits = retrieve(q, STATE.embedder, STATE.index, STATE.chunks, top_k=TOP_K)
-
-     try:
-         answer = answer_with_llm(q, hits)
-     except Exception as e:
-         answer = f"Local model error: {e}"
-
-     final = f"{answer}\n\nSources:\n{format_sources(hits)}"
-
-     history.append({"role": "user", "content": q})
-     history.append({"role": "assistant", "content": final})
-     return history
-
-
- def on_clear():
-     return []
-
-
- # -------------------------
- # UI
- # -------------------------
- with gr.Blocks(title="ResumeQA") as demo:
-     gr.Markdown(
-         """
-         <div style="margin-bottom:10px;">
-           <div style="font-size:28px;font-weight:900;">ResumeQA</div>
-           <div style="opacity:0.82;margin-top:2px;">
-             Upload a resume, then ask questions. Everything runs locally.
-           </div>
-         </div>
-         """
      )
 
-     status_html = gr.HTML(status_badge(False, "Upload a PDF or DOCX to begin."))
-
-     uploader = gr.File(label="Upload resume (PDF or DOCX)", file_types=[".pdf", ".docx"], height=90)
-     build_btn = gr.Button("Build resume index", variant="primary")
-
-     chatbot = gr.Chatbot(label="Chat", height=430)
-
-     with gr.Row():
-         question = gr.Textbox(
-             label="Your question",
-             placeholder="Example: What roles have I held, and what impact did I deliver?",
-             interactive=False
-         )
-         ask_btn = gr.Button("Ask", variant="primary")
-
-     clear_btn = gr.Button("Clear chat", variant="secondary")
-
-     build_btn.click(fn=on_build, inputs=[uploader], outputs=[status_html, question, chatbot])
-     ask_btn.click(fn=on_ask, inputs=[question, chatbot], outputs=[chatbot]).then(lambda: "", None, question)
-     question.submit(fn=on_ask, inputs=[question, chatbot], outputs=[chatbot]).then(lambda: "", None, question)
-     clear_btn.click(fn=on_clear, inputs=None, outputs=[chatbot])
-
- demo.queue(default_concurrency_limit=4).launch(css=CSS, ssr_mode=False)
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from sentence_transformers import SentenceTransformer
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import PyPDF2
  from docx import Document
+ import numpy as np
+ from typing import List, Tuple
+ import gc
 
+ class ResumeRAG:
+     def __init__(self):
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         print(f"Using device: {self.device}")
+
+         # Initialize embedding model (lightweight)
+         self.embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={'device': self.device}
          )
+
+         # Initialize LLM with 4-bit quantization for GPU efficiency
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.float16,
+             bnb_4bit_use_double_quant=True,
+             bnb_4bit_quant_type="nf4"
+         )
+
+         model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+
+         print("Loading model...")
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             quantization_config=quantization_config,
+             device_map="auto",
+             trust_remote_code=True
+         )
+
+         self.vector_store = None
+         self.text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=50
+         )
+
+     def extract_text_from_pdf(self, file_path: str) -> str:
+         """Extract text from PDF file"""
+         try:
+             with open(file_path, 'rb') as file:
+                 pdf_reader = PyPDF2.PdfReader(file)
+                 text = ""
+                 for page in pdf_reader.pages:
+                     text += page.extract_text() or ""  # guard: extract_text() may yield nothing for a page
+             return text
+         except Exception as e:
+             return f"Error reading PDF: {str(e)}"
+
+     def extract_text_from_docx(self, file_path: str) -> str:
+         """Extract text from DOCX file"""
+         try:
+             doc = Document(file_path)
+             text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
+             return text
+         except Exception as e:
+             return f"Error reading DOCX: {str(e)}"
+
+     def process_resume(self, file) -> str:
+         """Process uploaded resume and create vector store"""
+         if file is None:
+             return "Please upload a resume file."
+
+         # Extract text based on file type
+         file_path = file.name
+         if file_path.endswith('.pdf'):
+             text = self.extract_text_from_pdf(file_path)
+         elif file_path.endswith('.docx'):
+             text = self.extract_text_from_docx(file_path)
+         else:
+             return "Unsupported file format. Please upload PDF or DOCX."
+
+         if text.startswith("Error"):
+             return text
+
+         # Split text into chunks
+         chunks = self.text_splitter.split_text(text)
+
+         if not chunks:
+             return "No text could be extracted from the resume."
+
+         # Create vector store
+         self.vector_store = FAISS.from_texts(chunks, self.embeddings)
+
+         return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks. You can now ask questions."
+
+     def generate_answer(self, question: str, context: str) -> str:
+         """Generate answer using LLM"""
+         prompt = f"""[INST] You are a helpful assistant analyzing a resume. Use the following context to answer the question accurately and concisely.
+
+ Context from resume:
+ {context}
+
+ Question: {question}
+
+ Provide a clear, specific answer based only on the information in the context. If the information is not in the context, say so. [/INST]"""
+
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=256,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 pad_token_id=self.tokenizer.eos_token_id
+             )
+
+         answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         # Extract only the assistant's response
+         answer = answer.split("[/INST]")[-1].strip()
+
+         return answer
+
+     def query(self, question: str) -> Tuple[str, str]:
+         """Query the RAG system"""
+         if self.vector_store is None:
+             return "Please upload a resume first.", ""
+
+         if not question.strip():
+             return "Please enter a question.", ""
+
+         # Retrieve relevant chunks
+         docs = self.vector_store.similarity_search(question, k=3)
+         context = "\n\n".join([doc.page_content for doc in docs])
+
+         # Generate answer
+         answer = self.generate_answer(question, context)
+
+         # Clear cache to manage GPU memory
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+
+         return answer, context
+
+ # Initialize RAG system
+ print("Initializing Resume RAG System...")
+ rag_system = ResumeRAG()
+
+ # Create Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+     gr.Markdown("""
+     # 📄 Resume RAG Q&A System
+     ### Powered by Mistral-7B + FAISS Vector Search
+
+     Upload your resume and ask questions about experience, skills, education, and more!
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### 📤 Upload Resume")
+             file_input = gr.File(
+                 label="Upload PDF or DOCX",
+                 file_types=[".pdf", ".docx"]
+             )
+             upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
+             upload_status = gr.Textbox(label="Status", interactive=False)
+
+             gr.Markdown("""
+             ---
+             **Example Questions:**
+             - What programming languages does the candidate know?
+             - Summarize the work experience
+             - What is the candidate's education background?
+             - List all technical skills
+             """)
+
+         with gr.Column(scale=2):
+             gr.Markdown("### 💬 Ask Questions")
+             question_input = gr.Textbox(
+                 label="Your Question",
+                 placeholder="e.g., What are the candidate's key skills?",
+                 lines=2
+             )
+             submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
+
+             answer_output = gr.Textbox(
+                 label="Answer",
+                 lines=8,
+                 interactive=False
+             )
+
+             with gr.Accordion("📚 Retrieved Context", open=False):
+                 context_output = gr.Textbox(
+                     label="Relevant Resume Sections",
+                     lines=6,
+                     interactive=False
+                 )
+
+     # Event handlers
+     upload_btn.click(
+         fn=rag_system.process_resume,
+         inputs=[file_input],
+         outputs=[upload_status]
      )
+
+     submit_btn.click(
+         fn=rag_system.query,
+         inputs=[question_input],
+         outputs=[answer_output, context_output]
      )
+
+     question_input.submit(
+         fn=rag_system.query,
+         inputs=[question_input],
+         outputs=[answer_output, context_output]
      )
 
+ if __name__ == "__main__":
+     demo.launch(share=False)
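
For reference, a minimal sketch of driving the new ResumeRAG class outside the Gradio UI. The FakeUpload shim and the sample_resume.docx path are hypothetical; Gradio hands process_resume an object whose .name attribute points at the uploaded file, and that is the only attribute the method reads. Note that importing app instantiates ResumeRAG at module level (downloading and 4-bit-loading Mistral-7B-Instruct-v0.2), while demo.launch() stays behind the __main__ guard; the langchain.vectorstores / langchain.embeddings import paths also assume a pre-langchain_community release of LangChain.

# Hypothetical driver for the new ResumeRAG class; not part of this commit.
from app import rag_system  # heavy: loads Mistral-7B in 4-bit at import time


class FakeUpload:
    """Stand-in for the upload object Gradio passes to process_resume."""

    def __init__(self, name: str):
        self.name = name  # process_resume only reads .name


status = rag_system.process_resume(FakeUpload("sample_resume.docx"))  # hypothetical file
print(status)

answer, context = rag_system.query("What are the candidate's key skills?")
print("Answer:", answer)
print("Retrieved context:", context[:200])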
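
generate_answer builds the Mistral [INST] ... [/INST] prompt by hand and recovers the completion by splitting on the last [/INST]. A sketch of the same formatting via the tokenizer's chat template, which sidesteps the manual split; this is an alternative, not what the commit ships:

# Sketch: chat-template alternative to the hand-built [INST] prompt
# (assumes the same Mistral-7B-Instruct-v0.2 tokenizer loaded in app.py).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
messages = [{"role": "user", "content": "Context from resume:\n...\n\nQuestion: ..."}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
# After model.generate(input_ids, ...), decode only the newly generated tokens:
#     tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
# which avoids answer.split("[/INST]")[-1].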