telcom committed
Commit 7bc81e6 · verified · 1 Parent(s): 8627025

Update app.py

Files changed (1): app.py +215 -350
app.py CHANGED
@@ -1,358 +1,223 @@
- import os
- import re
  import gradio as gr
- import numpy as np
- import faiss
- import requests
-
- from pypdf import PdfReader
  from docx import Document
- from fastembed import TextEmbedding
- from llama_cpp import Llama
-
-
- # -------------------------
- # Config
- # -------------------------
- EMBED_MODEL = os.getenv("EMBED_MODEL_ID", "BAAI/bge-small-en-v1.5")
-
- TOP_K = int(os.getenv("TOP_K", "5"))
- CHUNK_CHARS = int(os.getenv("CHUNK_CHARS", "1400"))
- CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "250"))
-
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "260"))
- TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
-
- # GGUF model path and optional public download URL
- MODEL_PATH = os.getenv("GGUF_MODEL_PATH", "models/model.gguf")
- MODEL_URL = os.getenv("GGUF_MODEL_URL", "")  # optional, public direct link to a .gguf
-
- # GPU layers: -1 means "as many as possible"
- N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "-1"))
- N_CTX = int(os.getenv("N_CTX", "4096"))
-
-
- # -------------------------
- # Helpers: file -> text
- # -------------------------
- def _clean_text(s: str) -> str:
-     s = s.replace("\x00", " ")
-     s = re.sub(r"[ \t]+", " ", s)
-     s = re.sub(r"\n{3,}", "\n\n", s)
-     return s.strip()
-
-
- def extract_text_from_pdf(path: str) -> str:
-     reader = PdfReader(path)
-     parts = []
-     for page in reader.pages:
-         txt = page.extract_text() or ""
-         if txt.strip():
-             parts.append(txt)
-     return _clean_text("\n\n".join(parts))
-
-
- def extract_text_from_docx(path: str) -> str:
-     doc = Document(path)
-     parts = []
-     for p in doc.paragraphs:
-         t = (p.text or "").strip()
-         if t:
-             parts.append(t)
-     return _clean_text("\n".join(parts))
-
-
- def extract_resume_text(file_path: str) -> str:
-     lower = file_path.lower()
-     if lower.endswith(".pdf"):
-         return extract_text_from_pdf(file_path)
-     if lower.endswith(".docx"):
-         return extract_text_from_docx(file_path)
-     raise ValueError("Unsupported file type. Please upload a PDF or DOCX.")
-
-
- # -------------------------
- # Chunking
- # -------------------------
- def chunk_text(text: str, chunk_chars: int = CHUNK_CHARS, overlap: int = CHUNK_OVERLAP):
-     text = text.strip()
-     if not text:
-         return []
-     chunks = []
-     start = 0
-     n = len(text)
-     while start < n:
-         end = min(start + chunk_chars, n)
-         chunk = text[start:end].strip()
-         if chunk:
-             chunks.append(chunk)
-         if end == n:
-             break
-         start = max(0, end - overlap)
-     return chunks
-
-
- # -------------------------
- # Vector store (FAISS)
- # -------------------------
- def normalize(v: np.ndarray) -> np.ndarray:
-     norm = np.linalg.norm(v, axis=1, keepdims=True) + 1e-12
-     return v / norm
-
-
- def build_faiss_index(embeddings: np.ndarray):
-     embeddings = normalize(embeddings.astype("float32"))
-     dim = embeddings.shape[1]
-     index = faiss.IndexFlatIP(dim)
-     index.add(embeddings)
-     return index
-
-
- def retrieve(query: str, embedder: TextEmbedding, index, chunks, top_k: int = TOP_K):
-     q_vec = list(embedder.embed([query]))[0]
-     q_emb = np.array(q_vec, dtype="float32")[None, :]
-     q_emb = normalize(q_emb)
-
-     scores, ids = index.search(q_emb, top_k)
-     hits = []
-     for score, idx in zip(scores[0], ids[0]):
-         if idx == -1:
-             continue
-         hits.append({"score": float(score), "chunk": chunks[int(idx)], "id": int(idx)})
-     return hits
-
-
- def format_sources(hits):
-     lines = []
-     for i, h in enumerate(hits, start=1):
-         snippet = re.sub(r"\s+", " ", h["chunk"].strip())
-         if len(snippet) > 220:
-             snippet = snippet[:220] + "..."
-         lines.append(f"- Source {i} (score {h['score']:.3f}): {snippet}")
-     return "\n".join(lines)
-
-
- # -------------------------
- # Local LLM (llama.cpp)
- # -------------------------
- _LLM = None
-
- def ensure_model_file():
-     os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)
-     if os.path.exists(MODEL_PATH) and os.path.getsize(MODEL_PATH) > 10_000_000:
-         return
 
-     if not MODEL_URL:
-         raise RuntimeError(
-             "GGUF model file not found. Set GGUF_MODEL_PATH to an existing .gguf in the repo, "
-             "or provide GGUF_MODEL_URL (public direct link to a .gguf)."
          )
-
-     # Download the model once
-     with requests.get(MODEL_URL, stream=True, timeout=120) as r:
-         r.raise_for_status()
-         with open(MODEL_PATH, "wb") as f:
-             for chunk in r.iter_content(chunk_size=1024 * 1024):
-                 if chunk:
-                     f.write(chunk)
-
- def get_llm():
-     global _LLM
-     if _LLM is not None:
-         return _LLM
-
-     ensure_model_file()
-
-     # If CUDA build is present, n_gpu_layers=-1 will push as much as possible to GPU
-     _LLM = Llama(
-         model_path=MODEL_PATH,
-         n_ctx=N_CTX,
-         n_threads=max(2, os.cpu_count() or 4),
-         n_gpu_layers=N_GPU_LAYERS,
-         verbose=False,
-     )
-     return _LLM
-
-
- def answer_with_llm(question: str, hits: list):
-     llm = get_llm()
-
-     sources_text = "\n\n".join([f"[Source {i+1}]\n{h['chunk']}" for i, h in enumerate(hits)])
-
-     system = (
-         "You are a resume assistant.\n"
-         "Answer ONLY using the provided SOURCES.\n"
-         "If the answer is not explicitly supported by the SOURCES, say: "
-         "'I cannot find that in the uploaded resume.'\n"
-         "Do not invent roles, dates, skills, employers, or achievements.\n"
-         "Keep it concise and professional.\n"
-     )
-
-     prompt = (
-         f"{system}\n\n"
-         f"SOURCES:\n{sources_text}\n\n"
-         f"QUESTION:\n{question}\n\n"
-         f"ANSWER:"
      )
-
-     out = llm(
-         prompt,
-         max_tokens=MAX_NEW_TOKENS,
-         temperature=TEMPERATURE,
-         top_p=0.9,
-         repeat_penalty=1.05,
-         stop=["\n\nQUESTION:", "\n\nSOURCES:"],
      )
-
-     text = out["choices"][0]["text"].strip()
-     return text
-
-
- # -------------------------
- # App state
- # -------------------------
- class AppState:
-     def __init__(self):
-         self.embedder = None
-         self.index = None
-         self.chunks = []
-         self.ready = False
-
- STATE = AppState()
-
-
- # -------------------------
- # UI helpers
- # -------------------------
- def status_badge(is_ready: bool, msg: str):
-     color = "#22c55e" if is_ready else "#ef4444"
-     label = "READY" if is_ready else "NOT READY"
-     return f"""
-     <div style="display:flex;align-items:center;gap:10px;padding:10px 12px;border-radius:12px;
-                 border:1px solid rgba(255,255,255,0.14);background:rgba(0,0,0,0.18);">
-       <div style="width:12px;height:12px;border-radius:999px;background:{color};"></div>
-       <div style="font-weight:900;letter-spacing:0.6px;">{label}</div>
-       <div style="opacity:0.92;">{msg}</div>
-     </div>
-     """
-
-
- CSS = """
- :root { color-scheme: dark; }
- .gradio-container { background: #070b14 !important; color: #f8fafc !important; }
- .gr-box, .block, .wrap, .panel { background: #0b1220 !important; border: 1px solid rgba(255,255,255,0.14) !important; }
- label, .md, .prose { color: #f8fafc !important; }
- textarea, input[type="text"] { background: #050814 !important; color: #f8fafc !important; border: 1px solid rgba(255,255,255,0.18) !important; }
- button.primary { background: #60a5fa !important; color: #061018 !important; font-weight: 900 !important; border: none !important; }
- button.secondary { background: transparent !important; color: #f8fafc !important; border: 1px solid rgba(255,255,255,0.18) !important; }
- footer { display:none !important; }
- """
-
-
- # -------------------------
- # Callbacks (messages format)
- # -------------------------
- def on_build(file_obj):
-     STATE.embedder = None
-     STATE.index = None
-     STATE.chunks = []
-     STATE.ready = False
-
-     if file_obj is None:
-         return status_badge(False, "Upload a PDF or DOCX to begin."), gr.update(interactive=False), []
-
-     try:
-         text = extract_resume_text(file_obj.name)
-     except Exception:
-         return status_badge(False, "Could not read this file. Try a DOCX or a text-based PDF."), gr.update(interactive=False), []
-
-     if not text.strip():
-         return status_badge(False, "No extractable text found (scanned PDF). Upload a DOCX instead."), gr.update(interactive=False), []
-
-     chunks = chunk_text(text)
-     if not chunks:
-         return status_badge(False, "Could not chunk the resume. Try DOCX."), gr.update(interactive=False), []
-
-     try:
-         embedder = TextEmbedding(model_name=EMBED_MODEL)
-         vecs = np.array(list(embedder.embed(chunks)), dtype="float32")
-         index = build_faiss_index(vecs)
-     except Exception:
-         return status_badge(False, "Embedding/indexing failed. Try again or use DOCX."), gr.update(interactive=False), []
-
-     STATE.embedder = embedder
-     STATE.index = index
-     STATE.chunks = chunks
-     STATE.ready = True
-
-     # Warm up LLM lazily later, do not block UI
-     return status_badge(True, "Resume loaded. Ask your question below."), gr.update(interactive=True), []
-
-
- def on_ask(question, history):
-     history = history or []
-     q = (question or "").strip()
-     if not q:
-         return history
-
-     if not STATE.ready:
-         history.append({"role": "user", "content": q})
-         history.append({"role": "assistant", "content": "Please upload your resume first (PDF or DOCX)."})
-         return history
-
-     hits = retrieve(q, STATE.embedder, STATE.index, STATE.chunks, top_k=TOP_K)
-
-     try:
-         answer = answer_with_llm(q, hits)
-     except Exception as e:
-         answer = f"Local model error: {e}"
-
-     final = f"{answer}\n\nSources:\n{format_sources(hits)}"
-
-     history.append({"role": "user", "content": q})
-     history.append({"role": "assistant", "content": final})
-     return history
-
-
- def on_clear():
-     return []
-
-
- # -------------------------
- # UI
- # -------------------------
- with gr.Blocks(title="ResumeQA") as demo:
-     gr.Markdown(
-         """
-         <div style="margin-bottom:10px;">
-           <div style="font-size:28px;font-weight:900;">ResumeQA</div>
-           <div style="opacity:0.82;margin-top:2px;">
-             Upload a resume, then ask questions. Everything runs locally.
-           </div>
-         </div>
-         """
      )
 
-     status_html = gr.HTML(status_badge(False, "Upload a PDF or DOCX to begin."))
-
-     uploader = gr.File(label="Upload resume (PDF or DOCX)", file_types=[".pdf", ".docx"], height=90)
-     build_btn = gr.Button("Build resume index", variant="primary")
-
-     chatbot = gr.Chatbot(label="Chat", height=430)
-
-     with gr.Row():
-         question = gr.Textbox(
-             label="Your question",
-             placeholder="Example: What roles have I held, and what impact did I deliver?",
-             interactive=False
-         )
-         ask_btn = gr.Button("Ask", variant="primary")
-
-     clear_btn = gr.Button("Clear chat", variant="secondary")
-
-     build_btn.click(fn=on_build, inputs=[uploader], outputs=[status_html, question, chatbot])
-     ask_btn.click(fn=on_ask, inputs=[question, chatbot], outputs=[chatbot]).then(lambda: "", None, question)
-     question.submit(fn=on_ask, inputs=[question, chatbot], outputs=[chatbot]).then(lambda: "", None, question)
-     clear_btn.click(fn=on_clear, inputs=None, outputs=[chatbot])
-
- demo.queue(default_concurrency_limit=4).launch(css=CSS, ssr_mode=False)
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from sentence_transformers import SentenceTransformer
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import PyPDF2
  from docx import Document
+ import numpy as np
+ from typing import List, Tuple
+ import gc
 
+ class ResumeRAG:
+     def __init__(self):
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         print(f"Using device: {self.device}")
+
+         # Initialize embedding model (lightweight)
+         self.embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={'device': self.device}
          )
+
+         # Initialize LLM with 4-bit quantization for GPU efficiency
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.float16,
+             bnb_4bit_use_double_quant=True,
+             bnb_4bit_quant_type="nf4"
+         )
+
+         model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+
+         print("Loading model...")
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             quantization_config=quantization_config,
+             device_map="auto",
+             trust_remote_code=True
+         )
+
+         self.vector_store = None
+         self.text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=500,
+             chunk_overlap=50
+         )
+
+     def extract_text_from_pdf(self, file_path: str) -> str:
+         """Extract text from PDF file"""
+         try:
+             with open(file_path, 'rb') as file:
+                 pdf_reader = PyPDF2.PdfReader(file)
+                 text = ""
+                 for page in pdf_reader.pages:
+                     text += page.extract_text() or ""  # guard: extract_text() may yield nothing for a page
+             return text
+         except Exception as e:
+             return f"Error reading PDF: {str(e)}"
+
+     def extract_text_from_docx(self, file_path: str) -> str:
+         """Extract text from DOCX file"""
+         try:
+             doc = Document(file_path)
+             text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
+             return text
+         except Exception as e:
+             return f"Error reading DOCX: {str(e)}"
+
+     def process_resume(self, file) -> str:
+         """Process uploaded resume and create vector store"""
+         if file is None:
+             return "Please upload a resume file."
+
+         # Extract text based on file type
+         file_path = file.name
+         if file_path.endswith('.pdf'):
+             text = self.extract_text_from_pdf(file_path)
+         elif file_path.endswith('.docx'):
+             text = self.extract_text_from_docx(file_path)
+         else:
+             return "Unsupported file format. Please upload PDF or DOCX."
+
+         if text.startswith("Error"):
+             return text
+
+         # Split text into chunks
+         chunks = self.text_splitter.split_text(text)
+
+         if not chunks:
+             return "No text could be extracted from the resume."
+
+         # Create vector store
+         self.vector_store = FAISS.from_texts(chunks, self.embeddings)
+
+         return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks. You can now ask questions."
+
+     def generate_answer(self, question: str, context: str) -> str:
+         """Generate answer using LLM"""
+         prompt = f"""[INST] You are a helpful assistant analyzing a resume. Use the following context to answer the question accurately and concisely.
+
+ Context from resume:
+ {context}
+
+ Question: {question}
+
+ Provide a clear, specific answer based only on the information in the context. If the information is not in the context, say so. [/INST]"""
+
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=256,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 pad_token_id=self.tokenizer.eos_token_id
+             )
+
+         answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         # Extract only the assistant's response
+         answer = answer.split("[/INST]")[-1].strip()
+
+         return answer
+
+     def query(self, question: str) -> Tuple[str, str]:
+         """Query the RAG system"""
+         if self.vector_store is None:
+             return "Please upload a resume first.", ""
+
+         if not question.strip():
+             return "Please enter a question.", ""
+
+         # Retrieve relevant chunks
+         docs = self.vector_store.similarity_search(question, k=3)
+         context = "\n\n".join([doc.page_content for doc in docs])
+
+         # Generate answer
+         answer = self.generate_answer(question, context)
+
+         # Clear cache to manage GPU memory
+         if self.device == "cuda":
+             torch.cuda.empty_cache()
+
+         return answer, context
+
+ # Initialize RAG system
+ print("Initializing Resume RAG System...")
+ rag_system = ResumeRAG()
+
+ # Create Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+     gr.Markdown("""
+     # 📄 Resume RAG Q&A System
+     ### Powered by Mistral-7B + FAISS Vector Search
+
+     Upload your resume and ask questions about experience, skills, education, and more!
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### 📤 Upload Resume")
+             file_input = gr.File(
+                 label="Upload PDF or DOCX",
+                 file_types=[".pdf", ".docx"]
+             )
+             upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
+             upload_status = gr.Textbox(label="Status", interactive=False)
+
+             gr.Markdown("""
+             ---
+             **Example Questions:**
+             - What programming languages does the candidate know?
+             - Summarize the work experience
+             - What is the candidate's education background?
+             - List all technical skills
+             """)
+
+         with gr.Column(scale=2):
+             gr.Markdown("### 💬 Ask Questions")
+             question_input = gr.Textbox(
+                 label="Your Question",
+                 placeholder="e.g., What are the candidate's key skills?",
+                 lines=2
+             )
+             submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
+
+             answer_output = gr.Textbox(
+                 label="Answer",
+                 lines=8,
+                 interactive=False
+             )
+
+             with gr.Accordion("📚 Retrieved Context", open=False):
+                 context_output = gr.Textbox(
+                     label="Relevant Resume Sections",
+                     lines=6,
+                     interactive=False
+                 )
+
+     # Event handlers
+     upload_btn.click(
+         fn=rag_system.process_resume,
+         inputs=[file_input],
+         outputs=[upload_status]
      )
+
+     submit_btn.click(
+         fn=rag_system.query,
+         inputs=[question_input],
+         outputs=[answer_output, context_output]
      )
+
+     question_input.submit(
+         fn=rag_system.query,
+         inputs=[question_input],
+         outputs=[answer_output, context_output]
      )
 
+ if __name__ == "__main__":
+     demo.launch(share=False)
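
For reference, a minimal sketch of driving the new ResumeRAG class outside the Gradio UI. The FakeUpload shim and the sample_resume.docx path are hypothetical; Gradio hands process_resume an object whose .name attribute points at the uploaded file, and that is the only attribute the method reads. Note that importing app instantiates ResumeRAG at module level (downloading and 4-bit-loading Mistral-7B-Instruct-v0.2), while demo.launch() stays behind the __main__ guard; the langchain.vectorstores / langchain.embeddings import paths also assume a pre-langchain_community release of LangChain.

# Hypothetical driver for the new ResumeRAG class; not part of this commit.
from app import rag_system  # heavy: loads Mistral-7B in 4-bit at import time


class FakeUpload:
    """Stand-in for the upload object Gradio passes to process_resume."""

    def __init__(self, name: str):
        self.name = name  # process_resume only reads .name


status = rag_system.process_resume(FakeUpload("sample_resume.docx"))  # hypothetical file
print(status)

answer, context = rag_system.query("What are the candidate's key skills?")
print("Answer:", answer)
print("Retrieved context:", context[:200])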
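
generate_answer builds the Mistral [INST] ... [/INST] prompt by hand and recovers the completion by splitting on the last [/INST]. A sketch of the same formatting via the tokenizer's chat template, which sidesteps the manual split; this is an alternative, not what the commit ships:

# Sketch: chat-template alternative to the hand-built [INST] prompt
# (assumes the same Mistral-7B-Instruct-v0.2 tokenizer loaded in app.py).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
messages = [{"role": "user", "content": "Context from resume:\n...\n\nQuestion: ..."}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
# After model.generate(input_ids, ...), decode only the newly generated tokens:
#     tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
# which avoids answer.split("[/INST]")[-1].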