jayjay-12345 committed on
Commit
f1b302b
·
verified ·
1 Parent(s): 777df97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -216,13 +216,13 @@ import gc
216
  from sentence_transformers import SentenceTransformer
217
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
218
 
219
- # 1. Load RAG Memory
220
  embed_model = SentenceTransformer('all-MiniLM-L6-v2')
221
  index = faiss.read_index("faiss_index.bin")
222
  with open("processed_chunks.json", "r") as f:
223
  chunks = json.load(f)
224
 
225
- # 2. Define Models
226
  MODELS = {
227
  "IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
228
  "Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
@@ -230,7 +230,7 @@ MODELS = {
230
  "SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
231
  }
232
 
233
- # --- TAB 1: RAG CHATBOT LOGIC ---
234
  def ask_specific_model(model_name, prompt):
235
  pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
236
  res = pipe(prompt, max_new_tokens=60, do_sample=False)
@@ -261,20 +261,20 @@ def compare_hr_bots(question):
261
  yield results[0], results[1], results[2], results[3], source
262
 
263
 
264
- # --- TAB 2: PERPLEXITY EVALUATION LOGIC ---
265
  def calculate_perplexity(model_name):
266
  try:
267
  model_id = MODELS[model_name]
268
 
269
- # Grab a chunk of our actual HR data to test on
270
  sample_texts = [chunk['text'] for chunk in chunks[:3]]
271
  test_text = " ".join(sample_texts)
272
 
273
- # Load Model
274
  tokenizer = AutoTokenizer.from_pretrained(model_id)
275
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
276
 
277
- # Tokenize and compute loss
278
  inputs = tokenizer(test_text, return_tensors="pt")
279
  with torch.no_grad():
280
  outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
@@ -282,7 +282,7 @@ def calculate_perplexity(model_name):
282
 
283
  perplexity = torch.exp(loss).item()
284
 
285
- # STRICT MEMORY CLEANUP
286
  del model
287
  del tokenizer
288
  del inputs
@@ -295,10 +295,10 @@ def calculate_perplexity(model_name):
295
  return f"Error calculating perplexity: {str(e)}"
296
 
297
 
298
- # --- GRADIO UI BUILDER ---
299
  with gr.Blocks(theme=gr.themes.Soft()) as interface:
300
- gr.Markdown("# ADU HR Knowledge Assistant & Evaluation Toolkit")
301
- gr.Markdown("Enterprise RAG Prototype using strictly Open-Source LLMs.")
302
 
303
  with gr.Tabs():
304
  # TAB 1 UI
@@ -310,8 +310,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
310
  out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
311
  out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
312
  with gr.Row():
313
- out_qwen = gr.Textbox(label="Qwen 2.5 1.5B")
314
- out_smol = gr.Textbox(label="SmolLM 1.7B")
315
  out_source = gr.Textbox(label="Source Document Used")
316
 
317
  submit_btn.click(
@@ -320,7 +320,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
320
  outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
321
  )
322
 
323
- # TAB 2 UI
324
  with gr.TabItem("📊 Perplexity Evaluator"):
325
  gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")
326
 
 
216
  from sentence_transformers import SentenceTransformer
217
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
218
 
219
+
220
  embed_model = SentenceTransformer('all-MiniLM-L6-v2')
221
  index = faiss.read_index("faiss_index.bin")
222
  with open("processed_chunks.json", "r") as f:
223
  chunks = json.load(f)
224
 
225
+
226
  MODELS = {
227
  "IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
228
  "Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
 
230
  "SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
231
  }
232
 
233
+
234
  def ask_specific_model(model_name, prompt):
235
  pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
236
  res = pipe(prompt, max_new_tokens=60, do_sample=False)
 
261
  yield results[0], results[1], results[2], results[3], source
262
 
263
 
264
+
265
  def calculate_perplexity(model_name):
266
  try:
267
  model_id = MODELS[model_name]
268
 
269
+
270
  sample_texts = [chunk['text'] for chunk in chunks[:3]]
271
  test_text = " ".join(sample_texts)
272
 
273
+
274
  tokenizer = AutoTokenizer.from_pretrained(model_id)
275
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
276
 
277
+
278
  inputs = tokenizer(test_text, return_tensors="pt")
279
  with torch.no_grad():
280
  outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
 
282
 
283
  perplexity = torch.exp(loss).item()
284
 
285
+
286
  del model
287
  del tokenizer
288
  del inputs
 
295
  return f"Error calculating perplexity: {str(e)}"
296
 
297
 
298
+
299
  with gr.Blocks(theme=gr.themes.Soft()) as interface:
300
+ gr.Markdown("# ADQ Enterprise HR Knowledge Assistant & Evaluation Toolkit")
301
+ gr.Markdown("Comparing grounding quality across 4 open-source LLMs using Enterprise HR Policies. Please be patient since there is a limit of 16GB RAM :)")
302
 
303
  with gr.Tabs():
304
  # TAB 1 UI
 
310
  out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
311
  out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
312
  with gr.Row():
313
+ out_qwen = gr.Textbox(label="Alibaba Qwen 2.5 1.5B")
314
+ out_smol = gr.Textbox(label="HuggingFace SmolLM 1.7B")
315
  out_source = gr.Textbox(label="Source Document Used")
316
 
317
  submit_btn.click(
 
320
  outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
321
  )
322
 
323
+
324
  with gr.TabItem("📊 Perplexity Evaluator"):
325
  gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")
326