Spaces:

jayjay-12345
/

CODS641_Enterprise_FAQ_Bot

Sleeping

App Files Files Community

jayjay-12345 committed about 1 month ago

Commit

f1b302b

verified ·

1 Parent(s): 777df97

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -14

app.py CHANGED Viewed

@@ -216,13 +216,13 @@ import gc
 from sentence_transformers import SentenceTransformer
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-# 1. Load RAG Memory
 embed_model = SentenceTransformer('all-MiniLM-L6-v2')
 index = faiss.read_index("faiss_index.bin")
 with open("processed_chunks.json", "r") as f:
     chunks = json.load(f)
-# 2. Define Models
 MODELS = {
     "IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
     "Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
@@ -230,7 +230,7 @@ MODELS = {
     "SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
 }
-# --- TAB 1: RAG CHATBOT LOGIC ---
 def ask_specific_model(model_name, prompt):
     pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
     res = pipe(prompt, max_new_tokens=60, do_sample=False)
@@ -261,20 +261,20 @@ def compare_hr_bots(question):
         yield results[0], results[1], results[2], results[3], source
-# --- TAB 2: PERPLEXITY EVALUATION LOGIC ---
 def calculate_perplexity(model_name):
     try:
         model_id = MODELS[model_name]
-        # Grab a chunk of our actual HR data to test on
         sample_texts = [chunk['text'] for chunk in chunks[:3]]
         test_text = " ".join(sample_texts)
-        # Load Model
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
-        # Tokenize and compute loss
         inputs = tokenizer(test_text, return_tensors="pt")
         with torch.no_grad():
             outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
@@ -282,7 +282,7 @@ def calculate_perplexity(model_name):
         perplexity = torch.exp(loss).item()
-        # STRICT MEMORY CLEANUP
         del model
         del tokenizer
         del inputs
@@ -295,10 +295,10 @@ def calculate_perplexity(model_name):
         return f"Error calculating perplexity: {str(e)}"
-# --- GRADIO UI BUILDER ---
 with gr.Blocks(theme=gr.themes.Soft()) as interface:
-    gr.Markdown("# ADU HR Knowledge Assistant & Evaluation Toolkit")
-    gr.Markdown("Enterprise RAG Prototype using strictly Open-Source LLMs.")
     with gr.Tabs():
         # TAB 1 UI
@@ -310,8 +310,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
                 out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
                 out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
             with gr.Row():
-                out_qwen = gr.Textbox(label="Qwen 2.5 1.5B")
-                out_smol = gr.Textbox(label="SmolLM 1.7B")
             out_source = gr.Textbox(label="Source Document Used")
             submit_btn.click(
@@ -320,7 +320,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
                 outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
             )
-        # TAB 2 UI
         with gr.TabItem("📊 Perplexity Evaluator"):
             gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")

 from sentence_transformers import SentenceTransformer
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 embed_model = SentenceTransformer('all-MiniLM-L6-v2')
 index = faiss.read_index("faiss_index.bin")
 with open("processed_chunks.json", "r") as f:
     chunks = json.load(f)
 MODELS = {
     "IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
     "Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
     "SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
 }
 def ask_specific_model(model_name, prompt):
     pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
     res = pipe(prompt, max_new_tokens=60, do_sample=False)
         yield results[0], results[1], results[2], results[3], source
 def calculate_perplexity(model_name):
     try:
         model_id = MODELS[model_name]
         sample_texts = [chunk['text'] for chunk in chunks[:3]]
         test_text = " ".join(sample_texts)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
         inputs = tokenizer(test_text, return_tensors="pt")
         with torch.no_grad():
             outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
         perplexity = torch.exp(loss).item()
         del model
         del tokenizer
         del inputs
         return f"Error calculating perplexity: {str(e)}"
 with gr.Blocks(theme=gr.themes.Soft()) as interface:
+    gr.Markdown("# ADQ Enterprise HR Knowledge Assistant & Evaluation Toolkit")
+    gr.Markdown("Comparing grounding quality across 4 open-source LLMs using Enterprise HR Policies. Please be patient since there is a limit of 16GB RAM :)")
     with gr.Tabs():
         # TAB 1 UI
                 out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
                 out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
             with gr.Row():
+                out_qwen = gr.Textbox(label="Alibaba Qwen 2.5 1.5B")
+                out_smol = gr.Textbox(label="HuggingFace SmolLM 1.7B")
             out_source = gr.Textbox(label="Source Document Used")
             submit_btn.click(
                 outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
             )
         with gr.TabItem("📊 Perplexity Evaluator"):
             gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")