Update app.py
Browse files
app.py
CHANGED
|
@@ -216,13 +216,13 @@ import gc
|
|
| 216 |
from sentence_transformers import SentenceTransformer
|
| 217 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
| 218 |
|
| 219 |
-
|
| 220 |
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 221 |
index = faiss.read_index("faiss_index.bin")
|
| 222 |
with open("processed_chunks.json", "r") as f:
|
| 223 |
chunks = json.load(f)
|
| 224 |
|
| 225 |
-
|
| 226 |
MODELS = {
|
| 227 |
"IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
|
| 228 |
"Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
|
|
@@ -230,7 +230,7 @@ MODELS = {
|
|
| 230 |
"SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
|
| 231 |
}
|
| 232 |
|
| 233 |
-
|
| 234 |
def ask_specific_model(model_name, prompt):
|
| 235 |
pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
|
| 236 |
res = pipe(prompt, max_new_tokens=60, do_sample=False)
|
|
@@ -261,20 +261,20 @@ def compare_hr_bots(question):
|
|
| 261 |
yield results[0], results[1], results[2], results[3], source
|
| 262 |
|
| 263 |
|
| 264 |
-
|
| 265 |
def calculate_perplexity(model_name):
|
| 266 |
try:
|
| 267 |
model_id = MODELS[model_name]
|
| 268 |
|
| 269 |
-
|
| 270 |
sample_texts = [chunk['text'] for chunk in chunks[:3]]
|
| 271 |
test_text = " ".join(sample_texts)
|
| 272 |
|
| 273 |
-
|
| 274 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 275 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
|
| 276 |
|
| 277 |
-
|
| 278 |
inputs = tokenizer(test_text, return_tensors="pt")
|
| 279 |
with torch.no_grad():
|
| 280 |
outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
|
|
@@ -282,7 +282,7 @@ def calculate_perplexity(model_name):
|
|
| 282 |
|
| 283 |
perplexity = torch.exp(loss).item()
|
| 284 |
|
| 285 |
-
|
| 286 |
del model
|
| 287 |
del tokenizer
|
| 288 |
del inputs
|
|
@@ -295,10 +295,10 @@ def calculate_perplexity(model_name):
|
|
| 295 |
return f"Error calculating perplexity: {str(e)}"
|
| 296 |
|
| 297 |
|
| 298 |
-
|
| 299 |
with gr.Blocks(theme=gr.themes.Soft()) as interface:
|
| 300 |
-
gr.Markdown("#
|
| 301 |
-
gr.Markdown("
|
| 302 |
|
| 303 |
with gr.Tabs():
|
| 304 |
# TAB 1 UI
|
|
@@ -310,8 +310,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
|
|
| 310 |
out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
|
| 311 |
out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
|
| 312 |
with gr.Row():
|
| 313 |
-
out_qwen = gr.Textbox(label="Qwen 2.5 1.5B")
|
| 314 |
-
out_smol = gr.Textbox(label="SmolLM 1.7B")
|
| 315 |
out_source = gr.Textbox(label="Source Document Used")
|
| 316 |
|
| 317 |
submit_btn.click(
|
|
@@ -320,7 +320,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
|
|
| 320 |
outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
|
| 321 |
)
|
| 322 |
|
| 323 |
-
|
| 324 |
with gr.TabItem("📊 Perplexity Evaluator"):
|
| 325 |
gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")
|
| 326 |
|
|
|
|
| 216 |
from sentence_transformers import SentenceTransformer
|
| 217 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
| 218 |
|
| 219 |
+
|
| 220 |
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 221 |
index = faiss.read_index("faiss_index.bin")
|
| 222 |
with open("processed_chunks.json", "r") as f:
|
| 223 |
chunks = json.load(f)
|
| 224 |
|
| 225 |
+
|
| 226 |
MODELS = {
|
| 227 |
"IBM Granite 3.1 2B": "ibm-granite/granite-3.1-2b-instruct",
|
| 228 |
"Microsoft Phi 3.5 Mini": "microsoft/Phi-3.5-mini-instruct",
|
|
|
|
| 230 |
"SmolLM 1.7B": "HuggingFaceTB/SmolLM-1.7B-Instruct"
|
| 231 |
}
|
| 232 |
|
| 233 |
+
|
| 234 |
def ask_specific_model(model_name, prompt):
|
| 235 |
pipe = pipeline("text-generation", model=MODELS[model_name], device_map="cpu")
|
| 236 |
res = pipe(prompt, max_new_tokens=60, do_sample=False)
|
|
|
|
| 261 |
yield results[0], results[1], results[2], results[3], source
|
| 262 |
|
| 263 |
|
| 264 |
+
|
| 265 |
def calculate_perplexity(model_name):
|
| 266 |
try:
|
| 267 |
model_id = MODELS[model_name]
|
| 268 |
|
| 269 |
+
|
| 270 |
sample_texts = [chunk['text'] for chunk in chunks[:3]]
|
| 271 |
test_text = " ".join(sample_texts)
|
| 272 |
|
| 273 |
+
|
| 274 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 275 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
|
| 276 |
|
| 277 |
+
|
| 278 |
inputs = tokenizer(test_text, return_tensors="pt")
|
| 279 |
with torch.no_grad():
|
| 280 |
outputs = model(input_ids=inputs["input_ids"], labels=inputs["input_ids"])
|
|
|
|
| 282 |
|
| 283 |
perplexity = torch.exp(loss).item()
|
| 284 |
|
| 285 |
+
|
| 286 |
del model
|
| 287 |
del tokenizer
|
| 288 |
del inputs
|
|
|
|
| 295 |
return f"Error calculating perplexity: {str(e)}"
|
| 296 |
|
| 297 |
|
| 298 |
+
|
| 299 |
with gr.Blocks(theme=gr.themes.Soft()) as interface:
|
| 300 |
+
gr.Markdown("# ADQ Enterprise HR Knowledge Assistant & Evaluation Toolkit")
|
| 301 |
+
gr.Markdown("Comparing grounding quality across 4 open-source LLMs using Enterprise HR Policies. Please be patient since there is a limit of 16GB RAM :)")
|
| 302 |
|
| 303 |
with gr.Tabs():
|
| 304 |
# TAB 1 UI
|
|
|
|
| 310 |
out_granite = gr.Textbox(label="IBM Granite 3.1 2B")
|
| 311 |
out_phi = gr.Textbox(label="Microsoft Phi 3.5 Mini")
|
| 312 |
with gr.Row():
|
| 313 |
+
out_qwen = gr.Textbox(label="Alibaba Qwen 2.5 1.5B")
|
| 314 |
+
out_smol = gr.Textbox(label="HuggingFace SmolLM 1.7B")
|
| 315 |
out_source = gr.Textbox(label="Source Document Used")
|
| 316 |
|
| 317 |
submit_btn.click(
|
|
|
|
| 320 |
outputs=[out_granite, out_phi, out_qwen, out_smol, out_source]
|
| 321 |
)
|
| 322 |
|
| 323 |
+
|
| 324 |
with gr.TabItem("📊 Perplexity Evaluator"):
|
| 325 |
gr.Markdown("Select a single model to calculate its perplexity against our internal HR dataset. **Warning: Takes 30-60 seconds on CPU.**")
|
| 326 |
|