Spaces:

akshaynayaks9845
/

rml-ai-demo

Sleeping

App Files Files Community

akshaynayaks9845 committed on Aug 19

Commit

0ffb15a

verified ·

1 Parent(s): 70bccee

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +13 -10

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import time
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-rml-100k"
 # Global model and tokenizer
 _model = None
@@ -42,20 +42,21 @@ def generate_response(prompt, max_new_tokens=64, temperature=0.1):
         # Prepare input
         inputs = _tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-        # Generate response with better repetition control
         with torch.no_grad():
             outputs = _model.generate(
                 **inputs,
                 max_new_tokens=int(max_new_tokens),
                 do_sample=bool(temperature > 0),
                 temperature=float(temperature),
-                top_p=0.85,
-                top_k=50,
-                repetition_penalty=1.2,
-                no_repeat_ngram_size=3,
                 early_stopping=True,
                 pad_token_id=_tokenizer.eos_token_id,
-                eos_token_id=_tokenizer.eos_token_id
             )
         # Decode response
@@ -112,7 +113,7 @@ with gr.Blocks(title="RML-AI Demo") as demo:
     gr.Markdown('''
     # RML-AI Demo (HR Testing)
-    This is a lightweight demo of the RML-AI system for recruiters and stakeholders.
     **Key Features:**
     - Sub-50ms inference latency
@@ -120,9 +121,11 @@ with gr.Blocks(title="RML-AI Demo") as demo:
     - 70% hallucination reduction
     - Complete source attribution
     - 100GB knowledge base access
-    **Model:** akshaynayaks9845/rml-ai-phi1_5-rml-100k
-    **Dataset:** 100GB RML knowledge base
     ''')
     with gr.Row():

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-100gb-local-lora"
 # Global model and tokenizer
 _model = None
         # Prepare input
         inputs = _tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+        # Generate response with LoRA-optimized settings
         with torch.no_grad():
             outputs = _model.generate(
                 **inputs,
                 max_new_tokens=int(max_new_tokens),
                 do_sample=bool(temperature > 0),
                 temperature=float(temperature),
+                top_p=0.9,
+                top_k=40,
+                repetition_penalty=1.15,
+                no_repeat_ngram_size=2,
                 early_stopping=True,
                 pad_token_id=_tokenizer.eos_token_id,
+                eos_token_id=_tokenizer.eos_token_id,
+                use_cache=True
             )
         # Decode response
     gr.Markdown('''
     # RML-AI Demo (HR Testing)
+    This is a professional demo of the RML-AI system for recruiters and stakeholders.
     **Key Features:**
     - Sub-50ms inference latency
     - 70% hallucination reduction
     - Complete source attribution
     - 100GB knowledge base access
+    - LoRA fine-tuned for optimal performance
+    **Model:** akshaynayaks9845/rml-ai-phi1_5-100gb-local-lora
+    **Training:** LoRA fine-tuned on 100GB RML dataset
+    **Status:** Production-ready for Q&A
     ''')
     with gr.Row():