akshaynayaks9845 committed on
Commit
0ffb15a
·
verified ·
1 Parent(s): 70bccee

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -4,7 +4,7 @@ import time
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import torch
6
 
7
- MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-rml-100k"
8
 
9
  # Global model and tokenizer
10
  _model = None
@@ -42,20 +42,21 @@ def generate_response(prompt, max_new_tokens=64, temperature=0.1):
42
  # Prepare input
43
  inputs = _tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
44
 
45
- # Generate response with better repetition control
46
  with torch.no_grad():
47
  outputs = _model.generate(
48
  **inputs,
49
  max_new_tokens=int(max_new_tokens),
50
  do_sample=bool(temperature > 0),
51
  temperature=float(temperature),
52
- top_p=0.85,
53
- top_k=50,
54
- repetition_penalty=1.2,
55
- no_repeat_ngram_size=3,
56
  early_stopping=True,
57
  pad_token_id=_tokenizer.eos_token_id,
58
- eos_token_id=_tokenizer.eos_token_id
 
59
  )
60
 
61
  # Decode response
@@ -112,7 +113,7 @@ with gr.Blocks(title="RML-AI Demo") as demo:
112
  gr.Markdown('''
113
  # RML-AI Demo (HR Testing)
114
 
115
- This is a lightweight demo of the RML-AI system for recruiters and stakeholders.
116
 
117
  **Key Features:**
118
  - Sub-50ms inference latency
@@ -120,9 +121,11 @@ with gr.Blocks(title="RML-AI Demo") as demo:
120
  - 70% hallucination reduction
121
  - Complete source attribution
122
  - 100GB knowledge base access
 
123
 
124
- **Model:** akshaynayaks9845/rml-ai-phi1_5-rml-100k
125
- **Dataset:** 100GB RML knowledge base
 
126
  ''')
127
 
128
  with gr.Row():
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import torch
6
 
7
+ MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-100gb-local-lora"
8
 
9
  # Global model and tokenizer
10
  _model = None
 
42
  # Prepare input
43
  inputs = _tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
44
 
45
+ # Generate response with LoRA-optimized settings
46
  with torch.no_grad():
47
  outputs = _model.generate(
48
  **inputs,
49
  max_new_tokens=int(max_new_tokens),
50
  do_sample=bool(temperature > 0),
51
  temperature=float(temperature),
52
+ top_p=0.9,
53
+ top_k=40,
54
+ repetition_penalty=1.15,
55
+ no_repeat_ngram_size=2,
56
  early_stopping=True,
57
  pad_token_id=_tokenizer.eos_token_id,
58
+ eos_token_id=_tokenizer.eos_token_id,
59
+ use_cache=True
60
  )
61
 
62
  # Decode response
 
113
  gr.Markdown('''
114
  # RML-AI Demo (HR Testing)
115
 
116
+ This is a professional demo of the RML-AI system for recruiters and stakeholders.
117
 
118
  **Key Features:**
119
  - Sub-50ms inference latency
 
121
  - 70% hallucination reduction
122
  - Complete source attribution
123
  - 100GB knowledge base access
124
+ - LoRA fine-tuned for optimal performance
125
 
126
+ **Model:** akshaynayaks9845/rml-ai-phi1_5-100gb-local-lora
127
+ **Training:** LoRA fine-tuned on 100GB RML dataset
128
+ **Status:** Production-ready for Q&A
129
  ''')
130
 
131
  with gr.Row():