abhi9953 committed on
Commit
9d0d8d2
·
verified ·
1 Parent(s): 200a141

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -4,19 +4,18 @@ from peft import PeftModel
4
  import torch
5
  import os
6
 
7
- # Hugging Face token (optional)
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
9
 
10
- BASE = "microsoft/phi-2"
11
- LORA = "abhi9953/abhi-ai"
12
 
13
- # Load tokenizer
14
- tokenizer = AutoTokenizer.from_pretrained(BASE)
15
  tokenizer.pad_token = tokenizer.eos_token
16
 
17
- # Load base model (CPU optimized)
18
  model = AutoModelForCausalLM.from_pretrained(
19
- BASE,
20
  torch_dtype=torch.float32,
21
  device_map="cpu"
22
  )
@@ -24,38 +23,45 @@ model = AutoModelForCausalLM.from_pretrained(
24
  # Load LoRA adapter
25
  model = PeftModel.from_pretrained(
26
  model,
27
- LORA,
28
  token=HF_TOKEN
29
  )
30
 
31
  model.eval()
32
 
33
- # 🔥 WARM-UP (removes first-response lag)
34
  with torch.no_grad():
35
  _ = model.generate(
36
  **tokenizer("Hello", return_tensors="pt"),
37
- max_new_tokens=5,
38
  use_cache=True
39
  )
40
 
41
  def chat(msg):
42
- prompt = f"### User:\n{msg}\n\n### Abhi:"
 
 
 
 
 
43
  inputs = tokenizer(prompt, return_tensors="pt")
44
 
45
  with torch.no_grad():
46
  output = model.generate(
47
  **inputs,
48
- max_new_tokens=32, # 🔥 VERY IMPORTANT
49
- do_sample=False, # 🔥 MUCH faster
50
- use_cache=True
 
 
51
  )
52
 
53
- text = tokenizer.decode(output[0], skip_special_tokens=True)
54
- return text.split("### Abhi:")[-1].strip()
 
55
 
56
  gr.Interface(
57
  fn=chat,
58
- inputs="text",
59
  outputs="text",
60
- title="Abhi AI (Fast Mode)"
61
  ).launch()
 
4
  import torch
5
  import os
6
 
7
+ # Optional Hugging Face token
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
9
 
10
+ BASE_MODEL = "microsoft/phi-2"
11
+ LORA_MODEL = "abhi9953/abhi-ai"
12
 
13
+
14
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
15
  tokenizer.pad_token = tokenizer.eos_token
16
 
 
17
  model = AutoModelForCausalLM.from_pretrained(
18
+ BASE_MODEL,
19
  torch_dtype=torch.float32,
20
  device_map="cpu"
21
  )
 
23
  # Load LoRA adapter
24
  model = PeftModel.from_pretrained(
25
  model,
26
+ LORA_MODEL,
27
  token=HF_TOKEN
28
  )
29
 
30
  model.eval()
31
 
 
32
  with torch.no_grad():
33
  _ = model.generate(
34
  **tokenizer("Hello", return_tensors="pt"),
35
+ max_new_tokens=10,
36
  use_cache=True
37
  )
38
 
39
  def chat(msg):
40
+ prompt = f"""### User:
41
+ {msg}
42
+
43
+ ### Abhi:
44
+ """
45
+
46
  inputs = tokenizer(prompt, return_tensors="pt")
47
 
48
  with torch.no_grad():
49
  output = model.generate(
50
  **inputs,
51
+ max_new_tokens=128,
52
+ do_sample=False,
53
+ use_cache=True,
54
+ pad_token_id=tokenizer.eos_token_id,
55
+ eos_token_id=tokenizer.eos_token_id
56
  )
57
 
58
+ decoded = tokenizer.decode(output[0], skip_special_tokens=True)
59
+
60
+ return decoded.split("### Abhi:")[-1].strip()
61
 
62
  gr.Interface(
63
  fn=chat,
64
+ inputs=gr.Textbox(lines=3, placeholder="Talk to Abhi AI..."),
65
  outputs="text",
66
+ title="Abhi AI (Fast + Stable Mode)"
67
  ).launch()