devNaam committed on
Commit
65b3a86
·
1 Parent(s): 3df68a4

Fix base model

Browse files
Files changed (2) hide show
  1. app.py +6 -11
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,14 +4,13 @@ import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
6
 
7
- BASE_MODEL = "meta-llama/Llama-3.2-3B"
8
  ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
11
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
  BASE_MODEL,
14
- torch_dtype=torch.float16,
15
  device_map="auto"
16
  )
17
 
@@ -23,22 +22,18 @@ def vakil_ai(prompt):
23
 
24
  output = model.generate(
25
  **inputs,
26
- max_new_tokens=300,
27
- temperature=0.7,
28
- top_p=0.9
29
  )
30
 
31
- response = tokenizer.decode(output[0], skip_special_tokens=True)
32
-
33
- return response
34
 
35
 
36
  demo = gr.Interface(
37
  fn=vakil_ai,
38
- inputs=gr.Textbox(lines=4, placeholder="Ask your legal question..."),
39
  outputs="text",
40
- title="AI Vakil – Legal Assistant",
41
- description="VakilAI powered by Llama 3.2"
42
  )
43
 
44
  demo.launch()
 
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
6
 
7
+ BASE_MODEL = "unsloth/llama-3.2-3b-bnb-4bit"
8
  ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
11
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
  BASE_MODEL,
 
14
  device_map="auto"
15
  )
16
 
 
22
 
23
  output = model.generate(
24
  **inputs,
25
+ max_new_tokens=200,
26
+ temperature=0.7
 
27
  )
28
 
29
+ return tokenizer.decode(output[0], skip_special_tokens=True)
 
 
30
 
31
 
32
  demo = gr.Interface(
33
  fn=vakil_ai,
34
+ inputs=gr.Textbox(lines=4),
35
  outputs="text",
36
+ title="AI Vakil"
 
37
  )
38
 
39
  demo.launch()
requirements.txt CHANGED
@@ -3,4 +3,5 @@ torch
3
  accelerate
4
  peft
5
  sentencepiece
6
- gradio
 
 
3
  accelerate
4
  peft
5
  sentencepiece
6
+ gradio
7
+ bitsandbytes