samzito12 commited on
Commit
27a8bb1
·
1 Parent(s): a282b8f

try to improve the inference

Browse files
Files changed (2) hide show
  1. app.py +21 -5
  2. requirements.txt +1 -3
app.py CHANGED
@@ -8,13 +8,25 @@ model_name = "samzito12/lora_model2"
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
  tokenizer.pad_token = tokenizer.eos_token
 
11
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
14
  device_map="cpu",
 
 
15
  )
16
 
17
- SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on the FineTome dataset."
 
 
 
 
 
 
 
 
 
18
 
19
  def chat(message, history):
20
  # Build conversation
@@ -26,15 +38,16 @@ def chat(message, history):
26
  conversation += f"User: {message}\nAssistant:"
27
 
28
  # Tokenize
29
- inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048)
30
 
31
  # Generate
32
  with torch.no_grad():
33
  outputs = model.generate(
34
  **inputs,
35
- max_new_tokens=256,
36
  temperature=0.7,
37
  do_sample=True,
 
38
  pad_token_id=tokenizer.eos_token_id,
39
  eos_token_id=tokenizer.eos_token_id
40
  )
@@ -54,7 +67,7 @@ demo = gr.ChatInterface(
54
  chat,
55
  title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
56
  description="""
57
- **Model:** Llama-3.2-3B fine-tuned on FineTome-100k dataset
58
 
59
  It's a custom fine-tuned model for ID2223 Lab 2.
60
  """,
@@ -63,7 +76,10 @@ demo = gr.ChatInterface(
63
  "Explain machine learning in simple terms",
64
  "Write a Python function to reverse a string"
65
  ],
66
- theme="soft"
 
 
 
67
  )
68
 
69
  if __name__ == "__main__":
 
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
  tokenizer.pad_token = tokenizer.eos_token
11
+ tokenizer.padding_side = "left"
12
 
13
  model = AutoModelForCausalLM.from_pretrained(
14
  model_name,
15
  device_map="cpu",
16
+ torch_dtype=torch.float32,
17
+ low_cpu_mem_usage=True
18
  )
19
 
20
+ print("⚙️ Quantification du modèle pour optimisation CPU...")
21
+ model = torch.quantization.quantize_dynamic(
22
+ model,
23
+ {torch.nn.Linear},
24
+ dtype=torch.qint8
25
+ )
26
+
27
+ model.eval()
28
+
29
+ SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on a code dataset."
30
 
31
  def chat(message, history):
32
  # Build conversation
 
38
  conversation += f"User: {message}\nAssistant:"
39
 
40
  # Tokenize
41
+ inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024, padding=True)
42
 
43
  # Generate
44
  with torch.no_grad():
45
  outputs = model.generate(
46
  **inputs,
47
+ max_new_tokens=128,
48
  temperature=0.7,
49
  do_sample=True,
50
+ use_cache=True,
51
  pad_token_id=tokenizer.eos_token_id,
52
  eos_token_id=tokenizer.eos_token_id
53
  )
 
67
  chat,
68
  title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
69
  description="""
70
+ **Model:** Llama-3.2-3B fine-tuned on a code dataset
71
 
72
  It's a custom fine-tuned model for ID2223 Lab 2.
73
  """,
 
76
  "Explain machine learning in simple terms",
77
  "Write a Python function to reverse a string"
78
  ],
79
+ theme="soft",
80
+ retry_btn=None, # Désactive retry pour éviter surcharge
81
+ undo_btn=None,
82
+ clear_btn="Clear"
83
  )
84
 
85
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
  gradio
2
  transformers
3
  torch
4
- accelerate
5
- huggingface-hub
6
- bitsandbytes
 
1
  gradio
2
  transformers
3
  torch
4
+ accelerate