samzito12 committed on
Commit
e41281f
·
1 Parent(s): a8e01ad

try to improve the inference

Browse files
Files changed (1) hide show
  1. app.py +2 -10
app.py CHANGED
@@ -14,18 +14,10 @@ model = AutoModelForCausalLM.from_pretrained(
14
  model_name,
15
  device_map="cpu",
16
  torch_dtype=torch.float32,
17
- low_cpu_mem_usage=True,
18
- offload_folder="offload",
19
- offload_state_dict=True
20
- )
21
-
22
- print("⚙️ Quantification du modèle pour optimisation CPU...")
23
- model = torch.quantization.quantize_dynamic(
24
- model,
25
- {torch.nn.Linear},
26
- dtype=torch.qint8
27
  )
28
 
 
29
  model.eval()
30
 
31
  SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on a code dataset."
 
14
  model_name,
15
  device_map="cpu",
16
  torch_dtype=torch.float32,
17
+ low_cpu_mem_usage=True
 
 
 
 
 
 
 
 
 
18
  )
19
 
20
+ print("✅ Modèle chargé avec optimisations CPU")
21
  model.eval()
22
 
23
  SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on a code dataset."