yasserrmd committed on
Commit
73f8944
·
verified ·
1 Parent(s): 21ddb34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import gradio as gr
3
  import torch
4
- from transformers import AutoTokenizer, Mistral3ForConditionalGeneration, TextIteratorStreamer
5
  from threading import Thread
6
  import re
7
  import time
@@ -41,16 +41,23 @@ class SinaReasonMedicalChat:
41
  try:
42
  print(f"Loading medical model: {MODEL_NAME}")
43
  self.tokenizer = AutoTokenizer.from_pretrained(
44
- "mistralai/Magistral-Small-2509"
 
45
  )
46
-
 
 
 
 
 
 
47
  # Add padding token if not present
48
  if self.tokenizer.pad_token is None:
49
  self.tokenizer.pad_token = self.tokenizer.eos_token
50
 
51
  self.model = Mistral3ForConditionalGeneration.from_pretrained(
52
  MODEL_NAME,
53
- dtype=torch.bfloat16
54
  )
55
 
56
 
 
1
  import gradio as gr
2
  import gradio as gr
3
  import torch
4
+ from transformers import AutoTokenizer, Mistral3ForConditionalGeneration, TextIteratorStreamer,BitsAndBytesConfig
5
  from threading import Thread
6
  import re
7
  import time
 
41
  try:
42
  print(f"Loading medical model: {MODEL_NAME}")
43
  self.tokenizer = AutoTokenizer.from_pretrained(
44
+ "mistralai/Magistral-Small-2509",
45
+ tokenizer_type="mistral"
46
  )
47
+ bnb_config = BitsAndBytesConfig(
48
+ load_in_4bit=True,
49
+ bnb_4bit_use_double_quant=True,
50
+ bnb_4bit_quant_type="nf4", # normal float4
51
+ bnb_4bit_compute_dtype=torch.bfloat16 # computation precision
52
+ )
53
+
54
  # Add padding token if not present
55
  if self.tokenizer.pad_token is None:
56
  self.tokenizer.pad_token = self.tokenizer.eos_token
57
 
58
  self.model = Mistral3ForConditionalGeneration.from_pretrained(
59
  MODEL_NAME,
60
+ quantization_config=bnb_config
61
  )
62
 
63