yasserrmd committed on
Commit
c66b667
·
verified ·
1 Parent(s): dfc8638

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -41,13 +41,14 @@ class SinaReasonMedicalChat:
41
  """Load the SinaReason medical model and tokenizer using Unsloth"""
42
  try:
43
  print(f"Loading medical model with Unsloth: {MODEL_NAME}")
 
44
 
45
  # Use FastLanguageModel from Unsloth to load the model and tokenizer
46
  self.model, self.tokenizer = FastLanguageModel.from_pretrained(
47
  model_name=MODEL_NAME,
48
  dtype=torch.bfloat16,
49
  load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
50
- device_map="cpu",
51
  )
52
 
53
  print("SinaReason medical model loaded successfully with Unsloth!")
@@ -74,7 +75,7 @@ class SinaReasonMedicalChat:
74
  temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
75
  """Generate medical reasoning responses using the Unsloth model."""
76
  # No need for model.to(DEVICE), Unsloth's device_map handles it.
77
- self.model.to("cuda")
78
  self.model.eval()
79
  if not message.strip():
80
  return "", history
 
41
  """Load the SinaReason medical model and tokenizer using Unsloth"""
42
  try:
43
  print(f"Loading medical model with Unsloth: {MODEL_NAME}")
44
+ print("cuda" if torch.cuda.is_available() else "cpu")
45
 
46
  # Use FastLanguageModel from Unsloth to load the model and tokenizer
47
  self.model, self.tokenizer = FastLanguageModel.from_pretrained(
48
  model_name=MODEL_NAME,
49
  dtype=torch.bfloat16,
50
  load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
51
+ device_map="cuda",
52
  )
53
 
54
  print("SinaReason medical model loaded successfully with Unsloth!")
 
75
  temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
76
  """Generate medical reasoning responses using the Unsloth model."""
77
  # No need for model.to(DEVICE), Unsloth's device_map handles it.
78
+ #self.model.to("cuda")
79
  self.model.eval()
80
  if not message.strip():
81
  return "", history