Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
|
@@ -41,13 +41,14 @@ class SinaReasonMedicalChat:
|
|
| 41 |
"""Load the SinaReason medical model and tokenizer using Unsloth"""
|
| 42 |
try:
|
| 43 |
print(f"Loading medical model with Unsloth: {MODEL_NAME}")
|
|
|
|
| 44 |
|
| 45 |
# Use FastLanguageModel from Unsloth to load the model and tokenizer
|
| 46 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 47 |
model_name=MODEL_NAME,
|
| 48 |
dtype=torch.bfloat16,
|
| 49 |
load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
|
| 50 |
- device_map="…",  [removed line — the quoted value was truncated in the page extraction; the replacement line below adds device_map="cuda"]
|
| 51 |
)
|
| 52 |
|
| 53 |
print("SinaReason medical model loaded successfully with Unsloth!")
|
|
@@ -74,7 +75,7 @@ class SinaReasonMedicalChat:
|
|
| 74 |
temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
|
| 75 |
"""Generate medical reasoning responses using the Unsloth model."""
|
| 76 |
# No need for model.to(DEVICE), Unsloth's device_map handles it.
|
| 77 |
- self.model.to("cuda")
|
| 78 |
self.model.eval()
|
| 79 |
if not message.strip():
|
| 80 |
return "", history
|
|
|
|
| 41 |
"""Load the SinaReason medical model and tokenizer using Unsloth"""
|
| 42 |
try:
|
| 43 |
print(f"Loading medical model with Unsloth: {MODEL_NAME}")
|
| 44 |
+ print("cuda" if torch.cuda.is_available() else "cpu")
|
| 45 |
|
| 46 |
# Use FastLanguageModel from Unsloth to load the model and tokenizer
|
| 47 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 48 |
model_name=MODEL_NAME,
|
| 49 |
dtype=torch.bfloat16,
|
| 50 |
load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
|
| 51 |
+ device_map="cuda",
|
| 52 |
)
|
| 53 |
|
| 54 |
print("SinaReason medical model loaded successfully with Unsloth!")
|
|
|
|
| 75 |
temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
|
| 76 |
"""Generate medical reasoning responses using the Unsloth model."""
|
| 77 |
# No need for model.to(DEVICE), Unsloth's device_map handles it.
|
| 78 |
+ #self.model.to("cuda")
|
| 79 |
self.model.eval()
|
| 80 |
if not message.strip():
|
| 81 |
return "", history
|