Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
|
@@ -41,13 +41,14 @@ class SinaReasonMedicalChat:
|
|
| 41 |
"""Load the SinaReason medical model and tokenizer using Unsloth"""
|
| 42 |
try:
|
| 43 |
print(f"Loading medical model with Unsloth: {MODEL_NAME}")
|
|
|
|
| 44 |
|
| 45 |
# Use FastLanguageModel from Unsloth to load the model and tokenizer
|
| 46 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 47 |
model_name=MODEL_NAME,
|
| 48 |
dtype=torch.bfloat16,
|
| 49 |
load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
|
| 50 |
- device_map="…",  [removed line — the quoted value was truncated in the page extraction; the replacement line below adds device_map="cuda"]
|
| 51 |
)
|
| 52 |
|
| 53 |
print("SinaReason medical model loaded successfully with Unsloth!")
|
|
@@ -74,7 +75,7 @@ class SinaReasonMedicalChat:
|
|
| 74 |
temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
|
| 75 |
"""Generate medical reasoning responses using the Unsloth model."""
|
| 76 |
# No need for model.to(DEVICE), Unsloth's device_map handles it.
|
| 77 |
- self.model.to("cuda")
|
| 78 |
self.model.eval()
|
| 79 |
if not message.strip():
|
| 80 |
return "", history
|
|
|
|
| 41 |
"""Load the SinaReason medical model and tokenizer using Unsloth"""
|
| 42 |
try:
|
| 43 |
print(f"Loading medical model with Unsloth: {MODEL_NAME}")
|
| 44 |
+ print("cuda" if torch.cuda.is_available() else "cpu")
|
| 45 |
|
| 46 |
# Use FastLanguageModel from Unsloth to load the model and tokenizer
|
| 47 |
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
|
| 48 |
model_name=MODEL_NAME,
|
| 49 |
dtype=torch.bfloat16,
|
| 50 |
load_in_4bit=True, # Or False if you have enough VRAM for 16-bit
|
| 51 |
+ device_map="cuda",
|
| 52 |
)
|
| 53 |
|
| 54 |
print("SinaReason medical model loaded successfully with Unsloth!")
|
|
|
|
| 75 |
temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
|
| 76 |
"""Generate medical reasoning responses using the Unsloth model."""
|
| 77 |
# No need for model.to(DEVICE), Unsloth's device_map handles it.
|
| 78 |
+ #self.model.to("cuda")
|
| 79 |
self.model.eval()
|
| 80 |
if not message.strip():
|
| 81 |
return "", history
|