Muhammadidrees committed on
Commit
f176b5b
·
verified ·
1 Parent(s): 6ea20a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -7,11 +7,35 @@ import re
7
  MODEL_ID = "Muhammadidrees/my-gpt-oss"
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
- model = AutoModelForCausalLM.from_pretrained(
11
- MODEL_ID,
12
- device_map="auto",
13
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  if tokenizer.pad_token is None:
17
  tokenizer.pad_token = tokenizer.eos_token
 
# Model checkpoint on the Hugging Face Hub.
MODEL_ID = "Muhammadidrees/my-gpt-oss"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load model with proper dtype handling: walk a list of dtype candidates,
# falling through to the next on any load failure. "auto" keeps the
# checkpoint's stored dtype; bfloat16 is tried next.
_DTYPE_CANDIDATES = [
    ("Auto dtype", "auto"),
    ("BFloat16", torch.bfloat16),
]

model = None
for _label, _dtype in _DTYPE_CANDIDATES:
    try:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",
            torch_dtype=_dtype,
            low_cpu_mem_usage=True,
        )
        break
    except Exception as err:
        # Report which attempt failed and move on to the next candidate.
        print(f"{_label} failed: {err}")

if model is None:
    # Final fallback: float32 (works everywhere but slower). Deliberately
    # not wrapped in try/except — if this fails, the error should surface.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

# Many causal-LM tokenizers ship without a pad token; reuse EOS so that
# batched generation/padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token