Muhammadidrees committed on
Commit
f176b5b
·
verified ·
1 Parent(s): 6ea20a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -7,11 +7,35 @@ import re
7
  MODEL_ID = "Muhammadidrees/my-gpt-oss"
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
- model = AutoModelForCausalLM.from_pretrained(
11
- MODEL_ID,
12
- device_map="auto",
13
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  if tokenizer.pad_token is None:
17
  tokenizer.pad_token = tokenizer.eos_token
 
# Model checkpoint on the Hugging Face Hub.
MODEL_ID = "Muhammadidrees/my-gpt-oss"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load model with proper dtype handling: walk a list of dtype candidates,
# falling through to the next on any load failure. "auto" keeps the
# checkpoint's stored dtype; bfloat16 is tried next.
_DTYPE_CANDIDATES = [
    ("Auto dtype", "auto"),
    ("BFloat16", torch.bfloat16),
]

model = None
for _label, _dtype in _DTYPE_CANDIDATES:
    try:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",
            torch_dtype=_dtype,
            low_cpu_mem_usage=True,
        )
        break
    except Exception as err:
        # Report which attempt failed and move on to the next candidate.
        print(f"{_label} failed: {err}")

if model is None:
    # Final fallback: float32 (works everywhere but slower). Deliberately
    # not wrapped in try/except — if this fails, the error should surface.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

# Many causal-LM tokenizers ship without a pad token; reuse EOS so that
# batched generation/padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token