SuperSl6 committed on
Commit
5ae1c86
·
verified ·
1 Parent(s): 8f35217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -18
app.py CHANGED
@@ -17,24 +17,13 @@ print(f"Using device: {device}")
17
  model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
18
  tokenizer = AutoTokenizer.from_pretrained(model_id)
19
 
20
- if device == "cuda": # GPU path
21
- model = AutoModelForCausalLM.from_pretrained(
22
- model_id,
23
- device_map="auto",
24
- torch_dtype=torch.float16,
25
- load_in_8bit=True, # bitsandbytes uses GPU kernels
26
- low_cpu_mem_usage=True, # stream weights, tiny host RAM
27
- max_memory={0: "15GiB"}, # stay well under container cap
28
- )
29
- else: # 🖥️ CPU-only path
30
- model = AutoModelForCausalLM.from_pretrained(
31
- model_id,
32
- device_map={"": "cpu"}, # everything on CPU
33
- torch_dtype=torch.float32, # full precision
34
- load_in_8bit=False, # bitsandbytes not usable on CPU
35
- low_cpu_mem_usage=True, # layer-by-layer streaming
36
- offload_folder="offload", # swap rarely-used tensors to disk
37
- )
38
 
39
 
40
  # ------------------------------------------------------------------
 
17
  model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
18
  tokenizer = AutoTokenizer.from_pretrained(model_id)
19
 
20
+ # 🖥️ CPU-only path
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ model_id,
23
+ torch_dtype="auto",
24
+ device_map="auto", # works for CPU or GPU Space
25
+ low_cpu_mem_usage=True,
26
+ )
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  # ------------------------------------------------------------------