Spaces:

SuperSl6
/

Question_Summrization_Demo

Sleeping

SuperSl6 committed on May 22, 2025

Commit

5ae1c86

verified ·

1 Parent(s): 8f35217

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,24 +17,13 @@ print(f"Using device: {device}")
 model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-if device == "cuda":                   # ⚡ GPU path
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        torch_dtype=torch.float16,
-        load_in_8bit=True,             # bitsandbytes uses GPU kernels
-        low_cpu_mem_usage=True,        # stream weights, tiny host RAM
-        max_memory={0: "15GiB"},       # stay well under container cap
-    )
-else:                                  # 🖥️  CPU-only path
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map={"": "cpu"},        # everything on CPU
-        torch_dtype=torch.float32,     # full precision
-        load_in_8bit=False,            # bitsandbytes not usable on CPU
-        low_cpu_mem_usage=True,        # layer-by-layer streaming
-        offload_folder="offload",      # swap rarely-used tensors to disk
-    )
 # ------------------------------------------------------------------

 model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Single load path: device_map="auto" handles both CPU and GPU Spaces
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype="auto",
+    device_map="auto",        # works for CPU or GPU Space
+    low_cpu_mem_usage=True,
+)
 # ------------------------------------------------------------------