aeb56 committed on
Commit 96b6724 · 1 Parent(s): 3fb1215

Fix multi-GPU: use parallelize=True instead of device_map, update env var

Files changed (1): app.py +3 -2
app.py CHANGED
@@ -9,7 +9,7 @@ import time
 
 # Set environment variables for flash-linear-attention and memory management
 os.environ["FLA_USE_TRITON"] = "1"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+os.environ["PYTORCH_ALLOC_CONF"] = "expandable_segments:True"  # Updated from PYTORCH_CUDA_ALLOC_CONF
 
 # Model configuration
 MODEL_NAME = "optiviseapp/kimi-linear-48b-a3b-instruct-fine-tune"
@@ -179,10 +179,11 @@ class ChatBot:
         yield f"✅ **Memory cleared! Starting evaluation...**\n\nThis will take 30-60 minutes total.\n\n"
 
         # Run lm_eval with optimized memory settings
+        # Note: We use parallelize=True to distribute across GPUs instead of device_map in model_args
         cmd = [
             "lm_eval",
             "--model", "hf",
-            "--model_args", f"pretrained={MODEL_NAME},trust_remote_code=True,dtype=bfloat16,device_map=auto,low_cpu_mem_usage=True",
+            "--model_args", f"pretrained={MODEL_NAME},trust_remote_code=True,dtype=bfloat16,low_cpu_mem_usage=True,parallelize=True",
             "--tasks", task_string,
             "--batch_size", "1",  # Reduced to minimize memory usage
             "--output_path", output_dir,
 