Spaces:

yakine
/

model

Sleeping

yakine committed on Aug 11, 2024

Commit

faf93aa

verified ·

1 Parent(s): 6a3dc68

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from huggingface_hub import HfFolder
 from io import StringIO
 from tqdm import tqdm
 import accelerate
-from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
 # Access the Hugging Face API token from environment variables
 hf_token = os.getenv('HF_API_TOKEN')
@@ -29,12 +29,14 @@ text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokeniz
 # Load the Llama-3 model and tokenizer once during startup
 tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
-model_llama = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Meta-Llama-3-8B",
-    torch_dtype='auto',
-    device_map='auto',
-    token=hf_token
-)
 # Define your prompt template
 prompt_template = """\

 from io import StringIO
 from tqdm import tqdm
 import accelerate
+from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model, disk_offload
 # Access the Hugging Face API token from environment variables
 hf_token = os.getenv('HF_API_TOKEN')
 # Load the Llama-3 model and tokenizer once during startup
 tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
+with init_empty_weights():
+    model_llama = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Meta-Llama-3-8B",
+        torch_dtype='auto',
+        device_map='auto',  # This can still be used for initial placement
+        token=hf_token
+    )
+disk_offload(model_llama)  # Offload the model to disk
 # Define your prompt template
 prompt_template = """\