Update app.py
app.py CHANGED
@@ -29,14 +29,21 @@ text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokeniz
 
 # Load the Llama-3 model and tokenizer once during startup
 tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
+# Define the offload directory
+offload_dir = "./offload"
+os.makedirs(offload_dir, exist_ok=True)  # Create the directory if it doesn't exist
+
+# Load the Llama model with disk offloading
 with init_empty_weights():
     model_llama = AutoModelForCausalLM.from_pretrained(
         "meta-llama/Meta-Llama-3-8B",
         torch_dtype='auto',
-        device_map='auto',
+        device_map='auto',
         token=hf_token
     )
-
+
+# Offload the model to the specified directory
+disk_offload(model_llama, offload_dir)  # Pass the offload directory
 
 # Define your prompt template
 prompt_template = """\
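For reference, transformers can produce the same disk-offloaded placement in one step: when device_map is set, from_pretrained accepts an offload_folder argument and spills weights that do not fit in GPU/CPU memory to that directory, so neither the explicit init_empty_weights() context nor the separate disk_offload() call is needed. A minimal sketch of that variant, assuming hf_token comes from an HF_TOKEN environment variable (app.py defines it elsewhere):

import os
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.environ.get("HF_TOKEN")  # assumed token source; app.py defines hf_token earlier

offload_dir = "./offload"
os.makedirs(offload_dir, exist_ok=True)  # create the offload directory if it doesn't exist

tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)

# device_map='auto' lets accelerate place layers across the available devices;
# offload_folder tells it where to spill weights that don't fit in memory.
model_llama = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",
    torch_dtype="auto",
    device_map="auto",
    offload_folder=offload_dir,
    token=hf_token,
)

With device_map set, from_pretrained already instantiates the model lazily on the meta device, which is why the explicit init_empty_weights() context becomes redundant in this form.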