yakine committed on
Commit
faf93aa
·
verified ·
1 Parent(s): 6a3dc68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import HfFolder
8
  from io import StringIO
9
  from tqdm import tqdm
10
  import accelerate
11
- from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
12
 
13
  # Access the Hugging Face API token from environment variables
14
  hf_token = os.getenv('HF_API_TOKEN')
@@ -29,12 +29,14 @@ text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokeniz
29
 
30
  # Load the Llama-3 model and tokenizer once during startup
31
  tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
32
- model_llama = AutoModelForCausalLM.from_pretrained(
33
- "meta-llama/Meta-Llama-3-8B",
34
- torch_dtype='auto',
35
- device_map='auto',
36
- token=hf_token
37
- )
 
 
38
 
39
  # Define your prompt template
40
  prompt_template = """\
 
8
  from io import StringIO
9
  from tqdm import tqdm
10
  import accelerate
11
+ from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model, disk_offload
12
 
13
  # Access the Hugging Face API token from environment variables
14
  hf_token = os.getenv('HF_API_TOKEN')
 
29
 
30
  # Load the Llama-3 model and tokenizer once during startup
31
  tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
32
+ with init_empty_weights():
33
+ model_llama = AutoModelForCausalLM.from_pretrained(
34
+ "meta-llama/Meta-Llama-3-8B",
35
+ torch_dtype='auto',
36
+ device_map='auto', # This can still be used for initial placement
37
+ token=hf_token
38
+ )
39
+ disk_offload(model_llama) # Offload the model to disk
40
 
41
  # Define your prompt template
42
  prompt_template = """\