ArthurLin committed on
Commit
f5dd377
·
verified ·
1 Parent(s): 8ee592e

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +4 -1
model.py CHANGED
@@ -1,10 +1,12 @@
1
  import torch
2
- from transformers import pipeline
3
  import os
4
 
5
  hf_token = os.getenv("LLM_token")
6
  os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token
7
 
 
 
8
  def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
9
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
 
@@ -12,6 +14,7 @@ def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
12
  "text-generation",
13
  model=model_path,
14
  model_kwargs={"torch_dtype": torch.float16} if torch.cuda.is_available() else {},
 
15
  device=device,
16
  token=hf_token
17
  )
 
1
  import torch
2
+ from transformers import pipeline, BitsAndBytesConfig
3
  import os
4
 
5
  hf_token = os.getenv("LLM_token")
6
  os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token
7
 
8
+ bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
9
+
10
  def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
11
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12
 
 
14
  "text-generation",
15
  model=model_path,
16
  model_kwargs={"torch_dtype": torch.float16} if torch.cuda.is_available() else {},
17
+ quantization_config=bnb_config,
18
  device=device,
19
  token=hf_token
20
  )