Spaces:
Sleeping
Sleeping
| # import os | |
| # from transformers import AutoModelForCausalLM, AutoTokenizer | |
| # from peft import PeftModel | |
| # def load_model(): | |
| # hf_token = os.getenv("HF_TOKEN") | |
| # if not hf_token: | |
| # raise RuntimeError("HF_TOKEN not set.") | |
| # # Use a user-writable cache directory (important for Docker non-root) | |
| # HF_CACHE = os.path.expanduser("~/.cache/huggingface") | |
| # os.makedirs(HF_CACHE, exist_ok=True) | |
| # os.environ["TRANSFORMERS_CACHE"] = HF_CACHE | |
| # os.environ["HF_HOME"] = HF_CACHE | |
| # base_model = AutoModelForCausalLM.from_pretrained( | |
| # "meta-llama/Llama-2-7b-chat-hf", | |
| # use_auth_token=hf_token, | |
| # cache_dir="/tmp/hf_cache", | |
| # torch_dtype="auto", | |
| # device_map="auto", | |
| # load_in_8bit=True # <-- Try enabling 8-bit | |
| # ) | |
| # model = PeftModel.from_pretrained( | |
| # base_model, | |
| # "BrainGPT/BrainGPT-7B-v0.1", | |
| # use_auth_token=hf_token, | |
| # cache_dir="/tmp/hf_cache" | |
| # ) | |
| # tokenizer = AutoTokenizer.from_pretrained( | |
| # "meta-llama/Llama-2-7b-chat-hf", | |
| # use_auth_token=hf_token, | |
| # cache_dir="/tmp/hf_cache" | |
| # ) | |
| # return model, tokenizer | |
| ## GPT 2 Model | |
| import os | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
def load_model():
    """Load the pretrained GPT-2 causal language model and its tokenizer.

    The Hugging Face cache is pointed at ``~/.cache/huggingface`` (created
    if missing) so that the files land in a user-writable location, which
    matters when running as a non-root user inside Docker. The same
    directory is exported through the ``TRANSFORMERS_CACHE`` and ``HF_HOME``
    environment variables and passed explicitly as ``cache_dir`` to both
    ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple: the ``AutoModelForCausalLM``
        instance first, then the matching ``AutoTokenizer`` instance.
    """
    # User-writable cache location (important for Docker non-root).
    cache_root = os.path.expanduser("~/.cache/huggingface")
    os.makedirs(cache_root, exist_ok=True)

    # Overwrites any value already present in the process environment.
    # NOTE(review): transformers is imported before this runs, so the
    # library may already have read these variables -- confirm they still
    # take effect; the explicit cache_dir below does not depend on them.
    for env_key in ("TRANSFORMERS_CACHE", "HF_HOME"):
        os.environ[env_key] = cache_root

    checkpoint = "gpt2"

    # Tokenizer is fetched first, then the model weights.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, cache_dir=cache_root)
    model = AutoModelForCausalLM.from_pretrained(checkpoint, cache_dir=cache_root)

    return model, tokenizer