import transformers
from huggingface_hub import constants, snapshot_download
|
|
def download_llm_to_cache(model_name, revision="main", cache_dir=None):
    """
    Download an LLM from the Hugging Face Hub into the local cache without loading it into memory.

    Args:
        model_name (str): The name of the model on the Hugging Face Hub (e.g., "meta-llama/Llama-2-7b-hf").
        revision (str, optional): The specific model revision to download. Defaults to "main".
        cache_dir (str, optional): The cache directory to use. If None, uses the default Hugging Face cache directory.

    Returns:
        str: Path to the model in the cache, or None if the download failed.
    """
    if cache_dir is None:
        cache_dir = constants.HUGGINGFACE_HUB_CACHE

    try:
        # Fetch (or reuse) the full model snapshot; this only downloads files, it does not load weights.
        cached_path = snapshot_download(
            repo_id=model_name,
            revision=revision,
            cache_dir=cache_dir,
            local_files_only=False,
        )
        print(f"Model '{model_name}' is available in cache at: {cached_path}")
        return cached_path

    except Exception as e:
        print(f"Error downloading model '{model_name}': {e}")
        return None
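
# Usage sketch (kept as a comment so importing this module has no side effects); "gpt2" is
# only an illustrative repo id, not one the surrounding code requires.
#
#     cached_path = download_llm_to_cache("gpt2", revision="main")
#     if cached_path is None:
#         ...  # handle the failed download before trying to load the model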
|
|
def load_model(path, cache_dir=None):
    """Load a causal language model and its tokenizer from a cache path or Hub repo id."""
    model = transformers.AutoModelForCausalLM.from_pretrained(path, cache_dir=cache_dir, device_map='auto', trust_remote_code=False)
    # device_map does not apply to tokenizers, so it is omitted here.
    tokenizer = transformers.AutoTokenizer.from_pretrained(path, cache_dir=cache_dir, trust_remote_code=False)
    return model, tokenizer
|
|
def llm_run(model, tokenizer, genes, N):
    """Generate N sampled completions (4 new tokens each) for every prompt in `genes`.

    Yields the accumulated list of results after each prompt so callers can consume output incrementally.
    """
    # The model already carries its device placement from load_model, so device_map is not passed again.
    generate = transformers.pipeline('text-generation', model=model, tokenizer=tokenizer)
    output = []
    for gene in genes:
        # The pipeline returns one list of N generation dicts per input prompt.
        out = generate([gene], min_new_tokens=4, max_new_tokens=4, do_sample=True, num_return_sequences=N)
        output.append(out[0])
        yield output
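

# Minimal end-to-end sketch: the model name, prompts, and sample count below are placeholders
# chosen for illustration ("gpt2" and two toy prompts), not values the module itself assumes.
if __name__ == "__main__":
    cached_path = download_llm_to_cache("gpt2")
    if cached_path is not None:
        model, tokenizer = load_model(cached_path)
        prompts = ["BRCA1", "TP53"]  # placeholder prompts
        for partial in llm_run(model, tokenizer, prompts, N=3):
            # `partial` is the accumulated list of generations after each prompt.
            print(f"Generated completions for {len(partial)} prompt(s) so far")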