Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import os | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# Model identifier used by load_generation_pipeline() when the caller does
# not pass ``model_name`` explicitly.
DEFAULT_GENERATION_MODEL = "CohereLabs/tiny-aya-water"
def load_generation_pipeline(model_name: str = DEFAULT_GENERATION_MODEL):
    """Build a greedy-decoding ``text-generation`` pipeline for *model_name*.

    The tokenizer and causal language model are both loaded with
    ``from_pretrained``. When ``TRANSFORMERS_OFFLINE`` or ``HF_HUB_OFFLINE``
    is set to a truthy value, both loads are restricted to local files
    (``local_files_only=True``).

    Weights are loaded in ``torch.bfloat16`` when CUDA is available and
    reports bf16 support, and in ``torch.float32`` otherwise.

    Args:
        model_name: Model identifier or path handed to ``from_pretrained``.
            Defaults to ``DEFAULT_GENERATION_MODEL``.

    Returns:
        The object returned by ``transformers.pipeline("text-generation", ...)``,
        configured for greedy decoding (``do_sample=False``), at most 256 new
        tokens per call, and ``return_full_text=False``.

    Note:
        ``trust_remote_code=True`` allows the model repository to supply its
        own Python code, so only pass repositories you trust.
    """
    # Accept the usual truthy spellings for the offline switches rather than
    # only the literal "1", so e.g. HF_HUB_OFFLINE=true is honoured here too.
    truthy_values = {"1", "ON", "YES", "TRUE"}
    offline = any(
        os.environ.get(var, "").strip().upper() in truthy_values
        for var in ("TRANSFORMERS_OFFLINE", "HF_HUB_OFFLINE")
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        local_files_only=offline,
    )

    # bf16 is only chosen when a CUDA device is present and supports it;
    # everything else falls back to full precision.
    bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    dtype = torch.bfloat16 if bf16_ok else torch.float32

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=dtype,
        device_map="auto",
        low_cpu_mem_usage=True,
        local_files_only=offline,
    )

    # Greedy decoding: no ``temperature`` is passed because it is a
    # sampling-only setting and has no effect when do_sample=False.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=False,
        return_full_text=False,
        # Reuse EOS as the padding id for generation.
        pad_token_id=tokenizer.eos_token_id,
    )