Instructions to use facebook/opt-350m with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use facebook/opt-350m with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="facebook/opt-350m")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")

- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use facebook/opt-350m with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "facebook/opt-350m"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "facebook/opt-350m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker
docker model run hf.co/facebook/opt-350m
- SGLang
How to use facebook/opt-350m with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "facebook/opt-350m" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "facebook/opt-350m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "facebook/opt-350m" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "facebook/opt-350m",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'

- Docker Model Runner
How to use facebook/opt-350m with Docker Model Runner:
docker model run hf.co/facebook/opt-350m
Pretraining error
#37
by saivineetha - opened
I was pre-training the facebook/opt-350m model on a text file. After 15 training steps, the training loss becomes zero. Can you help me with this?
Code I used
if script_args.load_in_kbits in [4, 8]:
load_in_4bit = script_args.load_in_kbits == 4
load_in_8bit = script_args.load_in_kbits == 8
if script_args.modules_to_save is not None:
load_in_8bit_skip_modules = script_args.modules_to_save.split(",")
else:
load_in_8bit_skip_modules = None
quantization_config = BitsAndBytesConfig(
load_in_4bit=script_args.load_in_kbits == 4,
load_in_8bit=script_args.load_in_kbits == 8,
llm_int8_threshold=6.0,
load_in_8bit_skip_modules=load_in_8bit_skip_modules,
bnb_4bit_compute_dtype=compute_dtype,
bnb_4bit_use_double_quant=script_args.double_quant,
bnb_4bit_quant_type=script_args.quant_type, # {'fp4', 'nf4'}
)
else:
load_in_4bit = False
load_in_8bit = False
quantization_config = None
if quantization_config is not None:
logger.info(f"quantization_config:{quantization_config.to_dict()}")
if script_args.model_name_or_path:
torch_dtype = (
script_args.torch_dtype
if script_args.torch_dtype in ["auto", None]
else getattr(torch, script_args.torch_dtype)
)
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
model = AutoModelForCausalLM.from_pretrained(
script_args.model_name_or_path,
from_tf=bool(".ckpt" in script_args.model_name_or_path),
config=config,
# cache_dir=script_args.cache_dir,
# revision=model_args.model_revision,
use_auth_token=True if script_args.use_auth_token else None,
torch_dtype=torch_dtype,
low_cpu_mem_usage=True,
device_map=device_map,
load_in_4bit=load_in_4bit,
load_in_8bit=load_in_8bit,
quantization_config=quantization_config,
)
else:
model = AutoModelForCausalLM.from_config(config)
n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
logger.info(
f"Training new model from scratch - Total size={n_params/2**20:.2f}M params"
)
if script_args.load_in_kbits in [4, 8]:
model = prepare_model_for_kbit_training(
model, use_gradient_checkpointing=script_args.gradient_checkpointing
)
model.config.use_cache = False
model_vocab_size = model.get_output_embeddings().weight.size(0)
tokenizer_vocab_size = len(tokenizer)
logger.info(f"Model vocab size: {model_vocab_size}")
logger.info(f"Tokenizer vocab size: {tokenizer_vocab_size}")
if model_vocab_size != tokenizer_vocab_size:
logger.info(f"Resize model vocab size to {tokenizer_vocab_size}")
model.resize_token_embeddings(len(tokenizer))
if script_args.peft_path is not None:
logger.info("Peft from pre-trained model")
model = PeftModel.from_pretrained(
model, script_args.peft_path, device_map=device_map
)
else:
logger.info("Init new peft model")
target_modules = script_args.trainable.split(",")
modules_to_save = script_args.modules_to_save
if modules_to_save is not None:
modules_to_save = modules_to_save.split(",")
lora_rank = script_args.lora_rank
lora_dropout = script_args.lora_dropout
lora_alpha = script_args.lora_alpha
logger.info(f"target_modules: {target_modules}")
logger.info(f"lora_rank: {lora_rank}")
peft_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
target_modules=target_modules,
inference_mode=False,
r=lora_rank,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
modules_to_save=modules_to_save,
)
model = get_peft_model(model, peft_config)
for name, module in model.named_modules():
if isinstance(module, LoraLayer):
module = module.to(torch.float16)
# if script_args.bf16:
# module = module.to(torch.bfloat16)
# if script_args.fp16:
# module = module.to(torch.float16)
if "norm" in name:
module = module.to(torch.float16)
if "lm_head" in name or "embed_tokens" in name:
if hasattr(module, "weight"):
module = module.to(torch.float16)
# if script_args.bf16 and module.weight.dtype == torch.float32:
# module = module.to(torch.bfloat16)
# if script_args.fp16 and module.weight.dtype == torch.float32:
# module = module.to(torch.float16)
model.print_trainable_parameters()
logger.info(f"model.modules_to_save: {model.modules_to_save}")
old_state_dict = model.state_dict
model.state_dict = (
lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))
