Instructions to use DavidSeyserHF/rex1-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DavidSeyserHF/rex1-base with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DavidSeyserHF/rex1-base", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("DavidSeyserHF/rex1-base", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DavidSeyserHF/rex1-base with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DavidSeyserHF/rex1-base"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DavidSeyserHF/rex1-base",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/DavidSeyserHF/rex1-base
- SGLang
How to use DavidSeyserHF/rex1-base with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "DavidSeyserHF/rex1-base" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DavidSeyserHF/rex1-base",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "DavidSeyserHF/rex1-base" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DavidSeyserHF/rex1-base",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use DavidSeyserHF/rex1-base with Docker Model Runner:
docker model run hf.co/DavidSeyserHF/rex1-base
| """Hugging Face model wrapper for REX.""" | |
| from __future__ import annotations | |
| from typing import Any | |
| import torch | |
| from torch import nn | |
| from transformers import PreTrainedModel | |
| from transformers.generation import GenerationMixin | |
| from transformers.modeling_outputs import CausalLMOutputWithPast | |
| from configuration_rex import RexConfig | |
| from model import RexConfig as CoreRexConfig | |
| from model import RexForCausalLM as CoreRexForCausalLM | |
class RexForCausalLM(PreTrainedModel, GenerationMixin):
    """Adapter exposing the core REX causal LM through the Transformers model API.

    The core model is built from the wrapper config and stored on ``self.rex``.
    Embedding accessors, generation input preparation, and ``forward`` all
    delegate to that object.
    """

    config_class = RexConfig
    base_model_prefix = "rex"
    supports_gradient_checkpointing = False
    _tied_weights_keys = ["rex.lm_head.weight"]

    def __init__(self, config: RexConfig):
        """Create the wrapper and instantiate the core model from ``config``.

        Args:
            config: Wrapper configuration; its ``to_core_dict()`` output is fed
                to the core config's ``from_dict`` constructor.
        """
        super().__init__(config)
        core_config = CoreRexConfig.from_dict(config.to_core_dict())
        self.rex = CoreRexForCausalLM(core_config)

    def get_input_embeddings(self) -> nn.Module:
        """Return the core model's token embedding module."""
        core = self.rex
        return core.token_embedding

    def set_input_embeddings(self, value: nn.Module) -> None:
        """Replace the token embedding module on the core model.

        When the core config has ``tie_embeddings`` set, the output head's
        weight is re-pointed at the new embedding weight so the two stay shared.

        Args:
            value: Module to install as the token embedding.
        """
        core = self.rex
        core.token_embedding = value
        if core.cfg.tie_embeddings:
            core.lm_head.weight = core.token_embedding.weight

    def get_output_embeddings(self) -> nn.Module:
        """Return the core model's language-model head."""
        core = self.rex
        return core.lm_head

    def set_output_embeddings(self, new_embeddings: nn.Module) -> None:
        """Replace the core model's language-model head.

        Args:
            new_embeddings: Module to install as the output head.
        """
        core = self.rex
        core.lm_head = new_embeddings

    def prepare_inputs_for_generation(self, input_ids: torch.Tensor, **kwargs: Any) -> dict[str, torch.Tensor]:
        """Build the model inputs for one generation step.

        Only ``input_ids`` is returned; every extra keyword argument is ignored.
        The ids are cropped along the second axis to the trailing
        ``config.max_seq_len`` positions.

        Args:
            input_ids: Token ids generated so far (assumed to be
                ``(batch, sequence)`` -- TODO confirm against callers).
            **kwargs: Accepted for API compatibility and not used.

        Returns:
            A dict with the single key ``"input_ids"``.
        """
        window = self.config.max_seq_len
        cropped_ids = input_ids[:, -window:]
        return {"input_ids": cropped_ids}

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        labels: torch.Tensor | None = None,
        past_key_values: Any | None = None,
        use_cache: bool | None = None,
        **_: Any,
    ) -> CausalLMOutputWithPast:
        """Run the core model and package its result as a Transformers output.

        Only ``input_ids`` and ``labels`` are forwarded to the core model;
        ``attention_mask``, ``past_key_values``, ``use_cache`` and any other
        keyword arguments are accepted but not used here.

        Args:
            input_ids: Token ids passed to the core model.
            attention_mask: Unused.
            labels: Optional labels passed to the core model.
            past_key_values: Unused.
            use_cache: Unused.

        Returns:
            A ``CausalLMOutputWithPast`` carrying the core model's ``"loss"``
            and ``"logits"`` entries; no other fields are populated.
        """
        core_out = self.rex(input_ids=input_ids, labels=labels)
        loss = core_out["loss"]
        logits = core_out["logits"]
        return CausalLMOutputWithPast(loss=loss, logits=logits)