# mlx-community/quantized-gemma-2b

This model was converted to MLX format from [google/gemma-2b](https://huggingface.co/google/gemma-2b).
Refer to the [original model card](https://huggingface.co/google/gemma-2b) for more details on the model.
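For reference, conversions like this one are typically produced with mlx-lm's `convert` utility. A minimal sketch, assuming the `mlx_lm.convert` Python API (argument names may differ across mlx-lm versions):

```python
from mlx_lm import convert

# Fetch google/gemma-2b from the Hub, quantize the weights, and write
# the MLX-format model to the default local output directory.
convert("google/gemma-2b", quantize=True)
```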
## Use with mlx

```bash
pip install mlx-lm
```

```python
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/quantized-gemma-2b")
response = generate(model, tokenizer, prompt="hello", verbose=True)
```
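`generate` also accepts a token budget; the sketch below assumes the stock mlx-lm Python API, with `max_tokens` capping the completion length and `verbose=False` returning the text instead of streaming it:

```python
from mlx_lm import load, generate

# Load once, then reuse the model and tokenizer across prompts.
model, tokenizer = load("mlx-community/quantized-gemma-2b")

response = generate(
    model,
    tokenizer,
    prompt="Explain quantization in one sentence.",
    max_tokens=128,   # cap the completion length
    verbose=False,    # return the text rather than printing token-by-token
)
print(response)
```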
## Use with transformers

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("mlx-community/quantized-gemma-2b")
model = AutoModelForCausalLM.from_pretrained("mlx-community/quantized-gemma-2b")
```

```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="mlx-community/quantized-gemma-2b")
```
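To actually produce text through the pipeline route, call the pipe directly; a sketch, assuming this checkpoint resolves through the regular transformers loading path (`max_new_tokens` is a standard generation argument chosen here for illustration):

```python
from transformers import pipeline

pipe = pipeline("text-generation", model="mlx-community/quantized-gemma-2b")

# The pipeline returns a list with one dict per prompt.
out = pipe("hello", max_new_tokens=50)
print(out[0]["generated_text"])
```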