Instructions to use MBZUAI/LaMini-T5-738M with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use MBZUAI/LaMini-T5-738M with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="MBZUAI/LaMini-T5-738M")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-T5-738M")
model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-T5-738M")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use MBZUAI/LaMini-T5-738M with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "MBZUAI/LaMini-T5-738M"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "MBZUAI/LaMini-T5-738M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/MBZUAI/LaMini-T5-738M
- SGLang
How to use MBZUAI/LaMini-T5-738M with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "MBZUAI/LaMini-T5-738M" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "MBZUAI/LaMini-T5-738M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "MBZUAI/LaMini-T5-738M" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "MBZUAI/LaMini-T5-738M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use MBZUAI/LaMini-T5-738M with Docker Model Runner:
docker model run hf.co/MBZUAI/LaMini-T5-738M
I am getting the following error when running the chain:
Entering new RetrievalQA chain...
Token indices sequence length is longer than the specified maximum sequence length for this model (547 > 512). Running this sequence through the model will result in indexing errors
Traceback (most recent call last):
File "C:\Users\ramkanch\Downloads\Newfolder\custom_prompt.py", line 73, in <module>
response = qa.invoke(query)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 163, in invoke
raise e
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 153, in invoke
self._call(inputs, run_manager=run_manager)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\retrieval_qa\base.py", line 144, in _call
answer = self.combine_documents_chain.run(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\_api\deprecation.py", line 145, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 550, in run
return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\_api\deprecation.py", line 145, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 378, in __call__
return self.invoke(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 163, in invoke
raise e
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 153, in invoke
self._call(inputs, run_manager=run_manager)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\combine_documents\base.py", line 137, in _call
output, extra_return_dict = self.combine_docs(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\combine_documents\stuff.py", line 244, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\llm.py", line 293, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\_api\deprecation.py", line 145, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 378, in __call__
return self.invoke(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 163, in invoke
raise e
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\base.py", line 153, in invoke
self._call(inputs, run_manager=run_manager)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\llm.py", line 103, in _call
response = self.generate([inputs], run_manager=run_manager)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain\chains\llm.py", line 115, in generate
return self.llm.generate_prompt(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\language_models\llms.py", line 541, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\language_models\llms.py", line 714, in generate
output = self._generate_helper(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\language_models\llms.py", line 578, in _generate_helper
raise e
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_core\language_models\llms.py", line 565, in _generate_helper
self._generate(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\langchain_community\llms\huggingface_pipeline.py", line 261, in _generate
responses = self.pipeline(
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\text2text_generation.py", line 167, in __call__
result = super().call(*args, **kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\base.py", line 1177, in __call__
outputs = list(final_iterator)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\pt_utils.py", line 124, in __next__
item = next(self.iterator)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\pt_utils.py", line 125, in __next__
processed = self.infer(item, **self.params)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\base.py", line 1102, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\pipelines\text2text_generation.py", line 191, in _forward
output_ids = self.model.generate(**model_inputs, **generate_kwargs)
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
self._validate_model_kwargs(model_kwargs.copy())
File "C:\Users\ramkanch\Downloads\Newfolder.venv\lib\site-packages\transformers\generation\utils.py", line 1167, in _validate_model_kwargs
raise ValueError(
ValueError: The following model_kwargs are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)