Automatic Speech Recognition
Transformers
Safetensors
phi4mm
text-generation
nlp
code
audio
speech-summarization
speech-translation
visual-question-answering
phi-4-multimodal
phi
phi-4-mini
custom_code
Eval Results
Instructions to use microsoft/Phi-4-multimodal-instruct with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use microsoft/Phi-4-multimodal-instruct with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="microsoft/Phi-4-multimodal-instruct", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-4-multimodal-instruct", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| {"add_prefix_space": false, "added_tokens_decoder": {"199999": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "200010": {"content": "<|image|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "200011": {"content": "<|audio|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "200018": {"content": "<|endofprompt|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "200019": {"content": "<|assistant|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": true}, "200020": {"content": "<|end|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": true}, "200021": {"content": "<|user|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": true}, "200022": {"content": "<|system|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": true}, "200023": {"content": "<|tool|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": false}, "200024": {"content": "<|/tool|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": false}, "200025": {"content": "<|tool_call|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": false}, "200026": {"content": "<|/tool_call|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": false}, "200027": {"content": "<|tool_response|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": false}, "200028": {"content": "<|tag|>", "lstrip": false, "normalized": false, "rstrip": true, "single_word": false, "special": true}}, "audio_token": "<|audio|>", "bos_token": "<|endoftext|>", "clean_up_tokenization_spaces": false, 
"eos_token": "<|endoftext|>", "extra_special_tokens": {"audio_token": "<|audio|>", "image_token": "<|image|>"}, "image_token": "<|image|>", "model_max_length": 131072, "pad_token": "<|endoftext|>", "processor_class": "Phi4MultimodalProcessor", "tokenizer_class": "GPT2Tokenizer", "unk_token": "<|endoftext|>"} |