---
library_name: transformers
tags: []
---
## How to Get Started with the Model
```python
import torch
from transformers import AutoModel, AutoProcessor, pipeline
import librosa
from PIL import Image

# Usage example: send one image and one spoken question to the multimodal
# pipeline and print its Traditional Chinese answer.
model_path = "ocisd4/multi-modal-llama-ocis"

# NOTE(review): "hf_tokens" is a placeholder -- pass a real Hugging Face access
# token here (or omit `token=` for public checkpoints). Never commit real tokens.
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True, token="hf_tokens")
pipe = pipeline(model=model_path, trust_remote_code=True, processor=processor, device_map='auto')

# Load the inputs; sr=16000 makes librosa resample the audio to 16 kHz.
# NOTE(review): the example file names below appear mojibake-garbled (UTF-8
# Chinese decoded with the wrong codec) -- substitute your own file paths.
audio, sr = librosa.load("/path/to/θ«εεηδΈηζ―ι»ζ―εͺ裑.wav", sr=16000)
image = Image.open("/path/to/ε°εεε».jpg")

# Chat turns: the system prompt fixes the persona and output language; the user
# turn carries the image/audio placeholder tokens the processor substitutes.
turns = [
    dict(
        role='system',
        content="You are a travel expert who can accurately analyze the attractions in the pictures. All conversations should be conducted in Traditional Chinese.",
    ),
    dict(
        role='user',
        content='<|image|><|begin_of_audio|><|audio|><|end_of_audio|>',
    ),
]

# The pipeline input bundles audio, images, chat turns, and the sampling rate.
y_pred = pipe({'audio': [audio], 'images': [image], 'turns': turns, 'sampling_rate': sr}, max_new_tokens=300)
print(y_pred)  # e.g. a Traditional Chinese description of the landmark in the photo
```