"""Gradio demo that captions an audio clip with the NatureLM-audio model."""

from typing import Optional

import gradio as gr
from NatureLM.infer import Pipeline
from NatureLM.models import NatureLM

MODEL_ID = "EarthSpeciesProject/NatureLM-audio"
PROMPT = "Caption the audio using the species name."

# Load the model once at start-up so every request reuses the same weights.
# NOTE(review): the device is hard-coded to "cuda"; this script presumably
# requires a GPU host -- confirm before deploying elsewhere.
model = NatureLM.from_pretrained(MODEL_ID).eval().to("cuda")
pipeline = Pipeline(model=model)


def transcribe(audio: Optional[str]):
    """Run the captioning prompt on one audio file and return its result.

    Args:
        audio: Path to the audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. Gradio passes ``None``
            when the user submits without providing any audio.

    Returns:
        The first (and only) element of the pipeline output for the single
        submitted clip; presumably the caption text, since it is rendered
        in a ``"text"`` output.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        # Surface a readable message in the UI instead of letting the
        # pipeline fail on a missing path.
        raise gr.Error("Please record or upload an audio clip first.")

    # The pipeline is called with parallel lists: one query per audio input.
    results = pipeline([audio], [PROMPT])
    return results[0]


demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    # Only start the web server when executed as a script, not on import.
    demo.launch()