| |
|
| | |
| | |
| |
|
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | import gradio as gr |
| | from dcase24t6.nn.hub import baseline_pipeline |
| | import librosa |
| | import torch |
| |
|
# Instantiate the DCASE baseline captioning pipeline once at module import,
# so every inference request reuses the same loaded model.
model = baseline_pipeline()
| |
|
def dcase_inference(mic=None, file=None):
    """Caption an audio clip with the DCASE baseline captioning model.

    Args:
        mic: Path to a microphone recording, or None. The Gradio input is
            declared with ``type="filepath"``, so this is a path string.
        file: Path to an uploaded audio file, or None (also a path string).

    Returns:
        The model's top candidate caption, or an error message string when
        neither input was provided.
    """
    if mic is not None:
        # BUG FIX: both gr.Audio inputs use type="filepath", so `mic` is a
        # path string. The previous code passed the path itself as the
        # waveform and hard-coded a 48 kHz sample rate; decode the file
        # properly instead, mirroring the upload branch below.
        audio, sr = librosa.load(mic, sr=None)
        audio = torch.from_numpy(audio)
        gr.Info(f"sr 1: {sr}")
    elif file is not None:
        gr.Info(f"file 1: {file}")
        # sr=None preserves the file's native sample rate.
        audio, sr = librosa.load(file, sr=None)
        audio = torch.from_numpy(audio)
        gr.Info(f"file 1: {sr}")
    else:
        return "You must either provide a mic recording or a file"

    item = {"audio": audio, "sr": sr}
    outputs = model(item)
    # The pipeline returns ranked caption candidates; surface the best one.
    return outputs["candidates"][0]
| |
|
def create_app():
    """Build and return the Gradio Blocks demo wrapping `dcase_inference`.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # DCASE demo for automatic audio captioning
            """
        )
        # Two alternative inputs feeding the same captioning function:
        # a live microphone recording or an uploaded audio file. Both are
        # delivered to the callback as filesystem paths (type="filepath").
        gr.Interface(
            fn=dcase_inference,
            inputs=[
                gr.Audio(sources="microphone", type="filepath"),
                gr.Audio(sources="upload", type="filepath"),
            ],
            outputs="text",
        )

    return demo
| |
|
def main():
    """Entry point: assemble the Gradio demo and serve it with debug on."""
    create_app().launch(debug=True)
| |
|
| | |
| | if __name__ == "__main__": |
| | main() |
| |
|
| |
|