"""Gradio speech-to-speech demo for LiquidAI LFM2.5-Audio-1.5B."""

import gradio as gr
import torch
import torchaudio
import os
import numpy as np

from liquid_audio import LFM2AudioModel, LFM2AudioProcessor

# --- CONFIGURATION ---
MODEL_ID = "LiquidAI/LFM2.5-Audio-1.5B"
TARGET_SAMPLE_RATE = 16000  # model expects 16 kHz input (see resample step below)
OUTPUT_PATH = "output_response.wav"  # temp file handed back to the gr.Audio output

device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"⏳ Loading model: {MODEL_ID} on {device}...")
try:
    processor = LFM2AudioProcessor.from_pretrained(MODEL_ID)
    model = LFM2AudioModel.from_pretrained(MODEL_ID).to(device)
    print("✅ Model loaded successfully!")
except Exception as e:  # boundary: keep the app importable even if loading fails
    print(f"❌ Error loading model: {e}")
    # BUG FIX: the original only set `model = None`. If the *processor* call
    # raised, `processor` stayed undefined and process_audio later crashed
    # with NameError instead of showing a clean error.
    processor = None
    model = None


def process_audio(input_audio_path):
    """Run one speech-to-speech turn through LFM2.5-Audio.

    Args:
        input_audio_path: Filepath string supplied by the gr.Audio input
            component (``type="filepath"``).

    Returns:
        Path to the generated 16 kHz response WAV, suitable for the single
        gr.Audio output component.

    Raises:
        gr.Error: Surfaced in the Gradio UI when the model is unavailable
            or inference fails.
    """
    if model is None or processor is None:
        # BUG FIX: the original returned (None, "Error: ...") — a 2-tuple —
        # but the click handler declares exactly ONE output, so Gradio would
        # choke on the tuple and the message was never shown. gr.Error
        # displays the message in the UI instead.
        raise gr.Error("Model not loaded.")

    try:
        # 1. Load the input and resample to the model's expected 16 kHz.
        waveform, sample_rate = torchaudio.load(input_audio_path)
        if sample_rate != TARGET_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)
            waveform = resampler(waveform)
            sample_rate = TARGET_SAMPLE_RATE

        # Downmix stereo to mono — speech models typically expect a single
        # channel. TODO(review): confirm against LFM2AudioProcessor docs.
        if waveform.dim() == 2 and waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # 2. Prepare inputs for the model.
        inputs = processor(
            audio=waveform,
            sampling_rate=sample_rate,
            return_tensors="pt",
        ).to(device)

        # 3. Generate the interleaved audio-to-audio response.
        #    Generation parameters may need tuning per model version.
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,  # adjust response length as needed
                do_sample=True,
                temperature=0.7,
            )

        # 4. Decode tokens back to a waveform and save it for Gradio.
        output_waveform = processor.batch_decode(generated_ids)[0]
        # BUG FIX: torch.tensor() copies (and warns) when batch_decode
        # already returns a tensor; torch.as_tensor is a no-op in that case.
        # torchaudio.save also requires a 2-D (channels, frames) tensor, so
        # promote a bare 1-D waveform to mono explicitly.
        audio = torch.as_tensor(output_waveform)
        if audio.dim() == 1:
            audio = audio.unsqueeze(0)
        torchaudio.save(OUTPUT_PATH, audio, TARGET_SAMPLE_RATE)

        return OUTPUT_PATH
    except Exception as e:
        # BUG FIX: same 2-tuple problem as above — raise gr.Error so the
        # message actually reaches the user; chain the cause for server logs.
        raise gr.Error(f"Error during inference: {str(e)}") from e


# --- GRADIO INTERFACE ---
with gr.Blocks(title="Liquid LFM2.5 Audio") as demo:
    gr.Markdown("# 💧 LiquidAI LFM2.5 Audio (Speech-to-Speech)")
    with gr.Row():
        input_audio = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Input Speech",
        )
        output_audio = gr.Audio(type="filepath", label="Response", autoplay=True)

    submit_btn = gr.Button("Generate Response", variant="primary")
    submit_btn.click(
        fn=process_audio,
        inputs=[input_audio],
        outputs=[output_audio],
    )

if __name__ == "__main__":
    demo.launch()