Spaces:
Build error
Build error
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| import os | |
| import numpy as np | |
| from liquid_audio import LFM2AudioModel, LFM2AudioProcessor | |
# --- CONFIGURATION ---
MODEL_ID = "LiquidAI/LFM2.5-Audio-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"⏳ Loading model: {MODEL_ID} on {device}...")

# Pre-initialize both handles so a failed load leaves well-defined state.
# The original only set `model = None` in the except branch: if the
# *processor* load raised, `processor` was never bound and process_audio()
# crashed with a NameError instead of reporting "Model not loaded".
processor = None
model = None
try:
    processor = LFM2AudioProcessor.from_pretrained(MODEL_ID)
    model = LFM2AudioModel.from_pretrained(MODEL_ID).to(device)
    print("✅ Model loaded successfully!")
except Exception as e:
    # Keep the app importable/launchable even when weights cannot be
    # fetched; inference reports the failure to the user instead.
    print(f"❌ Error loading model: {e}")
    processor = None
    model = None
def process_audio(input_audio_path):
    """Run speech-to-speech inference on a recorded/uploaded audio file.

    Args:
        input_audio_path: Filepath string supplied by the Gradio Audio
            component, or None if the user submitted without recording.

    Returns:
        Path to the generated response WAV file, or None on any failure.
        (The original returned a ``(None, str)`` tuple on error, which
        mismatches the single output component wired up in the UI.)
    """
    # Gradio passes None when the button is clicked with no audio captured.
    if input_audio_path is None:
        return None
    if model is None:
        print("Error: Model not loaded.")
        return None
    try:
        # 1. Load the audio file (Gradio passes a filepath string).
        waveform, sample_rate = torchaudio.load(input_audio_path)

        # Downmix multi-channel input to mono.
        # NOTE(review): assumes the model expects single-channel speech,
        # as is conventional for speech models — confirm for LFM2.5-Audio.
        if waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Resample if necessary (model typically expects 16 kHz).
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)
            sample_rate = 16000

        # 2. Prepare inputs for the model.
        inputs = processor(
            audio=waveform,
            sampling_rate=sample_rate,
            return_tensors="pt",
        ).to(device)

        # 3. Generate response (interleaved audio-to-audio).
        # Generation parameters may need tuning per model version.
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,  # adjust response length as needed
                do_sample=True,
                temperature=0.7,
            )

        # 4. Decode the output tokens back to a waveform.
        output_waveform = processor.batch_decode(generated_ids)[0]

        # torchaudio.save requires a 2-D (channels, frames) tensor; the
        # decoded waveform may come back 1-D.
        audio_out = torch.as_tensor(output_waveform)
        if audio_out.dim() == 1:
            audio_out = audio_out.unsqueeze(0)

        # Save to a file so Gradio (type="filepath") can serve it.
        output_path = "output_response.wav"
        torchaudio.save(output_path, audio_out, 16000)
        return output_path
    except Exception as e:
        # Log and return None so Gradio clears the output component
        # rather than receiving a mismatched (None, str) tuple.
        print(f"Error during inference: {str(e)}")
        return None
# --- GRADIO INTERFACE ---
# Speech-to-speech demo: record or upload audio, generate a spoken reply.
demo = gr.Blocks(title="Liquid LFM2.5 Audio")

with demo:
    gr.Markdown("# 💧 LiquidAI LFM2.5 Audio (Speech-to-Speech)")

    with gr.Row():
        mic_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Input Speech",
        )
        response_player = gr.Audio(
            type="filepath",
            label="Response",
            autoplay=True,
        )

    generate_btn = gr.Button("Generate Response", variant="primary")
    generate_btn.click(
        fn=process_audio,
        inputs=[mic_input],
        outputs=[response_player],
    )

if __name__ == "__main__":
    demo.launch()