"""Gradio app that computes LAION-CLAP audio embeddings.

Two tabs are exposed: one accepts an uploaded audio file, the other downloads
audio from a direct HTTP(S) URL. Both report the shape of the embedding that
``ClapModel.get_audio_features`` returns for the clip.
"""

import os
import tempfile
from urllib.parse import urlparse

import gradio as gr
import librosa
import requests
import torch
from transformers import ClapModel, ClapProcessor

# Every clip is resampled to this rate by librosa before it reaches the processor.
TARGET_SAMPLE_RATE = 48000

# File extensions accepted by the URL tab (compared lowercase, without the dot).
SUPPORTED_URL_EXTENSIONS = ("wav", "mp3", "ogg")

# Load official Hugging Face CLAP model and processor
processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()


def classify_audio(audio, sr=TARGET_SAMPLE_RATE):
    """Run the CLAP audio encoder on a waveform and return the embedding shape.

    Args:
        audio: Waveform samples, as produced by ``librosa.load``.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        The ``.shape`` tuple of the embedding array.
    """
    inputs = processor(audios=audio, sampling_rate=sr, return_tensors="pt", padding=True)
    # Move every processor output onto the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        embeddings = model.get_audio_features(**inputs)
    return embeddings.cpu().numpy().shape


def classify_upload(audio_path):
    """Embed an uploaded audio file and return a human-readable status line.

    Args:
        audio_path: Filesystem path handed over by the ``gr.Audio`` component;
            falsy when the user submitted without selecting a file.

    Returns:
        A success message containing the embedding shape, or an error message.
        Exceptions are never propagated to the UI.
    """
    if not audio_path:
        # Guard clause: avoids surfacing an opaque loader error for a missing file.
        return "❌ Upload Error: no audio file provided"
    try:
        audio, sr = librosa.load(audio_path, sr=TARGET_SAMPLE_RATE, mono=True)
        shape = classify_audio(audio, sr)
        return f"✅ Upload Successful — Embedding Shape: {shape}"
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the request.
        return f"❌ Upload Error: {str(e)}"


def _url_extension(audio_url):
    """Return the lowercase extension of the URL's path component, without the dot.

    Only the path is inspected, so query strings and fragments
    (``clip.wav?token=1#t=3``) do not pollute the result. Returns ``""`` when
    the path has no extension.
    """
    url_path = urlparse(audio_url).path
    return os.path.splitext(url_path)[1].lstrip(".").lower()


def classify_url(audio_url):
    """Download audio from a URL, embed it, and return a status line.

    The extension is validated before any network traffic so unsupported URLs
    are rejected without downloading. The downloaded bytes are written to a
    temporary file that is always removed afterwards.

    Args:
        audio_url: Direct URL to a ``.wav``, ``.mp3`` or ``.ogg`` file.

    Returns:
        A success message containing the embedding shape, or an error message.
        Exceptions are never propagated to the UI.
    """
    audio_url = (audio_url or "").strip()
    if not audio_url:
        return "❌ URL Error: no URL provided"

    file_ext = _url_extension(audio_url)
    if file_ext not in SUPPORTED_URL_EXTENSIONS:
        return f"❌ Unsupported format: .{file_ext}"

    tmp_path = None
    try:
        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()
        # delete=False so the file can be closed and then reopened by name by
        # librosa; cleanup is handled in the finally clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
            tmp_path = tmp.name
            tmp.write(response.content)
        audio, sr = librosa.load(tmp_path, sr=TARGET_SAMPLE_RATE, mono=True)
        shape = classify_audio(audio, sr)
        return f"✅ URL Classified — Embedding Shape: {shape}"
    except requests.exceptions.Timeout:
        return "❌ Error: Request timed out"
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the request.
        return f"❌ URL Error: {str(e)}"
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not mask the result.
                pass


# Gradio interfaces
upload_ui = gr.Interface(
    classify_upload,
    gr.Audio(type="filepath"),
    "text",
    title="Audtheia CLAP Audio Agent (Upload)",
    description="Upload audio (.wav/.mp3) to generate CLAP embeddings using official LAION-CLAP.",
)

url_ui = gr.Interface(
    classify_url,
    "text",
    "text",
    title="Audtheia CLAP Audio Agent (URL)",
    description="Classify audio from direct URLs (.wav/.mp3/.ogg) using LAION-CLAP.",
)

app = gr.TabbedInterface(
    [upload_ui, url_ui],
    ["Upload Audio", "HTTP Audio URL"],
    title="🛰️ Audtheia Multimodal CLAP Agent",
)

# Cap the request queue at 10 pending jobs, then start the server.
app.queue(max_size=10).launch()