# Hugging Face Spaces page artifact — Space status: Paused.
import os
import tempfile
from urllib.parse import urlparse

import gradio as gr
import librosa
import requests
import torch
from transformers import ClapModel, ClapProcessor
# Load the official LAION CLAP checkpoint (model + matched processor) and
# place the model on GPU when available, in inference mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
model = model.to(device)
model.eval()
def classify_audio(audio, sr=48000):
    """Embed a raw waveform with the module-level CLAP model.

    Args:
        audio: 1-D waveform array (mono samples).
        sr: sampling rate of ``audio`` in Hz (CLAP expects 48 kHz).

    Returns:
        The shape tuple of the resulting audio-embedding array.
    """
    batch = processor(audios=audio, sampling_rate=sr, return_tensors="pt", padding=True)
    # Move every input tensor onto the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in batch.items()}
    with torch.no_grad():
        audio_embeds = model.get_audio_features(**batch)
    return audio_embeds.cpu().numpy().shape
def classify_upload(audio_path):
    """Load an uploaded audio file and report its CLAP embedding shape.

    Args:
        audio_path: filesystem path to the uploaded audio file.

    Returns:
        A human-readable status string (success with the embedding shape,
        or an error message — all failures are reported, never raised).
    """
    try:
        # Resample to 48 kHz mono, the rate the CLAP processor expects.
        waveform, sample_rate = librosa.load(audio_path, sr=48000, mono=True)
        emb_shape = classify_audio(waveform, sample_rate)
        return f"β Upload Successful β Embedding Shape: {emb_shape}"
    except Exception as err:
        return f"β Upload Error: {str(err)}"
def classify_url(audio_url):
    """Download an audio file from a direct URL and report its CLAP embedding shape.

    Args:
        audio_url: direct HTTP(S) URL to a .wav/.mp3/.ogg file.

    Returns:
        A human-readable status string (success with the embedding shape,
        or an error message — all failures are reported, never raised).
    """
    try:
        # Fix: derive the extension from the URL *path* so query strings or
        # fragments (e.g. "track.wav?token=x") don't defeat the check, and
        # validate it BEFORE downloading to avoid fetching unusable files.
        url_path = urlparse(audio_url).path
        file_ext = os.path.splitext(url_path)[1].lstrip('.').lower()
        if file_ext not in ('wav', 'mp3', 'ogg'):
            return f"β Unsupported format: .{file_ext}"
        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()
        # librosa needs a real file path; write to a named temp file, and
        # (fix) delete it afterwards — the original leaked one per call.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
            tmp.write(response.content)
            tmp_path = tmp.name
        try:
            audio, sr = librosa.load(tmp_path, sr=48000, mono=True)
        finally:
            os.unlink(tmp_path)
        shape = classify_audio(audio, sr)
        return f"β URL Classified β Embedding Shape: {shape}"
    except requests.exceptions.Timeout:
        return "β Error: Request timed out"
    except Exception as e:
        return f"β URL Error: {str(e)}"
# Tab 1: upload a local audio file and embed it with CLAP.
upload_ui = gr.Interface(
    fn=classify_upload,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Audtheia CLAP Audio Agent (Upload)",
    description="Upload audio (.wav/.mp3) to generate CLAP embeddings using official LAION-CLAP.",
)
# Tab 2: embed an audio file fetched from a direct URL.
url_ui = gr.Interface(
    fn=classify_url,
    inputs="text",
    outputs="text",
    title="Audtheia CLAP Audio Agent (URL)",
    description="Classify audio from direct URLs (.wav/.mp3/.ogg) using LAION-CLAP.",
)
# Combine both entry points into a tabbed app; the request queue caps
# pending jobs at 10 before launching the server.
app = gr.TabbedInterface(
    interface_list=[upload_ui, url_ui],
    tab_names=["Upload Audio", "HTTP Audio URL"],
    title="π°οΈ Audtheia Multimodal CLAP Agent",
)
app.queue(max_size=10).launch()