import torch
import gradio as gr
import requests
import os
import tempfile
import librosa
from transformers import ClapModel, ClapProcessor
# Load official Hugging Face CLAP model and processor
processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()
# Preprocess audio and return the shape of its CLAP embedding
def classify_audio(audio, sr=48000):
    inputs = processor(audios=audio, sampling_rate=sr, return_tensors="pt", padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        embeddings = model.get_audio_features(**inputs)
    # For a single clip this is typically (1, 512) with laion/clap-htsat-unfused
    return embeddings.cpu().numpy().shape
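# Minimal zero-shot sketch (not part of the original app): CLAP embeds audio and
# text in a shared space, so the audio embedding above can be scored against
# text embeddings of candidate labels. The helper name and label list below are
# illustrative assumptions.
def zero_shot_scores(audio, sr=48000, labels=("speech", "music", "dog barking")):
    audio_inputs = processor(audios=audio, sampling_rate=sr, return_tensors="pt", padding=True)
    text_inputs = processor(text=list(labels), return_tensors="pt", padding=True)
    audio_inputs = {k: v.to(device) for k, v in audio_inputs.items()}
    text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
    with torch.no_grad():
        audio_emb = model.get_audio_features(**audio_inputs)  # shape (1, dim)
        text_emb = model.get_text_features(**text_inputs)     # shape (len(labels), dim)
    # Cosine similarity broadcasts (1, dim) against (len(labels), dim)
    sims = torch.nn.functional.cosine_similarity(audio_emb, text_emb)
    return dict(zip(labels, sims.cpu().tolist()))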
# 🎼 Classify uploaded audio
def classify_upload(audio_path):
    try:
        audio, sr = librosa.load(audio_path, sr=48000, mono=True)
        shape = classify_audio(audio, sr)
        return f"✅ Upload Successful → Embedding Shape: {shape}"
    except Exception as e:
        return f"❌ Upload Error: {str(e)}"
# 🌐 Classify audio via URL
def classify_url(audio_url):
    try:
        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()
        file_ext = audio_url.split('.')[-1].lower()
        if file_ext not in ['wav', 'mp3', 'ogg']:
            return f"❌ Unsupported format: .{file_ext}"
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
            tmp.write(response.content)
            tmp_path = tmp.name
        try:
            audio, sr = librosa.load(tmp_path, sr=48000, mono=True)
        finally:
            os.remove(tmp_path)  # clean up the temporary download
        shape = classify_audio(audio, sr)
        return f"✅ URL Classified → Embedding Shape: {shape}"
    except requests.exceptions.Timeout:
        return "❌ Error: Request timed out"
    except Exception as e:
        return f"❌ URL Error: {str(e)}"
# Gradio interfaces
upload_ui = gr.Interface(
    classify_upload, gr.Audio(type="filepath"), "text",
    title="Audtheia CLAP Audio Agent (Upload)",
    description="Upload audio (.wav/.mp3) to generate CLAP embeddings using the official LAION-CLAP model."
)
url_ui = gr.Interface(
    classify_url, "text", "text",
    title="Audtheia CLAP Audio Agent (URL)",
    description="Classify audio from direct URLs (.wav/.mp3/.ogg) using LAION-CLAP."
)
app = gr.TabbedInterface(
    [upload_ui, url_ui],
    ["Upload Audio", "HTTP Audio URL"],
    title="🛰️ Audtheia Multimodal CLAP Agent"
)
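# Hypothetical local smoke test ("sample.wav" is a placeholder path):
#   audio, sr = librosa.load("sample.wav", sr=48000, mono=True)
#   print(classify_audio(audio, sr))  # expect an embedding shape such as (1, 512)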
# Queue requests (max 10 pending) and launch the app
app.queue(max_size=10).launch()