# Audtheia-CLAP / app.py
# Kaworu17's picture
# Update app.py
# 78e1c98 verified
import os
import tempfile
from urllib.parse import urlparse

import gradio as gr
import librosa
import requests
import torch
from transformers import ClapModel, ClapProcessor
# Pick the compute device, then load the official Hugging Face CLAP
# processor and model and place the model on that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
model.to(device)
model.eval()  # the app only runs inference
# Preprocess a waveform, embed it with CLAP and report the embedding shape
def classify_audio(audio, sr=48000):
    """Run the CLAP audio encoder on a waveform and return the embedding shape.

    Args:
        audio: Waveform handed to the CLAP processor as ``audios``.
        sr: Sampling rate passed to the processor (defaults to 48000).

    Returns:
        The ``.shape`` of the audio features after they are moved to the CPU
        and converted to a NumPy array.
    """
    features = processor(
        audios=audio,
        sampling_rate=sr,
        return_tensors="pt",
        padding=True,
    )

    # Move every processor output onto the same device as the model.
    on_device = {}
    for name, tensor in features.items():
        on_device[name] = tensor.to(device)

    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        audio_embeds = model.get_audio_features(**on_device)

    as_numpy = audio_embeds.cpu().numpy()
    return as_numpy.shape
# 🔼 Classify uploaded audio
def classify_upload(audio_path):
    """Embed an uploaded audio file and report the embedding shape.

    Args:
        audio_path: Filesystem path of the uploaded audio. May be ``None`` or
            empty when the form is submitted without a file.

    Returns:
        A status string: the embedding shape on success, or a message
        starting with "❌ Upload Error:" on failure. Errors are reported in
        the returned string rather than raised, so the UI always has text
        to display.
    """
    # Fail early with a clear message instead of passing an empty path to
    # librosa and surfacing a cryptic library error.
    if not audio_path:
        return "❌ Upload Error: no audio file provided"
    try:
        # Load as mono at 48 kHz, the same rate handed to classify_audio.
        audio, sr = librosa.load(audio_path, sr=48000, mono=True)
        shape = classify_audio(audio, sr)
        return f"✅ Upload Successful — Embedding Shape: {shape}"
    except Exception as e:
        return f"❌ Upload Error: {e}"
# 🌐 Classify audio via URL
def classify_url(audio_url):
    """Download audio from a direct URL and report its CLAP embedding shape.

    The file extension is taken from the URL's path component, so query
    strings and fragments (``clip.wav?token=abc``) do not confuse the format
    check. The check runs before any network request, so unsupported links
    are rejected without downloading anything.

    Args:
        audio_url: Direct link to a ``.wav``, ``.mp3`` or ``.ogg`` file.

    Returns:
        A status string: the embedding shape on success, or a message
        starting with "❌" describing why the URL could not be processed.
        Errors are reported in the returned string rather than raised.
    """
    try:
        url = (audio_url or "").strip()

        # Use only the path part of the URL to find the extension.
        url_path = urlparse(url).path
        file_ext = os.path.splitext(url_path)[1].lstrip(".").lower()
        if file_ext not in ("wav", "mp3", "ogg"):
            return f"❌ Unsupported format: .{file_ext}"

        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # A temporary directory is removed automatically when the block
        # exits, so repeated requests do not leave downloaded files behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, f"audio.{file_ext}")
            with open(tmp_path, "wb") as tmp:
                tmp.write(response.content)
            # The decoded samples are held in memory, so the file can be
            # deleted as soon as loading finishes.
            audio, sr = librosa.load(tmp_path, sr=48000, mono=True)

        shape = classify_audio(audio, sr)
        return f"✅ URL Classified — Embedding Shape: {shape}"
    except requests.exceptions.Timeout:
        return "❌ Error: Request timed out"
    except Exception as e:
        return f"❌ URL Error: {e}"
# Gradio interfaces: one per input method, each returning a status string
upload_ui = gr.Interface(
    fn=classify_upload,
    inputs=gr.Audio(type="filepath"),  # handler receives a path, not samples
    outputs="text",
    title="Audtheia CLAP Audio Agent (Upload)",
    description="Upload audio (.wav/.mp3) to generate CLAP embeddings using official LAION-CLAP.",
)

url_ui = gr.Interface(
    fn=classify_url,
    inputs="text",
    outputs="text",
    title="Audtheia CLAP Audio Agent (URL)",
    description="Classify audio from direct URLs (.wav/.mp3/.ogg) using LAION-CLAP.",
)

# Present both interfaces as tabs of a single app.
app = gr.TabbedInterface(
    interface_list=[upload_ui, url_ui],
    tab_names=["Upload Audio", "HTTP Audio URL"],
    title="🛰️ Audtheia Multimodal CLAP Agent",
)

# Enable request queuing with at most 10 queued events, then start the server.
app.queue(max_size=10).launch()