# audio_classification_regular
#
# Sleeping
#
# File size: 1,522 Bytes

import gradio as gr
from transformers import pipeline
import os
import torch

# --- Performance Improvement ---
# Let PyTorch use one intra-op thread per CPU core reported by the OS.
# os.cpu_count() may return None, in which case a single thread is used.
num_cpu_cores = os.cpu_count()
if not num_cpu_cores:
    num_cpu_cores = 1

torch.set_num_threads(num_cpu_cores)
print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")


# --- Model and Pipeline ---
# Build the audio-classification pipeline once, at import time, so every
# request reuses the same loaded model. No `device` argument is passed, so
# the library's default device selection applies (presumably CPU here --
# TODO confirm for the installed transformers version).
# NOTE(review): no `revision` is pinned; the model is resolved from the
# repository's default branch. Pass `revision=...` if reproducibility matters.
pipe = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)


# --- Core Logic Function ---
def classify_audio(audio):
    """
    Run the module-level `pipe` on `audio` and return its scores.

    Args:
        audio: The value handed over by the Gradio audio input, or None
            when nothing was provided.

    Returns:
        The string "Please upload an audio file first." when `audio` is
        None; otherwise a dict mapping each prediction's 'label' to its
        'score', built from every entry `pipe` returns (no truncation is
        done here).
    """
    # Guard clause: nothing to classify.
    if audio is None:
        return "Please upload an audio file first."

    scores_by_label = {}
    for prediction in pipe(audio):
        scores_by_label[prediction['label']] = prediction['score']
    return scores_by_label

# --- Gradio Interface ---
# Input component: the uploaded audio is passed to classify_audio as a
# file path (type="filepath").
_audio_input = gr.Audio(type="filepath", label="Upload Audio File")

# Output component: classify_audio returns either a label -> score dict or a
# plain message string; the Label is configured with num_top_classes=3.
_label_output = gr.Label(num_top_classes=3)

# Wire the classifier function to the input and output components.
app = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_label_output,
    title="Audio Classification with MIT/AST",
    description=(
        "Upload an audio file to classify it. The model will identify the top 3 most likely sound categories. "
    ),
    cache_examples=False,
)

# --- App Launch ---
# Start the Gradio server only when this file is executed as a script.
# NOTE(review): share=True asks Gradio for a public share link; confirm this
# is still wanted (and supported) when the app is hosted on Hugging Face Spaces.
if __name__ == "__main__":
    launch_options = {"share": True}
    app.launch(**launch_options)