"""Gradio demo that classifies uploaded audio with the MIT/AST AudioSet model."""

import os
from typing import Dict, Optional, Union

import gradio as gr
import torch
from transformers import pipeline

# --- Performance Improvement ---
# Let PyTorch use one thread per reported CPU core.
# os.cpu_count() can return None, so fall back to a single thread.
num_cpu_cores = os.cpu_count() or 1
torch.set_num_threads(num_cpu_cores)
print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")

# --- Model and Pipeline ---
# Build the pipeline once at import time so every request reuses the loaded
# model. No device is passed, so the pipeline is expected to run on the CPU.
# NOTE(review): no `revision=` is pinned here, so the latest published
# checkpoint is used; pin one if reproducibility matters.
pipe = pipeline(
    "audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)


# --- Core Logic Function ---
def classify_audio(audio: Optional[str]) -> Union[Dict[str, float], str]:
    """Classify an audio file and return its label scores.

    Args:
        audio: Path to the uploaded audio file (the Gradio input uses
            ``type="filepath"``), or ``None`` when nothing was uploaded.

    Returns:
        A mapping of predicted label to score for every prediction the
        pipeline returns, or a short prompt string when ``audio`` is
        ``None``. Limiting the display to the top 3 classes is done by the
        ``gr.Label`` output component, not by this function.
    """
    if audio is None:
        # Returned as-is to the Label output, which shows it as plain text.
        return "Please upload an audio file first."

    predictions = pipe(audio)
    return {
        prediction["label"]: prediction["score"] for prediction in predictions
    }


# --- Gradio Interface ---
# Wire the classifier to an upload widget and a label-confidence display.
app = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    # Receives the {label: score} dict and shows only the 3 best classes.
    outputs=gr.Label(num_top_classes=3),
    title="Audio Classification with MIT/AST",
    description=(
        "Upload an audio file to classify it. The model will identify the top 3 most likely sound categories. "
    ),
    cache_examples=False,
)

# --- App Launch ---
# share=True asks Gradio for a public share link.
# NOTE(review): the original comment tied this to Hugging Face Spaces —
# confirm the flag is still wanted in that environment.
if __name__ == "__main__":
    app.launch(share=True)