File size: 1,522 Bytes
1f01380 fefad81 39ec782 f6c5111 5266751 39ec782 5266751 f6c5111 39ec782 f6c5111 39ec782 1f01380 39ec782 f6c5111 5266751 39ec782 1f01380 39ec782 1f01380 5266751 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from transformers import pipeline
import os
import torch
# --- Performance Improvement ---
# Size PyTorch's CPU thread pool to the number of CPUs the OS reports.
_detected_cpus = os.cpu_count()
# os.cpu_count() can return None; fall back to a single thread in that case.
num_cpu_cores = _detected_cpus if _detected_cpus else 1
torch.set_num_threads(num_cpu_cores)
print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")
# --- Model and Pipeline ---
# Build the audio-classification pipeline once, at import time, so that
# classify_audio() reuses the same loaded model on every call.
# No `device` argument is passed, so the library's default device applies.
# NOTE: no `revision=` argument is passed either, so the checkpoint is not
# pinned to a specific revision.
pipe = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)
# --- Core Logic Function ---
def classify_audio(audio):
    """Classify an audio file and return per-label confidence scores.

    Args:
        audio: Path to the audio file to classify (the Gradio input is
            configured with ``type="filepath"``), or ``None`` when nothing
            has been uploaded.

    Returns:
        A ``{label: score}`` dict containing every prediction the pipeline
        returns, or a prompt string when ``audio`` is ``None``. No top-3
        selection happens here; the ``gr.Label`` output component is the
        one configured with ``num_top_classes=3``.
    """
    # Guard clause: skip the model entirely when nothing was uploaded.
    if audio is None:
        return "Please upload an audio file first."

    predictions = pipe(audio)
    # Each prediction is indexed by its "label" and "score" keys.
    return {item["label"]: item["score"] for item in predictions}
# --- Gradio Interface ---
# Create the Gradio app interface
# Input: the uploaded file is handed to the callback as a path on disk.
_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
# Output: classify_audio returns a {label: score} dict (or a message string
# when no file was given); the Label component is limited to 3 classes.
_label_output = gr.Label(num_top_classes=3)

app = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_label_output,
    title="Audio Classification with MIT/AST",
    description=(
        "Upload an audio file to classify it. "
        "The model will identify the top 3 most likely sound categories. "
    ),
    cache_examples=False,
)
# --- App Launch ---
# Start the Gradio server only when this file is run as a script.
# share=True asks Gradio to create a public share link.
# NOTE(review): a Hugging Face Space is already served publicly, so
# share=True is likely unnecessary there — confirm against the Gradio docs
# before removing it.
if __name__ == "__main__":
    app.launch(share=True)