File size: 1,522 Bytes
1f01380
 
fefad81
 
 
 
39ec782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6c5111
5266751
39ec782
 
5266751
f6c5111
39ec782
 
f6c5111
 
39ec782
 
 
1f01380
39ec782
 
f6c5111
5266751
39ec782
 
 
 
1f01380
 
39ec782
 
1f01380
5266751
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
from transformers import pipeline
import os
import torch

# --- Performance Improvement ---
# Size PyTorch's CPU thread pool from the CPU count reported by the OS.
num_cpu_cores = os.cpu_count()
if not num_cpu_cores:
    # os.cpu_count() can return None when the count is undeterminable;
    # fall back to a single thread in that case.
    num_cpu_cores = 1
torch.set_num_threads(num_cpu_cores)
print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")


# --- Model and Pipeline ---
# Build the audio-classification pipeline once at import time so that every
# call to classify_audio reuses the same loaded model.
# No `device` argument is passed, so the library's default placement applies.
# NOTE: no `revision` argument is passed either, so the checkpoint is not
# pinned; add `revision=<commit or tag>` here if reproducibility matters.
pipe = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)


# --- Core Logic Function ---
def classify_audio(audio):
    """
    Run the module-level pipeline on one audio input and return its scores.

    Args:
        audio: The value supplied by the audio input component, or ``None``
            when nothing was provided.

    Returns:
        A prompt string when ``audio`` is ``None``; otherwise a dict mapping
        each predicted label to its score, with one entry per prediction
        returned by the pipeline (no truncation happens here).
    """
    # Guard clause: nothing to classify, so skip the model entirely.
    if audio is None:
        return "Please upload an audio file first."

    scores_by_label = {}
    for prediction in pipe(audio):
        scores_by_label[prediction['label']] = prediction['score']
    return scores_by_label

# --- Gradio Interface ---
# Wire classify_audio to an audio input and a label output.
app = gr.Interface(
    title="Audio Classification with MIT/AST",
    description="Upload an audio file to classify it. The model will identify the top 3 most likely sound categories. ",
    fn=classify_audio,
    # type="filepath" means classify_audio is handed a path to the audio file.
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    # Receives whatever classify_audio returns (a label -> score dict, or the
    # prompt string when no audio was given); shows at most 3 classes.
    outputs=gr.Label(num_top_classes=3),
    cache_examples=False,
)

# --- App Launch ---
# Start the server only when this file is executed as a script.
# share=True additionally requests a public Gradio share link.
if __name__ == "__main__":
    app.launch(share=True)