audio_classification_regular

Sleeping

fosters committed on Jun 7

Commit

5266751

verified ·

1 Parent(s): fefad81

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,24 +16,31 @@ else:
     print("Could not determine the number of CPU cores. Using default settings.")
 # Initialize the audio classification pipeline with the MIT model
 pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
 # Define the function to classify an audio file and return the top 3 results
-def classify_audio(audio):
-    result = pipe(audio)
-    return {label['label']: label['score'] for label in result}
 # Set up the Gradio interface
-# We removed `num_top_classes=3` from `gr.Label` and instead handle the
-# top-3 logic inside the `classify_audio` function. This avoids the bug.
 app = gr.Interface(
     fn=classify_audio,                  # Function to classify audio
-    inputs=gr.Audio(type="filepath"),   # Input for uploading an audio file
-    outputs=gr.Label(num_top_classes=3),                 # Output Label will display the dictionary from the function
-    title="Audio Classification",        # App title
-    description="Upload an audio file to classify it using MIT's fine-tuned AudioSet model."
 )
-# Launch the app
 if __name__ == "__main__":
-    app.launch()

     print("Could not determine the number of CPU cores. Using default settings.")
 # Initialize the audio classification pipeline with the MIT model
+# The pipeline will run on the CPU by default
+# NOTE(review): `pipeline` is presumably transformers.pipeline; its import is
+# outside this hunk, so confirm against the top of app.py.
+# `pipe` is created once at module load and is the object that
+# classify_audio calls for every request.
 pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
 # Define the function to classify an audio file and return the top 3 results
+def classify_audio(audio_filepath):
+    """
+    Classify an audio file and return a dictionary of the top 3 predictions.
+
+    Args:
+        audio_filepath: Path to the audio file to classify, as supplied by the
+            `gr.Audio(type="filepath")` input. Gradio passes None when the
+            form is submitted without any audio.
+
+    Returns:
+        A dict of {label: score} for the first three predictions returned by
+        the pipeline, in the form consumed by the `gr.Label` output.
+
+    Raises:
+        gr.Error: If no audio file was provided.
+    """
+    # Fail with a readable message in the UI instead of letting the pipeline
+    # crash on a missing input.
+    if not audio_filepath:
+        raise gr.Error("Please upload an audio file before submitting.")
+    preds = pipe(audio_filepath)
+    # Keep only the first 3 entries of the pipeline's prediction list.
+    top_3_preds = preds[:3]
+    # Format the output as a dictionary of {label: score} for the gr.Label component
+    return {p["label"]: p["score"] for p in top_3_preds}
 # Set up the Gradio interface
 app = gr.Interface(
     fn=classify_audio,                  # Function to classify audio
+    inputs=gr.Audio(type="filepath", label="Upload Audio File"),   # Input for uploading an audio file; passed to fn as a file path
+    outputs=gr.Label(label="Top 3 Predictions"),                 # Output Label will display the dictionary from the function
+    title="Audio Classification with MIT/AST",
+    description="Upload an audio file to classify it. The model will identify the top 3 most likely sound categories.",
 )
+# Launch the app only when this file is run as a script.
+# `share=True` is intentionally not passed: Hugging Face Spaces already serves
+# the app publicly and Gradio does not support share links there, while on a
+# local machine it would open a public tunnel to the app.
 if __name__ == "__main__":
+    app.launch()