audio_classification_regular

Sleeping

App Files Community

fosters committed on Jun 8

Commit

39ec782

verified ·

1 Parent(s): 684b36b

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -28

app.py CHANGED Viewed

@@ -4,42 +4,58 @@ import os
 import torch
 # --- Performance Improvement ---
-# 1. Determine the number of available CPU cores.
-num_cpu_cores = os.cpu_count()
-# 2. Configure PyTorch to use all available CPU cores for its operations.
-# This is crucial for speeding up model inference on a CPU.
-if num_cpu_cores is not None:
-    torch.set_num_threads(num_cpu_cores)
-    print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")
-else:
-    print("Could not determine the number of CPU cores. Using default settings.")
-# Initialize the audio classification pipeline with the MIT model
-# The pipeline will run on the CPU by default
-pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
-# Define the function to classify an audio file and return the top 3 results
 def classify_audio(audio_filepath):
     """
-    Classifies the audio file and returns a dictionary of the top 3 predictions.
     """
     preds = pipe(audio_filepath)
-    # The pipeline returns a sorted list of predictions. We take the top 3.
-    top_3_preds = preds[:3]
-    # Format the output as a dictionary of {label: score} for the gr.Label component
-    output_labels = {p["label"]: p["score"] for p in top_3_preds}
-    return output_labels
-# Set up the Gradio interface
 app = gr.Interface(
-    fn=classify_audio,                  # Function to classify audio
-    inputs=gr.Audio(type="filepath", label="Upload Audio File"),   # Input for uploading an audio file
-    outputs=gr.Label(label="Top 3 Predictions"),                 # Output Label will display the dictionary from the function
     title="Audio Classification with MIT/AST",
-    description="Upload an audio file to classify it. The model will identify the top 3 most likely sound categories."
 )
-# Launch the app with a shareable link, required for Hugging Face Spaces
 if __name__ == "__main__":
     app.launch(share=True)

 import torch
 # --- Performance Improvement ---
+# Configure PyTorch for CPU performance
+num_cpu_cores = os.cpu_count() or 1 # Default to 1 if os.cpu_count() is None
+torch.set_num_threads(num_cpu_cores)
+print(f"✅ PyTorch is configured to use {num_cpu_cores} CPU cores.")
+# --- Model and Pipeline ---
+# Initialize the pipeline. It will default to the CPU.
+# NOTE: no revision is pinned below; pass `revision=` to pipeline() for reproducibility
+pipe = pipeline(
+    "audio-classification",
+    model="MIT/ast-finetuned-audioset-10-10-0.4593"
+)
+# --- Core Logic Function ---
 def classify_audio(audio_filepath):
     """
+    Classifies the audio, takes the top 3 predictions,
+    and formats them into a single, human-readable string.
     """
+    if audio_filepath is None:
+        return "Please upload an audio file first."
     preds = pipe(audio_filepath)
+    # Format the output as a string instead of a dictionary
+    # This is the key change to fix the TypeError
+    output_str = ""
+    for i, pred in enumerate(preds[:3]):
+        label = pred["label"]
+        score = pred["score"]
+        output_str += f"{i+1}. {label}: {score:.4f}\n"
+    return output_str.strip()
+# --- Gradio Interface ---
+# Create the Gradio app interface
 app = gr.Interface(
+    fn=classify_audio,
+    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
+    outputs=gr.Label(label="Top 3 Predictions"), # This will now receive a simple string
     title="Audio Classification with MIT/AST",
+    description=(
+        "Upload an audio file to classify it. The model will identify the top 3 most likely sound categories. "
+        "This version is corrected to avoid common Gradio backend errors."
+    ),
+    cache_examples=False,
 )
+# --- App Launch ---
+# Launch the app (a Hugging Face Space is already served publicly, so share=True is not needed there)
 if __name__ == "__main__":
     app.launch(share=True)