Spaces:

AescF
/

LanguageClass

Runtime error

App Files Files Community

AescF committed on Sep 26, 2023

Commit

fc74109

1 Parent(s): 33d8db0

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -31

app.py CHANGED Viewed

@@ -1,13 +1,10 @@
 import gradio as gr
 import librosa
 import numpy as np
-from transformers import AutoFeatureExtractor
-import os
-model_id = "AescF/hubert-base-ls960-finetuned-common_language"
-processor = Wav2Vec2Processor.from_pretrained(model_id)
-model = Wav2Vec2ForClassification.from_pretrained(model_id)
 language_classes = {
     0: "Arabic",
     1: "Basque",
@@ -57,31 +54,51 @@ language_classes = {
 }
-def predict_language(audio):
-    # Read audio file
-    audio_input, sr = librosa.load(audio, sr=16000)
-    # Convert to suitable format
-    input_values = processor(audio_input, return_tensors="pt", padding=True).input_values
-    # Make prediction
-    with torch.no_grad():
-        logits = model(input_values).logits
-    # Compute probabilities
-    probabilities = torch.softmax(logits, dim=1)
-    # Retrieve label
-    predicted_language_idx = torch.argmax(probabilities[0]).item()
-    return {language_classes[predicted_language_idx]: float(probabilities[0][predicted_language_idx])}
-iface = gr.Interface(
-    predict_language,
-    inputs=gr.inputs.Audio(type="filepath", label="Upload Language Audio file"),
-    outputs=gr.outputs.Label(),
-    title="Language Classifier",
-    live=True
 )
-script_dir = os.path.abspath(os.path.join(os.path.abspath(''), os.pardir))
-iface.launch()

 import gradio as gr
 import librosa
 import numpy as np
+import torch
+from transformers import pipeline
 language_classes = {
     0: "Arabic",
     1: "Basque",
 }
+# Account name matching the prefix of model_id; not referenced elsewhere in the visible code.
+username = "AescF"  ## Complete your username
+model_id = "AescF/hubert-base-ls960-finetuned-common_language"
+# Run on the first CUDA GPU when torch reports one, otherwise fall back to the CPU.
+if torch.cuda.is_available():
+    device = "cuda:0"
+else:
+    device = "cpu"
+# Audio-classification pipeline, built once at import time and called by classify_audio.
+pipe = pipeline(task="audio-classification", model=model_id, device=device)
+# def predict_trunc(filepath):
+#     preprocessed = pipe.preprocess(filepath)
+#     truncated = pipe.feature_extractor.pad(preprocessed,truncation=True, max_length = 16_000*30)
+#     model_outputs = pipe.forward(truncated)
+#     outputs = pipe.postprocess(model_outputs)
+#     return outputs
+def classify_audio(filepath):
+    """Classify the audio file at *filepath* and time the prediction.
+
+    The pipeline returns a list of ``{"score": ..., "label": ...}`` dicts,
+    e.g. ``[{'score': 0.83, 'label': 'a'}, {'score': 0.12, 'label': 'b'}]``.
+    This is flattened to ``{"a": 0.83, "b": 0.12}``, the mapping shape that
+    ``gr.Label`` displays.
+
+    Args:
+        filepath: Path to the audio file, as passed by ``gr.Audio(type="filepath")``.
+
+    Returns:
+        A ``(scores, seconds)`` tuple: the label -> score mapping and the
+        wall-clock time spent in the pipeline call, rounded to 5 decimals.
+        The two items line up with the two outputs declared on the Interface
+        (Label and Number).
+    """
+    # Imported locally so the function does not rely on a module-level
+    # import that the file does not have.
+    from timeit import default_timer as timer
+
+    start_time = timer()
+    preds = pipe(filepath)
+    # preds = predict_trunc(filepath)
+    pred_time = round(timer() - start_time, 5)
+    # Store the bare score per label (not a tuple) so gr.Label can render it.
+    outputs = {p["label"]: p["score"] for p in preds}
+    return outputs, pred_time
+# User-facing text for the demo page; describes the language-identification
+# model loaded via model_id rather than an unrelated music-genre demo.
+title = "Language Classifier"
+description = """
+Demo for a spoken-language classifier based on [AescF/hubert-base-ls960-finetuned-common_language](https://huggingface.co/AescF/hubert-base-ls960-finetuned-common_language)
+For more info checkout [GITHUB](https://github.com/AEscF)
+"""
+# Gradio UI: one audio-file input, two outputs (label scores and elapsed seconds).
+# No `examples` argument is passed: the previous value referred to a
+# `filenames` variable that is not defined in the visible file and would
+# raise NameError at startup.
+demo = gr.Interface(
+    fn=classify_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs=[gr.Label(label="Predictions"), gr.Number(label="Prediction time (s)")],
+    title=title,
+    description=description,
 )
+demo.launch()