Spaces:

karenlu653
/

dialect-demo

Sleeping

App Files Files Community

karenlu653 commited on Sep 17, 2025

Commit

5edb4b5

1 Parent(s): 1d674fa

added tab for dataset

Browse files

Files changed (2) hide show

app.py +49 -11
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import torch
-import torch.nn as nn
 import librosa
 import numpy as np
 import json
 from huggingface_hub import hf_hub_download
-import gradio as gr
-import soundfile as sf
 from safetensors.torch import load_file
 # ----------------- Model definition -----------------
 class LanNetBinary(nn.Module):
@@ -97,15 +100,50 @@ def predict(audio_path):
     return label_map.get(str(pred), str(pred))
 # ----------------- Gradio Interface -----------------
-iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
-    outputs="text",
-    title="Dialect Classification Demo",
-    description="Upload or record audio to classify if this is the Shanghai dialect!"
-)
 if __name__ == "__main__":
-    iface.launch()

 import torch
+import gradio as gr
 import librosa
 import numpy as np
 import json
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
+from datasets import load_dataset
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import confusion_matrix
 # ----------------- Model definition -----------------
 class LanNetBinary(nn.Module):
     return label_map.get(str(pred), str(pred))
+def evaluate_dataset():
+    ds = load_dataset("karenlu653/dialect_model_demo", split="train")
+    y_true, y_pred = [], []
+    for row in ds:
+        y = np.array(row["audio"], dtype=np.float32)
+        sr = preproc.get("sampling_rate", 16000)
+        feats = extract_features(y, sr).to(device)
+        with torch.no_grad():
+            logits = model(feats)
+            pred = int(logits.argmax(dim=1))
+        y_pred.append(pred)
+        y_true.append(row["label"])
+    # Confusion matrix
+    labels = sorted(set(y_true))
+    cm = confusion_matrix(y_true, y_pred, labels=labels)
+    # Plot
+    fig, ax = plt.subplots(figsize=(5, 4))
+    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[label_map[str(l)] for l in labels],
+                yticklabels=[label_map[str(l)] for l in labels], ax=ax)
+    ax.set_xlabel("Predicted")
+    ax.set_ylabel("True")
+    ax.set_title("Confusion Matrix of Shanghai Demo Samples")
+    plt.tight_layout()
+    return fig
 # ----------------- Gradio Interface -----------------
+with gr.Blocks() as demo:
+    with gr.Tab("Single Prediction"):
+        gr.Interface(
+            fn=predict,
+            inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+            outputs="text",
+            description = "Upload or record audio to classify if this is the Shanghai dialect!",
+            live=False
+        )
+    with gr.Tab("Dataset Evaluation"):
+        eval_btn = gr.Button("Run Evaluation on Uploaded Dataset")
+        eval_output = gr.Plot()
+        eval_btn.click(evaluate_dataset, inputs=None, outputs=eval_output)
 if __name__ == "__main__":
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -5,4 +5,7 @@ gradio
 safetensors
 huggingface_hub
 numpy
-soundfile

 safetensors
 huggingface_hub
 numpy
+soundfile
+scikit-learn
+seaborn
+matplotlib