Spaces:
Running
Running
Luis J Camargo committed on
Commit Β·
72cb2ee
1
Parent(s): a0ff692
Replace UI labels with interactive Top-K DataFrame table
Browse files- app.py +54 -29
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
import torch
|
| 5 |
import numpy as np
|
| 6 |
import librosa
|
|
|
|
| 7 |
from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig, WhisperPreTrainedModel
|
| 8 |
from transformers.models.whisper.modeling_whisper import WhisperEncoder
|
| 9 |
import torch.nn as nn
|
|
@@ -150,7 +151,7 @@ def get_mem_usage():
|
|
| 150 |
return process.memory_info().rss / (1024 ** 2)
|
| 151 |
|
| 152 |
# === INFERENCE FUNCTION ===
|
| 153 |
-
def predict_language(audio_path):
|
| 154 |
if not audio_path:
|
| 155 |
raise gr.Error("No audio provided! Please upload or record an audio file.")
|
| 156 |
|
|
@@ -194,31 +195,47 @@ def predict_language(audio_path):
|
|
| 194 |
super_probs = torch.softmax(outputs["super_logits"], dim=-1)
|
| 195 |
code_probs = torch.softmax(outputs["code_logits"], dim=-1)
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
super_conf = super_probs[0, super_idx].item()
|
| 203 |
-
code_conf = code_probs[0, code_idx].item()
|
| 204 |
-
|
| 205 |
-
# Map indices to human-readable strings using the LabelExtractor logic
|
| 206 |
-
# Strip the "<|" and "|>" tags if present for a cleaner UI
|
| 207 |
-
fam_text = label_extractor.family_labels[fam_idx].strip("<@|>") if fam_idx < len(label_extractor.family_labels) else f"Unknown Fam ({fam_idx})"
|
| 208 |
-
super_text = label_extractor.super_labels[super_idx].strip("<|>") if super_idx < len(label_extractor.super_labels) else f"Unknown Super ({super_idx})"
|
| 209 |
-
code_raw = label_extractor.code_labels[code_idx].strip("<|>") if code_idx < len(label_extractor.code_labels) else f"Unknown Code ({code_idx})"
|
| 210 |
-
|
| 211 |
-
# Apply inali_name mapping
|
| 212 |
-
code_text = f"{CODE_TO_NAME[code_raw]} ({code_raw})" if code_raw in CODE_TO_NAME else code_raw
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
|
| 215 |
print(f"--- [LOG] Request Finished ---\n")
|
| 216 |
|
| 217 |
-
return
|
| 218 |
-
{fam_text: fam_conf},
|
| 219 |
-
{super_text: super_conf},
|
| 220 |
-
{code_text: code_conf}
|
| 221 |
-
)
|
| 222 |
except Exception as e:
|
| 223 |
print(f"Error during inference: {e}")
|
| 224 |
raise gr.Error(f"Processing failed: {str(e)}")
|
|
@@ -242,26 +259,34 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"))
|
|
| 242 |
type="filepath", # Changed from numpy to filepath
|
| 243 |
label="Upload or Record"
|
| 244 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
with gr.Row():
|
| 246 |
clear_btn = gr.Button("ποΈ Clear", variant="secondary")
|
| 247 |
submit_btn = gr.Button("π Classify", variant="primary")
|
| 248 |
|
| 249 |
with gr.Column(scale=1):
|
| 250 |
gr.Markdown("### π 2. Classification Results")
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
submit_btn.click(
|
| 256 |
fn=predict_language,
|
| 257 |
-
inputs=audio_input,
|
| 258 |
-
outputs=[
|
| 259 |
)
|
| 260 |
|
| 261 |
clear_btn.click(
|
| 262 |
-
fn=lambda: (None, None
|
| 263 |
inputs=None,
|
| 264 |
-
outputs=[audio_input,
|
| 265 |
)
|
| 266 |
|
| 267 |
gr.Markdown(
|
|
|
|
| 4 |
import torch
|
| 5 |
import numpy as np
|
| 6 |
import librosa
|
| 7 |
+
import pandas as pd
|
| 8 |
from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig, WhisperPreTrainedModel
|
| 9 |
from transformers.models.whisper.modeling_whisper import WhisperEncoder
|
| 10 |
import torch.nn as nn
|
|
|
|
| 151 |
return process.memory_info().rss / (1024 ** 2)
|
| 152 |
|
| 153 |
# === INFERENCE FUNCTION ===
|
| 154 |
+
def predict_language(audio_path, top_k=3, threshold=0.0):
|
| 155 |
if not audio_path:
|
| 156 |
raise gr.Error("No audio provided! Please upload or record an audio file.")
|
| 157 |
|
|
|
|
| 195 |
super_probs = torch.softmax(outputs["super_logits"], dim=-1)
|
| 196 |
code_probs = torch.softmax(outputs["code_logits"], dim=-1)
|
| 197 |
|
| 198 |
+
# Extract top-k indices and probabilities
|
| 199 |
+
top_k = int(top_k)
|
| 200 |
+
fam_top = torch.topk(fam_probs[0], min(top_k, fam_probs.shape[-1]))
|
| 201 |
+
super_top = torch.topk(super_probs[0], min(top_k, super_probs.shape[-1]))
|
| 202 |
+
code_top = torch.topk(code_probs[0], min(top_k, code_probs.shape[-1]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
table_data = []
|
| 205 |
+
|
| 206 |
+
# Helper to format and add results to the table
|
| 207 |
+
def add_to_table(category, top_vals, top_idx, labels_list, apply_mapping=False):
|
| 208 |
+
# top_vals and top_idx are 1D tensors
|
| 209 |
+
valid_rank = 1
|
| 210 |
+
for i in range(len(top_vals)):
|
| 211 |
+
score = top_vals[i].item()
|
| 212 |
+
if score < threshold:
|
| 213 |
+
continue
|
| 214 |
+
|
| 215 |
+
idx = top_idx[i].item()
|
| 216 |
+
raw_label = labels_list[idx].strip("<|>") if idx < len(labels_list) else f"Unknown ({idx})"
|
| 217 |
+
|
| 218 |
+
if apply_mapping:
|
| 219 |
+
name = f"{CODE_TO_NAME[raw_label]} ({raw_label})" if raw_label in CODE_TO_NAME else raw_label
|
| 220 |
+
else:
|
| 221 |
+
name = raw_label
|
| 222 |
+
|
| 223 |
+
table_data.append([category, valid_rank, name, f"{score:.2%}"])
|
| 224 |
+
valid_rank += 1
|
| 225 |
+
|
| 226 |
+
add_to_table("π Family", fam_top.values, fam_top.indices, label_extractor.family_labels)
|
| 227 |
+
add_to_table("π£οΈ Superlanguage", super_top.values, super_top.indices, label_extractor.super_labels)
|
| 228 |
+
add_to_table("π€ Code", code_top.values, code_top.indices, label_extractor.code_labels, apply_mapping=True)
|
| 229 |
+
|
| 230 |
+
if not table_data:
|
| 231 |
+
df = pd.DataFrame(columns=["Category", "Rank", "Prediction", "Confidence"])
|
| 232 |
+
else:
|
| 233 |
+
df = pd.DataFrame(table_data, columns=["Category", "Rank", "Prediction", "Confidence"])
|
| 234 |
+
|
| 235 |
print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
|
| 236 |
print(f"--- [LOG] Request Finished ---\n")
|
| 237 |
|
| 238 |
+
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
except Exception as e:
|
| 240 |
print(f"Error during inference: {e}")
|
| 241 |
raise gr.Error(f"Processing failed: {str(e)}")
|
|
|
|
| 259 |
type="filepath", # Changed from numpy to filepath
|
| 260 |
label="Upload or Record"
|
| 261 |
)
|
| 262 |
+
with gr.Row():
|
| 263 |
+
top_k = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Top-K Predictions")
|
| 264 |
+
threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.0, label="Confidence Threshold")
|
| 265 |
+
|
| 266 |
with gr.Row():
|
| 267 |
clear_btn = gr.Button("ποΈ Clear", variant="secondary")
|
| 268 |
submit_btn = gr.Button("π Classify", variant="primary")
|
| 269 |
|
| 270 |
with gr.Column(scale=1):
|
| 271 |
gr.Markdown("### π 2. Classification Results")
|
| 272 |
+
results_table = gr.Dataframe(
|
| 273 |
+
headers=["Category", "Rank", "Prediction", "Confidence"],
|
| 274 |
+
datatype=["str", "number", "str", "str"],
|
| 275 |
+
label="Predictions",
|
| 276 |
+
interactive=False,
|
| 277 |
+
wrap=True
|
| 278 |
+
)
|
| 279 |
|
| 280 |
submit_btn.click(
|
| 281 |
fn=predict_language,
|
| 282 |
+
inputs=[audio_input, top_k, threshold],
|
| 283 |
+
outputs=[results_table]
|
| 284 |
)
|
| 285 |
|
| 286 |
clear_btn.click(
|
| 287 |
+
fn=lambda: (None, None),
|
| 288 |
inputs=None,
|
| 289 |
+
outputs=[audio_input, results_table]
|
| 290 |
)
|
| 291 |
|
| 292 |
gr.Markdown(
|
requirements.txt
CHANGED
|
@@ -5,4 +5,4 @@ numpy
|
|
| 5 |
librosa
|
| 6 |
huggingface_hub
|
| 7 |
safetensors
|
| 8 |
-
|
|
|
|
| 5 |
librosa
|
| 6 |
huggingface_hub
|
| 7 |
safetensors
|
| 8 |
+
psutil
pandas
|