File size: 4,819 Bytes
a5c2f8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a0bd1
a5c2f8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1a0bd1
a5c2f8d
 
 
 
 
 
 
 
 
 
 
 
 
f1a0bd1
a5c2f8d
f1a0bd1
a5c2f8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c78a7cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
from transformers import pipeline

# Module-level cache for the audio-classification pipeline; filled on first use.
_classifier = None


def get_classifier():
    """Return the shared audio-classification pipeline, creating it on demand.

    The pipeline is built the first time this function is called and stored
    in the module-level ``_classifier`` variable; later calls return that
    same object without building it again.
    """
    global _classifier
    if _classifier is not None:
        return _classifier

    model_id = "superb/wav2vec2-base-superb-er"
    _classifier = pipeline("audio-classification", model=model_id)
    return _classifier

# Maps raw classifier labels (short codes and full lowercase names) to the
# display names used as keys of EMOTION_STYLE.
LABEL_MAP = {
    # Short codes. "sad" is both the short code and the full name, so it is
    # listed only once (a repeated key in a dict literal is silently
    # overwritten and flagged by linters).
    "neu": "Neutral",
    "hap": "Happy",
    "sad": "Sad",
    "ang": "Angry",
    # Full lowercase names.
    "neutral": "Neutral",
    "happy": "Happy",
    "angry": "Angry",
}

# Presentation attributes for each display emotion name: the emoji shown next
# to the label and the colour used for its label/bar.
EMOTION_STYLE = {
    "Happy": {
        "emoji": "😊",
        "color": "#f1c40f",
    },
    "Sad": {
        "emoji": "😒",
        "color": "#3498db",
    },
    "Angry": {
        "emoji": "😠",
        "color": "#e74c3c",
    },
    "Neutral": {
        "emoji": "😐",
        "color": "#95a5a6",
    },
}

def analyze(audio_path):
    """Classify the emotion of an audio file and build the display payload.

    Args:
        audio_path: Path of the audio file to classify, or ``None`` when no
            audio has been provided.

    Returns:
        ``{"is_done": False}`` when there is nothing to display (no audio was
        given, or the classifier returned no predictions). Otherwise a dict
        with ``is_done`` set to ``True``; ``top_emoji``, ``top_label`` and
        ``top_color`` describing the highest-scoring prediction; and
        ``emotions``, a list with one ``{"label", "emoji", "color", "pct"}``
        entry per prediction ordered from highest to lowest score, where
        ``pct`` is the score as a rounded percentage.
    """
    if audio_path is None:
        return {"is_done": False}

    results = get_classifier()(audio_path)
    if not results:
        # No predictions to rank; fall back to the "nothing yet" state
        # instead of failing when picking the top entry.
        return {"is_done": False}

    # Style used for labels that have no entry in EMOTION_STYLE.
    fallback_style = {"emoji": "❓", "color": "#999"}

    # A single descending sort serves both the ranking and the top pick:
    # sorted() is stable, so on ties the earliest prediction stays first,
    # which is the same element max() would select.
    ranked = sorted(results, key=lambda x: x["score"], reverse=True)

    emotions = []
    for prediction in ranked:
        label = LABEL_MAP.get(prediction["label"], prediction["label"])
        style = EMOTION_STYLE.get(label, fallback_style)
        emotions.append({
            "label": label,
            "emoji": style["emoji"],
            "color": style["color"],
            "pct": round(prediction["score"] * 100),
        })

    top = emotions[0]
    return {
        "is_done": True,
        "top_emoji": top["emoji"],
        "top_label": top["label"],
        "top_color": top["color"],
        "emotions": emotions,
    }

# Build the UI: an audio input wired to an HTML result panel that is fed by
# analyze().
with gr.Blocks(title="Audio Emotion Detector") as demo:
    gr.Markdown("## Audio Emotion Detector\nRecord your voice or upload a clip to detect its emotional tone.")

    # type="filepath" matches analyze(), which forwards the value to the
    # classifier as a path.
    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath",
                           label="Record or upload audio")

    result = gr.HTML(
        # Initial payload: same shape analyze() returns when there is no
        # audio, so the template's placeholder branch is shown.
        value={"is_done": False},
        # Markup template. When value.is_done is truthy it renders the top
        # emotion ("hero") followed by one bar row per entry of
        # value.emotions; otherwise it renders the placeholder paragraph.
        # Each bar's final width is passed through the --target-width
        # custom property.
        html_template="""
            {{#if value.is_done}}
                <div class="hero">
                    <span class="hero-emoji">{{value.top_emoji}}</span>
                    <div class="hero-label" style="color:{{value.top_color}}">{{value.top_label}}</div>
                </div>
                <div class="bars">
                    {{#each value.emotions}}
                        <div class="bar-row">
                            <span class="emo">{{this.emoji}}</span>
                            <span class="name">{{this.label}}</span>
                            <div class="track">
                                <div class="fill" style="--target-width:{{this.pct}}%; background:{{this.color}}"></div>
                            </div>
                            <span class="pct">{{this.pct}}%</span>
                        </div>
                    {{/each}}
                </div>
            {{else}}
                <p class="empty">Record or upload audio to detect its emotion.</p>
            {{/if}}
        """,
        # Styles for the panel. .fill starts at width 0% and the "grow"
        # keyframes animate it to var(--target-width), which the template
        # sets per bar.
        css_template="""
            .hero { text-align: center; padding: 16px 0; }
            .hero-emoji { font-size: 64px; display: block; }
            .hero-label { font-size: 1.3em; font-weight: 700; margin-top: 6px; }
            .bars { margin-top: 12px; }
            .bar-row {
                display: flex; align-items: center; gap: 8px; margin: 6px 0;
                padding: 4px 0;
            }
            .bar-row:hover { background: #f8f8f8; border-radius: 6px; }
            .emo { width: 28px; font-size: 20px; text-align: center; }
            .name { width: 70px; font-weight: 600; font-size: 0.9em; }
            .track {
                flex: 1; background: #f0f0f0; border-radius: 6px;
                height: 22px; overflow: hidden;
            }
            .fill {
                height: 100%; border-radius: 6px; min-width: 3px;
                width: 0%;
                animation: grow 0.6s ease-out forwards;
            }
            @keyframes grow {
                to { width: var(--target-width); }
            }
            .pct { width: 40px; text-align: right; color: #888; font-size: 0.85em; }
            .empty { color: #aaa; text-align: center; padding: 40px; }
        """,
        # Script that installs a MutationObserver on `element`: on any
        # child-list change in the subtree it clears and restores the
        # animation of every .fill bar so the grow animation restarts.
        # NOTE(review): `element` is presumably the component's root node
        # supplied by Gradio to js_on_load — confirm against the Gradio docs.
        js_on_load="""
            // Re-trigger bar animations whenever new results appear
            const observer = new MutationObserver(() => {
                element.querySelectorAll('.fill').forEach(bar => {
                    bar.style.animation = 'none';
                    bar.offsetHeight;  // force reflow
                    bar.style.animation = '';
                });
            });
            observer.observe(element, { childList: true, subtree: true });
        """
    )

    # Re-run analyze() whenever the audio input changes and push its payload
    # into the result panel.
    audio_input.change(fn=analyze, inputs=audio_input, outputs=result)

# Launched unconditionally at module level (there is no __main__ guard), so
# this also runs when the module is imported.
demo.launch()