# Scraped page header (not part of the program): RohitManglik's picture
# Commit message: Update app.py
# Commit: bc62dd0 (verified)
import os
from huggingface_hub import login
login(token=os.getenv("HF_TOKEN"))
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import whisper
import pickle
from huggingface_hub import hf_hub_download
# ── Model Loading ──────────────────────────────────────────
# Hub repository that provides the tokenizer, the DistilBERT
# sequence-classification weights and the pickled label encoder.
model_path = "InfoBayAI/Audio-to-Sentiment_Intelligence_Model"

# Read the access token once; the same value authenticates every Hub request below.
_hf_token = os.getenv("HF_TOKEN")

tokenizer = DistilBertTokenizer.from_pretrained(model_path, token=_hf_token)
model = DistilBertForSequenceClassification.from_pretrained(
    model_path,
    token=_hf_token,
)

label_path = hf_hub_download(
    repo_id=model_path,
    filename="label_encoder.pkl",
    token=_hf_token,
)
# NOTE(security): unpickling can execute arbitrary code, so this is only safe
# for as long as the repository above is trusted.
with open(label_path, "rb") as label_file:
    encoder = pickle.load(label_file)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# 🎤 Whisper model
whisper_model = whisper.load_model("base")
# ── Sentiment Prediction ───────────────────────────────────
def predict_sentiment(text):
    """Classify ``text`` with the module-level tokenizer, model and encoder.

    Returns:
        A ``(label, confidence)`` pair: the label decoded by
        ``encoder.inverse_transform`` for the highest-probability class, and
        that class's softmax probability expressed as a percentage.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Move every input tensor to the device the model lives on.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**batch).logits
        probabilities = F.softmax(logits, dim=1)

    top_probability, top_index = probabilities.max(dim=1)
    class_id = top_index.item()
    label = encoder.inverse_transform([class_id])[0]
    return label, top_probability.item() * 100
# ── Main Pipeline ──────────────────────────────────────────
def _render_report(text, label, confidence):
    """Format the transcription and sentiment result as the report shown in the UI."""
    # The decoded label is not guaranteed to be a ``str``; normalise it once so
    # the ``.lower()`` / ``.upper()`` calls below cannot fail on other types.
    label_text = str(label)

    # Confidence bar (ASCII): 20 cells, one per 5 percentage points.
    filled = int(confidence / 5)
    bar = "█" * filled + "░" * (20 - filled)

    # Unknown labels fall back to the generic target emoji.
    sentiment_emoji = {
        "positive": "😊", "negative": "😞", "neutral": "😐",
        "happy": "😄", "sad": "😢", "angry": "😠", "fear": "😨",
        "surprise": "😲", "disgust": "🤢"
    }.get(label_text.lower(), "🎯")

    return (
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f" 🎯 TRANSCRIPTION\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        f"{text}\n\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f" {sentiment_emoji} SENTIMENT ANALYSIS\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        f" Sentiment : {label_text.upper()}\n"
        f" Confidence : {confidence:.1f}%\n\n"
        f" [{bar}]\n"
    )


def process_audio(audio_file):
    """Transcribe an audio file and report the sentiment of the transcript.

    Args:
        audio_file: A file path, ``None``, or a tuple/list whose first element
            is the file path.

    Returns:
        A human-readable string: the formatted report on success, or a
        warning/error message. Failures during transcription or prediction are
        reported as text rather than raised, so the UI always gets a string.
    """
    if audio_file is None:
        return "⚠️ No audio provided.\n\nPlease upload a file or record audio using your microphone, then click Analyze."

    # The original author notes that the Gradio Audio component may hand over
    # a (path, [start, end]) sequence when the trim tool is used
    # (TODO confirm against the installed Gradio version). Keep only the path so
    # the full audio is processed. An empty sequence is treated as "no file"
    # instead of raising IndexError.
    if isinstance(audio_file, (tuple, list)):
        audio_file = audio_file[0] if audio_file else None
    if not audio_file:
        return "⚠️ No audio file found."

    try:
        # Transcribe full audio — do NOT pass trim timestamps
        result = whisper_model.transcribe(audio_file, task="translate")
        text = result["text"].strip()
        if not text:
            return "⚠️ Could not transcribe any speech from the audio.\n\nPlease ensure the audio contains clear speech."
        label, confidence = predict_sentiment(text)
        return _render_report(text, label, confidence)
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the app.
        return f"❌ Error during processing:\n\n{str(e)}\n\nPlease try again with a different audio file."
# ── CSS ────────────────────────────────────────────────────
APP_CSS = """
/* ━━━━ GOOGLE FONTS ━━━━ */
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;700;800&display=swap');
/* ━━━━ RESET & ROOT ━━━━ */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg-base: #080810;
--bg-surface: #0f0f1c;
--bg-card: #13132a;
--bg-element: #1a1a35;
--border: #252540;
--border-glow: #4f3fbf;
--accent: #7c5cfc;
--accent-light: #a78bfa;
--accent-dim: #3d2d8a;
--text-primary: #e8e4ff;
--text-muted: #7870a8;
--text-dim: #3d3a6e;
--success: #34d399;
--danger: #f87171;
--font-display: 'Syne', sans-serif;
--font-mono: 'Space Mono', monospace;
--radius-sm: 8px;
--radius-md: 14px;
--radius-lg: 20px;
--shadow-glow: 0 0 30px rgba(124, 92, 252, 0.15);
}
/* ━━━━ BODY & CONTAINER ━━━━ */
body,
.gradio-container,
.gradio-container > * {
background-color: var(--bg-base) !important;
font-family: var(--font-display) !important;
color: var(--text-primary) !important;
}
.gradio-container {
max-width: 1400px !important;
width: 100% !important;
min-height: 100vh !important;
margin: 0 auto !important;
padding: 32px 24px 60px !important;
}
/* ━━━━ HEADER ━━━━ */
.app-header {
text-align: center;
padding: 40px 0 32px;
}
.app-header h1 {
font-family: var(--font-display) !important;
font-size: clamp(28px, 4vw, 48px) !important;
font-weight: 800 !important;
color: var(--text-primary) !important;
letter-spacing: -1px !important;
line-height: 1.1 !important;
margin-bottom: 12px !important;
}
.app-header h1 span { color: var(--accent-light); }
.app-subtitle {
color: var(--text-muted);
font-family: var(--font-mono);
font-size: 13px;
letter-spacing: 0.5px;
padding: 6px 16px;
border: 1px solid var(--border);
border-radius: 100px;
display: inline-block;
background: var(--bg-surface);
}
/* ━━━━ LAYOUT ROW ━━━━ */
.main-row {
gap: 20px !important;
align-items: stretch !important;
}
/* ━━━━ PANELS ━━━━ */
.panel-input,
.panel-output {
background: var(--bg-card) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius-lg) !important;
padding: 28px !important;
box-shadow: var(--shadow-glow) !important;
display: flex !important;
flex-direction: column !important;
gap: 0 !important;
}
.panel-input { flex: 1 !important; min-width: 360px !important; }
.panel-output { flex: 1.6 !important; min-width: 420px !important; }
/* ━━━━ SECTION HEADINGS ━━━━ */
.panel-input h2,
.panel-output h2 {
font-family: var(--font-display) !important;
font-size: 15px !important;
font-weight: 700 !important;
color: var(--text-muted) !important;
text-transform: uppercase !important;
letter-spacing: 2px !important;
margin-bottom: 20px !important;
padding-bottom: 12px !important;
border-bottom: 1px solid var(--border) !important;
}
/* ━━━━ HINT LABELS ━━━━ */
.hint-label {
font-family: var(--font-mono);
font-size: 11px;
color: var(--text-muted);
letter-spacing: 0.5px;
margin-bottom: 8px;
margin-top: 4px;
}
/* ━━━━ AUDIO COMPONENTS — THE CRITICAL FIX ━━━━ */
/* Outer wrapper Gradio creates */
.audio-component-wrap {
width: 100% !important;
margin-bottom: 16px !important;
}
/* Kill ALL overflow:hidden on audio internals */
.audio-component-wrap *,
[data-testid="audio"],
[data-testid="audio"] *,
.waveform-container,
.waveform-container *,
.waveform-controls,
.waveform-controls * {
overflow: visible !important;
}
/* The main audio block */
[data-testid="audio"],
.gr-audio,
.audio-wrap > div {
background: var(--bg-element) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius-md) !important;
padding: 0 !important;
width: 100% !important;
overflow: visible !important;
}
/* Waveform canvas area — give it proper height */
.waveform-container canvas,
[class*="waveform"] canvas {
width: 100% !important;
min-height: 80px !important;
display: block !important;
border-radius: var(--radius-sm) var(--radius-sm) 0 0 !important;
}
/* Controls toolbar row — MUST NOT clip */
.waveform-controls,
[class*="controls"],
.audio-controls,
[data-testid="waveform-controls"] {
display: flex !important;
flex-direction: row !important;
flex-wrap: nowrap !important;
align-items: center !important;
gap: 6px !important;
padding: 10px 12px !important;
background: var(--bg-card) !important;
border-top: 1px solid var(--border) !important;
border-radius: 0 0 var(--radius-md) var(--radius-md) !important;
width: 100% !important;
min-height: 52px !important;
overflow: visible !important;
}
/* Time labels */
.waveform-controls span,
[class*="time"],
[class*="timestamp"] {
font-family: var(--font-mono) !important;
font-size: 11px !important;
color: var(--text-muted) !important;
white-space: nowrap !important;
flex-shrink: 0 !important;
}
/* ALL icon/control buttons in audio toolbar */
.waveform-controls button,
.audio-controls button,
[data-testid="audio"] button,
[class*="icon-button"],
[class*="control-button"],
[class*="audio-button"] {
background: var(--bg-surface) !important;
border: 1px solid var(--border) !important;
color: var(--accent-light) !important;
border-radius: var(--radius-sm) !important;
width: 32px !important;
height: 32px !important;
min-width: 32px !important;
min-height: 32px !important;
max-width: 32px !important;
max-height: 32px !important;
padding: 0 !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
cursor: pointer !important;
flex-shrink: 0 !important;
transition: background 0.15s, border-color 0.15s !important;
}
.waveform-controls button:hover,
.audio-controls button:hover,
[data-testid="audio"] button:hover {
background: var(--accent-dim) !important;
border-color: var(--accent) !important;
}
/* SVG icons inside buttons */
.waveform-controls button svg,
.audio-controls button svg,
[data-testid="audio"] button svg {
width: 14px !important;
height: 14px !important;
fill: currentColor !important;
stroke: currentColor !important;
flex-shrink: 0 !important;
}
/* Play button — slightly larger accent */
[data-testid="play-pause-button"],
[aria-label="Play"],
[aria-label="Pause"] {
background: var(--accent) !important;
border-color: var(--accent) !important;
color: #fff !important;
width: 36px !important;
height: 36px !important;
min-width: 36px !important;
max-width: 36px !important;
border-radius: 50% !important;
}
/* Speed badge */
[class*="speed"],
[aria-label*="speed"],
[aria-label*="Speed"] {
font-family: var(--font-mono) !important;
font-size: 10px !important;
min-width: 36px !important;
max-width: 36px !important;
padding: 0 4px !important;
letter-spacing: 0 !important;
}
/* Upload drag area */
[data-testid="audio"] .upload-container,
[data-testid="audio"] .empty-state,
.audio-component-wrap .upload-container {
background: var(--bg-element) !important;
border: 2px dashed var(--border-glow) !important;
border-radius: var(--radius-md) !important;
min-height: 100px !important;
display: flex !important;
flex-direction: column !important;
align-items: center !important;
justify-content: center !important;
color: var(--text-muted) !important;
font-family: var(--font-mono) !important;
font-size: 12px !important;
cursor: pointer !important;
transition: border-color 0.2s, background 0.2s !important;
}
.audio-component-wrap .upload-container:hover {
border-color: var(--accent) !important;
background: rgba(124, 92, 252, 0.06) !important;
}
/* ━━━━ DIVIDER ━━━━ */
.section-divider {
border: none !important;
border-top: 1px solid var(--border) !important;
margin: 22px 0 !important;
}
/* ━━━━ ANALYZE BUTTONS ━━━━ */
.btn-analyze {
width: 100% !important;
padding: 13px 20px !important;
font-family: var(--font-display) !important;
font-size: 14px !important;
font-weight: 700 !important;
letter-spacing: 0.5px !important;
color: #fff !important;
background: linear-gradient(135deg, var(--accent-dim), var(--accent)) !important;
border: none !important;
border-radius: var(--radius-md) !important;
cursor: pointer !important;
transition: opacity 0.2s, transform 0.1s, box-shadow 0.2s !important;
box-shadow: 0 4px 20px rgba(124, 92, 252, 0.3) !important;
margin-bottom: 4px !important;
}
/* Catch all Gradio primary button variants */
button.lg,
button[class*="primary"],
.svelte-cmf5ev,
[data-testid*="analyze"] {
background: linear-gradient(135deg, var(--accent-dim), var(--accent)) !important;
color: #fff !important;
border: none !important;
border-radius: var(--radius-md) !important;
font-family: var(--font-display) !important;
font-size: 14px !important;
font-weight: 700 !important;
padding: 13px 20px !important;
width: 100% !important;
cursor: pointer !important;
box-shadow: 0 4px 20px rgba(124, 92, 252, 0.3) !important;
transition: opacity 0.2s, transform 0.1s !important;
margin-bottom: 4px !important;
}
button.lg:hover,
button[class*="primary"]:hover {
opacity: 0.88 !important;
transform: translateY(-1px) !important;
box-shadow: 0 6px 28px rgba(124, 92, 252, 0.45) !important;
}
button.lg:active { transform: translateY(0) !important; }
/* Secondary / clear buttons */
.btn-secondary button,
button.secondary,
button[class*="secondary"] {
background: var(--bg-element) !important;
border: 1px solid var(--border) !important;
color: var(--text-muted) !important;
font-family: var(--font-display) !important;
font-size: 13px !important;
font-weight: 600 !important;
border-radius: var(--radius-md) !important;
padding: 10px 20px !important;
width: 100% !important;
cursor: pointer !important;
transition: background 0.2s, color 0.2s !important;
}
.btn-secondary button:hover { background: var(--bg-surface) !important; color: var(--text-primary) !important; }
/* ━━━━ OUTPUT TEXTBOX ━━━━ */
.panel-output textarea,
.panel-output .gr-textbox textarea,
textarea {
background: var(--bg-surface) !important;
color: var(--text-primary) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius-md) !important;
font-family: var(--font-mono) !important;
font-size: 13px !important;
line-height: 1.8 !important;
min-height: 420px !important;
max-height: 70vh !important;
padding: 20px !important;
resize: vertical !important;
width: 100% !important;
transition: border-color 0.2s !important;
}
textarea:focus {
outline: none !important;
border-color: var(--accent) !important;
box-shadow: 0 0 0 3px rgba(124, 92, 252, 0.12) !important;
}
textarea::placeholder { color: var(--text-dim) !important; }
/* ━━━━ LABELS ━━━━ */
label span,
.label-wrap span,
.gr-textbox label span,
[class*="label"] {
font-family: var(--font-mono) !important;
font-size: 11px !important;
color: var(--text-muted) !important;
text-transform: uppercase !important;
letter-spacing: 1px !important;
}
/* ━━━━ SCROLLBARS ━━━━ */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg-base); }
::-webkit-scrollbar-thumb { background: var(--border-glow); border-radius: 4px; }
/* ━━━━ HIDE GRADIO FOOTER ━━━━ */
footer,
.built-with,
[class*="built-with"],
.footer,
.svelte-1ax1toq,
.gradio-container ~ footer {
display: none !important;
visibility: hidden !important;
height: 0 !important;
overflow: hidden !important;
}
/* ━━━━ RECORDING PULSE ━━━━ */
@keyframes pulse-rec {
0%, 100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.5); }
50% { box-shadow: 0 0 0 10px rgba(239, 68, 68, 0); }
}
[data-testid="record-button"][aria-pressed="true"],
.recording button {
animation: pulse-rec 1.2s infinite !important;
background: #dc2626 !important;
border-color: #ef4444 !important;
}
/* ━━━━ RESPONSIVE ━━━━ */
@media (max-width: 800px) {
.gradio-container { padding: 16px 12px 40px !important; }
.panel-input, .panel-output { min-width: 100% !important; padding: 18px !important; }
}
"""
# ── Interface ──────────────────────────────────────────────
def create_interface():
    """Build the two-panel Gradio Blocks app and wire its button events.

    The left column holds the upload and microphone inputs with their analyze
    buttons; the right column holds the read-only result textbox.

    Returns:
        The assembled ``gr.Blocks`` object. Launching it is left to the caller.
    """
    # Keyword arguments shared by both audio inputs and by both analyze buttons.
    audio_kwargs = {"type": "filepath", "elem_classes": "audio-component-wrap"}
    analyze_kwargs = {"variant": "primary", "elem_classes": "btn-analyze"}
    divider_html = "<hr class='section-divider'>"
    result_placeholder = (
        "Results will appear here after analysis...\n\n"
        " 🎯 Transcription\n"
        " 📊 Sentiment label\n"
        " 🔍 Confidence score\n"
        " ▓▓▓ Confidence bar"
    )

    # NOTE(review): the file's own notes say Gradio 6.0 expects css and theme in
    # launch(); the theme is nevertheless passed to Blocks() here — confirm.
    with gr.Blocks(theme=gr.themes.Base()) as interface:
        # Header
        gr.HTML("""
<div class="app-header">
<h1>🎧 Audio Sentiment <span>Analyzer</span></h1>
<div class="app-subtitle">DistilBERT · Whisper · Multi-language · Real-time analysis</div>
</div>
""")

        with gr.Row(elem_classes="main-row", equal_height=False):
            # ── INPUT PANEL ───────────────────────────────
            with gr.Column(scale=1, elem_classes="panel-input"):
                gr.Markdown("## 🎤 Input Panel")

                # Upload
                gr.HTML("<div class='hint-label'>📁 Upload audio file — MP3, WAV, M4A, OGG, FLAC</div>")
                file_input = gr.Audio(
                    sources=["upload"],
                    label="Upload Audio File",
                    **audio_kwargs,
                )
                analyze_file_btn = gr.Button("🚀 Analyze Uploaded Audio", **analyze_kwargs)
                gr.HTML(divider_html)

                # Record
                gr.HTML("<div class='hint-label'>🎙️ Record live from microphone</div>")
                mic_input = gr.Audio(
                    sources=["microphone"],
                    label="Microphone Recording",
                    **audio_kwargs,
                )
                analyze_mic_btn = gr.Button("🎙️ Analyze Recorded Audio", **analyze_kwargs)
                gr.HTML(divider_html)

                with gr.Row(elem_classes="btn-secondary"):
                    reset_all_btn = gr.Button("🧹 Clear All", variant="secondary")

            # ── OUTPUT PANEL ──────────────────────────────
            with gr.Column(scale=2, elem_classes="panel-output"):
                gr.Markdown("## 📊 Result Panel")
                result_box = gr.Textbox(
                    lines=22,
                    label="Analysis Result",
                    placeholder=result_placeholder,
                    interactive=False,
                )
                with gr.Row(elem_classes="btn-secondary"):
                    reset_result_btn = gr.Button("🧹 Clear Result", variant="secondary")

        # ── Events ──────────────────────────────────────
        # Both analyze buttons run the same pipeline on their own audio input.
        analyze_file_btn.click(fn=process_audio, inputs=file_input, outputs=result_box)
        analyze_mic_btn.click(fn=process_audio, inputs=mic_input, outputs=result_box)
        # "Clear All" empties both audio inputs and the result text.
        reset_all_btn.click(
            fn=lambda: (None, None, ""),
            inputs=[],
            outputs=[file_input, mic_input, result_box],
        )
        # "Clear Result" empties only the result text.
        reset_result_btn.click(
            fn=lambda: "",
            inputs=[],
            outputs=result_box,
        )

    return interface
# ── Launch ─────────────────────────────────────────────────
if __name__ == "__main__":
    interface = create_interface()
    # Gradio 6.0: css and theme are passed to launch(). The theme is supplied
    # here alongside the css so both app-level styling options reach launch().
    interface.launch(share=True, css=APP_CSS, theme=gr.themes.Base())