import logging
import os
import pickle

import gradio as gr
import torch
import torch.nn.functional as F
import whisper
from huggingface_hub import hf_hub_download, login
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

logger = logging.getLogger(__name__)

# Read the Hugging Face token once; every Hub call below reuses it.
HF_TOKEN = os.getenv("HF_TOKEN")

# Only log in when a token is configured. Calling login() without a token
# falls back to an interactive prompt, which cannot be answered on a
# headless host.
if HF_TOKEN:
    login(token=HF_TOKEN)

# ── Model Loading ──────────────────────────────────────────
model_path = "InfoBayAI/Audio-to-Sentiment_Intelligence_Model"

tokenizer = DistilBertTokenizer.from_pretrained(model_path, token=HF_TOKEN)
model = DistilBertForSequenceClassification.from_pretrained(
    model_path, token=HF_TOKEN
)

label_path = hf_hub_download(
    repo_id=model_path,
    filename="label_encoder.pkl",
    token=HF_TOKEN,
)

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# This is only acceptable while the model repository above is fully trusted.
with open(label_path, "rb") as f:
    encoder = pickle.load(f)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# 🎤 Whisper model
whisper_model = whisper.load_model("base")

# Width of the ASCII confidence bar; each cell stands for 5 percentage points.
_BAR_WIDTH = 20

# Emoji shown next to the report heading, keyed by lower-cased label.
_SENTIMENT_EMOJI = {
    "positive": "😊",
    "negative": "😞",
    "neutral": "😐",
    "happy": "😄",
    "sad": "😢",
    "angry": "😠",
    "fear": "😨",
    "surprise": "😲",
    "disgust": "🤢",
}

_RULE = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"


# ── Sentiment Prediction ───────────────────────────────────
def predict_sentiment(text):
    """Classify *text* with the DistilBERT model.

    The text is tokenized (truncated to 128 tokens), run through the model
    without gradient tracking, and the highest-probability class is mapped
    back to its label via ``encoder.inverse_transform``.

    Args:
        text: The string to classify. A single string is expected, since the
            predicted class is read with ``.item()``.

    Returns:
        A ``(label, confidence)`` tuple where ``confidence`` is the softmax
        probability of the predicted class expressed as a percentage.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    probs = F.softmax(outputs.logits, dim=1)
    confidence, predicted_class = torch.max(probs, dim=1)

    label = encoder.inverse_transform([predicted_class.item()])[0]
    confidence = confidence.item() * 100
    return label, confidence


def _confidence_bar(confidence):
    """Return a fixed-width ASCII bar for a confidence percentage."""
    # Clamp so an out-of-range value can never change the bar's width.
    filled = max(0, min(_BAR_WIDTH, int(confidence / 5)))
    return "█" * filled + "░" * (_BAR_WIDTH - filled)


def _format_report(text, label, confidence):
    """Render the transcription and sentiment result as the display string."""
    # The encoder may hand back a non-str label; normalise before str methods.
    label = str(label)
    sentiment_emoji = _SENTIMENT_EMOJI.get(label.lower(), "🎯")
    bar = _confidence_bar(confidence)
    return (
        _RULE
        + " 🎯 TRANSCRIPTION\n"
        + _RULE
        + "\n"
        + f"{text}\n\n"
        + _RULE
        + f" {sentiment_emoji} SENTIMENT ANALYSIS\n"
        + _RULE
        + "\n"
        + f" Sentiment : {label.upper()}\n"
        + f" Confidence : {confidence:.1f}%\n\n"
        + f" [{bar}]\n"
    )


# ── Main Pipeline ──────────────────────────────────────────
def process_audio(audio_file):
    """Transcribe an audio file and report the sentiment of the transcript.

    Args:
        audio_file: Path to the audio file, ``None`` when nothing was
            provided, or a tuple/list whose first element is the path.

    Returns:
        A human-readable string: either the formatted report or a message
        explaining why no analysis could be produced. Processing errors are
        logged and returned as a message rather than raised.
    """
    if audio_file is None:
        return (
            "⚠️ No audio provided.\n\nPlease upload a file or record audio "
            "using your microphone, then click Analyze."
        )

    # Gradio 6 Audio with type="filepath" may pass a tuple (path, [start, end])
    # when the user interacts with the trim tool. Always extract just the
    # filepath so Whisper processes the full audio rather than a trimmed
    # segment. An empty sequence is treated the same as "no file".
    if isinstance(audio_file, (tuple, list)):
        audio_file = audio_file[0] if audio_file else None

    if not audio_file:
        return "⚠️ No audio file found."

    try:
        # Transcribe full audio — do NOT pass trim timestamps
        result = whisper_model.transcribe(audio_file, task="translate")
        text = result["text"].strip()

        if not text:
            return (
                "⚠️ Could not transcribe any speech from the audio.\n\n"
                "Please ensure the audio contains clear speech."
            )

        label, confidence = predict_sentiment(text)
        return _format_report(text, label, confidence)
    except Exception as e:
        # UI boundary: keep the app responsive, but keep the traceback too.
        logger.exception("Audio processing failed for %r", audio_file)
        return (
            f"❌ Error during processing:\n\n{str(e)}\n\n"
            "Please try again with a different audio file."
        )
# ── CSS ────────────────────────────────────────────────────
# Stylesheet for the whole app; handed to launch() at the bottom of the file.
APP_CSS = """
/* ━━━━ GOOGLE FONTS ━━━━ */
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;700;800&display=swap');

/* ━━━━ RESET & ROOT ━━━━ */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

:root {
    --bg-base: #080810;
    --bg-surface: #0f0f1c;
    --bg-card: #13132a;
    --bg-element: #1a1a35;
    --border: #252540;
    --border-glow: #4f3fbf;
    --accent: #7c5cfc;
    --accent-light: #a78bfa;
    --accent-dim: #3d2d8a;
    --text-primary: #e8e4ff;
    --text-muted: #7870a8;
    --text-dim: #3d3a6e;
    --success: #34d399;
    --danger: #f87171;
    --font-display: 'Syne', sans-serif;
    --font-mono: 'Space Mono', monospace;
    --radius-sm: 8px;
    --radius-md: 14px;
    --radius-lg: 20px;
    --shadow-glow: 0 0 30px rgba(124, 92, 252, 0.15);
}

/* ━━━━ BODY & CONTAINER ━━━━ */
body, .gradio-container, .gradio-container > * {
    background-color: var(--bg-base) !important;
    font-family: var(--font-display) !important;
    color: var(--text-primary) !important;
}

.gradio-container {
    max-width: 1400px !important;
    width: 100% !important;
    min-height: 100vh !important;
    margin: 0 auto !important;
    padding: 32px 24px 60px !important;
}

/* ━━━━ HEADER ━━━━ */
.app-header {
    text-align: center;
    padding: 40px 0 32px;
}

.app-header h1 {
    font-family: var(--font-display) !important;
    font-size: clamp(28px, 4vw, 48px) !important;
    font-weight: 800 !important;
    color: var(--text-primary) !important;
    letter-spacing: -1px !important;
    line-height: 1.1 !important;
    margin-bottom: 12px !important;
}

.app-header h1 span { color: var(--accent-light); }

.app-subtitle {
    color: var(--text-muted);
    font-family: var(--font-mono);
    font-size: 13px;
    letter-spacing: 0.5px;
    padding: 6px 16px;
    border: 1px solid var(--border);
    border-radius: 100px;
    display: inline-block;
    background: var(--bg-surface);
}

/* ━━━━ LAYOUT ROW ━━━━ */
.main-row {
    gap: 20px !important;
    align-items: stretch !important;
}

/* ━━━━ PANELS ━━━━ */
.panel-input,
.panel-output {
    background: var(--bg-card) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius-lg) !important;
    padding: 28px !important;
    box-shadow: var(--shadow-glow) !important;
    display: flex !important;
    flex-direction: column !important;
    gap: 0 !important;
}

.panel-input { flex: 1 !important; min-width: 360px !important; }
.panel-output { flex: 1.6 !important; min-width: 420px !important; }

/* ━━━━ SECTION HEADINGS ━━━━ */
.panel-input h2,
.panel-output h2 {
    font-family: var(--font-display) !important;
    font-size: 15px !important;
    font-weight: 700 !important;
    color: var(--text-muted) !important;
    text-transform: uppercase !important;
    letter-spacing: 2px !important;
    margin-bottom: 20px !important;
    padding-bottom: 12px !important;
    border-bottom: 1px solid var(--border) !important;
}

/* ━━━━ HINT LABELS ━━━━ */
.hint-label {
    font-family: var(--font-mono);
    font-size: 11px;
    color: var(--text-muted);
    letter-spacing: 0.5px;
    margin-bottom: 8px;
    margin-top: 4px;
}

/* ━━━━ AUDIO COMPONENTS — THE CRITICAL FIX ━━━━ */
/* Outer wrapper Gradio creates */
.audio-component-wrap {
    width: 100% !important;
    margin-bottom: 16px !important;
}

/* Kill ALL overflow:hidden on audio internals */
.audio-component-wrap *,
[data-testid="audio"],
[data-testid="audio"] *,
.waveform-container,
.waveform-container *,
.waveform-controls,
.waveform-controls * {
    overflow: visible !important;
}

/* The main audio block */
[data-testid="audio"],
.gr-audio,
.audio-wrap > div {
    background: var(--bg-element) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius-md) !important;
    padding: 0 !important;
    width: 100% !important;
    overflow: visible !important;
}

/* Waveform canvas area — give it proper height */
.waveform-container canvas,
[class*="waveform"] canvas {
    width: 100% !important;
    min-height: 80px !important;
    display: block !important;
    border-radius: var(--radius-sm) var(--radius-sm) 0 0 !important;
}

/* Controls toolbar row — MUST NOT clip */
.waveform-controls,
[class*="controls"],
.audio-controls,
[data-testid="waveform-controls"] {
    display: flex !important;
    flex-direction: row !important;
    flex-wrap: nowrap !important;
    align-items: center !important;
    gap: 6px !important;
    padding: 10px 12px !important;
    background: var(--bg-card) !important;
    border-top: 1px solid var(--border) !important;
    border-radius: 0 0 var(--radius-md) var(--radius-md) !important;
    width: 100% !important;
    min-height: 52px !important;
    overflow: visible !important;
}

/* Time labels */
.waveform-controls span,
[class*="time"],
[class*="timestamp"] {
    font-family: var(--font-mono) !important;
    font-size: 11px !important;
    color: var(--text-muted) !important;
    white-space: nowrap !important;
    flex-shrink: 0 !important;
}

/* ALL icon/control buttons in audio toolbar */
.waveform-controls button,
.audio-controls button,
[data-testid="audio"] button,
[class*="icon-button"],
[class*="control-button"],
[class*="audio-button"] {
    background: var(--bg-surface) !important;
    border: 1px solid var(--border) !important;
    color: var(--accent-light) !important;
    border-radius: var(--radius-sm) !important;
    width: 32px !important;
    height: 32px !important;
    min-width: 32px !important;
    min-height: 32px !important;
    max-width: 32px !important;
    max-height: 32px !important;
    padding: 0 !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    cursor: pointer !important;
    flex-shrink: 0 !important;
    transition: background 0.15s, border-color 0.15s !important;
}

.waveform-controls button:hover,
.audio-controls button:hover,
[data-testid="audio"] button:hover {
    background: var(--accent-dim) !important;
    border-color: var(--accent) !important;
}

/* SVG icons inside buttons */
.waveform-controls button svg,
.audio-controls button svg,
[data-testid="audio"] button svg {
    width: 14px !important;
    height: 14px !important;
    fill: currentColor !important;
    stroke: currentColor !important;
    flex-shrink: 0 !important;
}

/* Play button — slightly larger accent */
[data-testid="play-pause-button"],
[aria-label="Play"],
[aria-label="Pause"] {
    background: var(--accent) !important;
    border-color: var(--accent) !important;
    color: #fff !important;
    width: 36px !important;
    height: 36px !important;
    min-width: 36px !important;
    max-width: 36px !important;
    border-radius: 50% !important;
}

/* Speed badge */
[class*="speed"],
[aria-label*="speed"],
[aria-label*="Speed"] {
    font-family: var(--font-mono) !important;
    font-size: 10px !important;
    min-width: 36px !important;
    max-width: 36px !important;
    padding: 0 4px !important;
    letter-spacing: 0 !important;
}

/* Upload drag area */
[data-testid="audio"] .upload-container,
[data-testid="audio"] .empty-state,
.audio-component-wrap .upload-container {
    background: var(--bg-element) !important;
    border: 2px dashed var(--border-glow) !important;
    border-radius: var(--radius-md) !important;
    min-height: 100px !important;
    display: flex !important;
    flex-direction: column !important;
    align-items: center !important;
    justify-content: center !important;
    color: var(--text-muted) !important;
    font-family: var(--font-mono) !important;
    font-size: 12px !important;
    cursor: pointer !important;
    transition: border-color 0.2s, background 0.2s !important;
}

.audio-component-wrap .upload-container:hover {
    border-color: var(--accent) !important;
    background: rgba(124, 92, 252, 0.06) !important;
}

/* ━━━━ DIVIDER ━━━━ */
.section-divider {
    border: none !important;
    border-top: 1px solid var(--border) !important;
    margin: 22px 0 !important;
}

/* ━━━━ ANALYZE BUTTONS ━━━━ */
.btn-analyze {
    width: 100% !important;
    padding: 13px 20px !important;
    font-family: var(--font-display) !important;
    font-size: 14px !important;
    font-weight: 700 !important;
    letter-spacing: 0.5px !important;
    color: #fff !important;
    background: linear-gradient(135deg, var(--accent-dim), var(--accent)) !important;
    border: none !important;
    border-radius: var(--radius-md) !important;
    cursor: pointer !important;
    transition: opacity 0.2s, transform 0.1s, box-shadow 0.2s !important;
    box-shadow: 0 4px 20px rgba(124, 92, 252, 0.3) !important;
    margin-bottom: 4px !important;
}

/* Catch all Gradio primary button variants */
button.lg,
button[class*="primary"],
.svelte-cmf5ev,
[data-testid*="analyze"] {
    background: linear-gradient(135deg, var(--accent-dim), var(--accent)) !important;
    color: #fff !important;
    border: none !important;
    border-radius: var(--radius-md) !important;
    font-family: var(--font-display) !important;
    font-size: 14px !important;
    font-weight: 700 !important;
    padding: 13px 20px !important;
    width: 100% !important;
    cursor: pointer !important;
    box-shadow: 0 4px 20px rgba(124, 92, 252, 0.3) !important;
    transition: opacity 0.2s, transform 0.1s !important;
    margin-bottom: 4px !important;
}

button.lg:hover,
button[class*="primary"]:hover {
    opacity: 0.88 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 6px 28px rgba(124, 92, 252, 0.45) !important;
}

button.lg:active { transform: translateY(0) !important; }

/* Secondary / clear buttons */
.btn-secondary button,
button.secondary,
button[class*="secondary"] {
    background: var(--bg-element) !important;
    border: 1px solid var(--border) !important;
    color: var(--text-muted) !important;
    font-family: var(--font-display) !important;
    font-size: 13px !important;
    font-weight: 600 !important;
    border-radius: var(--radius-md) !important;
    padding: 10px 20px !important;
    width: 100% !important;
    cursor: pointer !important;
    transition: background 0.2s, color 0.2s !important;
}

.btn-secondary button:hover {
    background: var(--bg-surface) !important;
    color: var(--text-primary) !important;
}

/* ━━━━ OUTPUT TEXTBOX ━━━━ */
.panel-output textarea,
.panel-output .gr-textbox textarea,
textarea {
    background: var(--bg-surface) !important;
    color: var(--text-primary) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius-md) !important;
    font-family: var(--font-mono) !important;
    font-size: 13px !important;
    line-height: 1.8 !important;
    min-height: 420px !important;
    max-height: 70vh !important;
    padding: 20px !important;
    resize: vertical !important;
    width: 100% !important;
    transition: border-color 0.2s !important;
}

textarea:focus {
    outline: none !important;
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 3px rgba(124, 92, 252, 0.12) !important;
}

textarea::placeholder { color: var(--text-dim) !important; }

/* ━━━━ LABELS ━━━━ */
label span,
.label-wrap span,
.gr-textbox label span,
[class*="label"] {
    font-family: var(--font-mono) !important;
    font-size: 11px !important;
    color: var(--text-muted) !important;
    text-transform: uppercase !important;
    letter-spacing: 1px !important;
}

/* ━━━━ SCROLLBARS ━━━━ */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg-base); }
::-webkit-scrollbar-thumb { background: var(--border-glow); border-radius: 4px; }

/* ━━━━ HIDE GRADIO FOOTER ━━━━ */
footer,
.built-with,
[class*="built-with"],
.footer,
.svelte-1ax1toq,
.gradio-container ~ footer {
    display: none !important;
    visibility: hidden !important;
    height: 0 !important;
    overflow: hidden !important;
}

/* ━━━━ RECORDING PULSE ━━━━ */
@keyframes pulse-rec {
    0%, 100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.5); }
    50% { box-shadow: 0 0 0 10px rgba(239, 68, 68, 0); }
}

[data-testid="record-button"][aria-pressed="true"],
.recording button {
    animation: pulse-rec 1.2s infinite !important;
    background: #dc2626 !important;
    border-color: #ef4444 !important;
}

/* ━━━━ RESPONSIVE ━━━━ */
@media (max-width: 800px) {
    .gradio-container { padding: 16px 12px 40px !important; }
    .panel-input,
    .panel-output {
        min-width: 100% !important;
        padding: 18px !important;
    }
}
"""


# ── Interface ──────────────────────────────────────────────
def create_interface():
    """Build the two-panel Gradio UI and wire up its events.

    The left column holds an upload widget and a microphone widget, each with
    its own analyze button, plus a "clear all" button. The right column holds
    the read-only result textbox and a "clear result" button.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched. Theme and CSS are
        applied by the caller via ``launch()``.
    """
    # Gradio 6.0: css and theme go in launch(), NOT Blocks()
    with gr.Blocks() as interface:

        # Header
        # NOTE(review): the markup in the gr.HTML calls below was rebuilt from
        # the classes defined in APP_CSS (.app-header, .app-subtitle,
        # .hint-label, .section-divider) — confirm against the intended design,
        # in particular whether part of the title was wrapped in a <span>.
        gr.HTML("""
<div class="app-header">
    <h1>🎧 Audio Sentiment Analyzer</h1>
    <div class="app-subtitle">DistilBERT · Whisper · Multi-language · Real-time analysis</div>
</div>
""")

        with gr.Row(elem_classes="main-row", equal_height=False):

            # ── INPUT PANEL ───────────────────────────────
            with gr.Column(scale=1, elem_classes="panel-input"):
                gr.Markdown("## 🎤 Input Panel")

                # Upload
                gr.HTML(
                    "<div class='hint-label'>"
                    "📁 Upload audio file — MP3, WAV, M4A, OGG, FLAC"
                    "</div>"
                )
                upload_audio = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="Upload Audio File",
                    elem_classes="audio-component-wrap",
                )
                upload_btn = gr.Button(
                    "🚀 Analyze Uploaded Audio",
                    variant="primary",
                    elem_classes="btn-analyze",
                )

                gr.HTML("<hr class='section-divider'>")

                # Record
                gr.HTML(
                    "<div class='hint-label'>"
                    "🎙️ Record live from microphone"
                    "</div>"
                )
                record_audio = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="Microphone Recording",
                    elem_classes="audio-component-wrap",
                )
                record_btn = gr.Button(
                    "🎙️ Analyze Recorded Audio",
                    variant="primary",
                    elem_classes="btn-analyze",
                )

                gr.HTML("<hr class='section-divider'>")

                with gr.Row(elem_classes="btn-secondary"):
                    clear_btn = gr.Button("🧹 Clear All", variant="secondary")

            # ── OUTPUT PANEL ──────────────────────────────
            with gr.Column(scale=2, elem_classes="panel-output"):
                gr.Markdown("## 📊 Result Panel")
                output_text = gr.Textbox(
                    lines=22,
                    label="Analysis Result",
                    placeholder=(
                        "Results will appear here after analysis...\n\n"
                        " 🎯 Transcription\n"
                        " 📊 Sentiment label\n"
                        " 🔍 Confidence score\n"
                        " ▓▓▓ Confidence bar"
                    ),
                    interactive=False,
                )
                with gr.Row(elem_classes="btn-secondary"):
                    clear_result_btn = gr.Button(
                        "🧹 Clear Result", variant="secondary"
                    )

        # ── Events ──────────────────────────────────────
        upload_btn.click(fn=process_audio, inputs=upload_audio, outputs=output_text)
        record_btn.click(fn=process_audio, inputs=record_audio, outputs=output_text)

        # Reset both audio widgets and the result box in one click.
        clear_btn.click(
            fn=lambda: (None, None, ""),
            inputs=[],
            outputs=[upload_audio, record_audio, output_text],
        )
        clear_result_btn.click(
            fn=lambda: "",
            inputs=[],
            outputs=output_text,
        )

    return interface


# ── Launch ─────────────────────────────────────────────────
if __name__ == "__main__":
    interface = create_interface()
    # Gradio 6.0: css and theme are passed to launch()
    interface.launch(share=True, css=APP_CSS, theme=gr.themes.Base())