Spaces:

MaroofTechSorcerer
/

Voice_Based_Sentiment_Analysis_with_Sarcasm_Detection

Sleeping

App Files Files Community

MaroofTechSorcerer committed on May 6, 2025

Commit

854f1c9

verified ·

1 Parent(s): aec967d

Update app.py

Browse files

Files changed (1) hide show

app.py +183 -238

app.py CHANGED Viewed

@@ -8,86 +8,100 @@ import plotly.express as px
 import logging
 import warnings
 import whisper
 import base64
 import io
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
 import streamlit.components.v1 as components
-# Try importing torchaudio, fallback to pydub
-try:
-    import torchaudio
-    USE_TORCHAUDIO = True
-except ImportError:
-    from pydub import AudioSegment
-    USE_TORCHAUDIO = False
-    st.warning("torchaudio not found. Using pydub (slower). Install torchaudio: pip install torchaudio")
-# Suppress warnings and set logging
-logging.getLogger("torch").setLevel(logging.ERROR)
-logging.getLogger("transformers").setLevel(logging.ERROR)
 warnings.filterwarnings("ignore")
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-# Streamlit config
-st.set_page_config(layout="wide", page_title="Voice Sentiment Analysis")
-st.title("🎙 Voice Sentiment Analysis")
-st.markdown("Fast, accurate detection of emotions, sentiment, and sarcasm from voice or text.")
-# Global model cache
 @st.cache_resource
-def load_models():
     try:
-        # Load Whisper model with CPU optimization
-        whisper_model = whisper.load_model("base")
-        # Load emotion detection model
-        emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
-        emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
-        emotion_classifier = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer,
-                                     top_k=None, device=-1)  # CPU only
-        # Load sarcasm detection model
-        sarcasm_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
-        sarcasm_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
-        sarcasm_classifier = pipeline("text-classification", model=sarcasm_model, tokenizer=sarcasm_tokenizer,
-                                     device=-1)  # CPU only
-        return whisper_model, emotion_classifier, sarcasm_classifier
     except Exception as e:
-        st.error(f"Failed to load models: {str(e)}")
-        raise
-whisper_model, emotion_classifier, sarcasm_classifier = load_models()
-# Emotion detection
-async def perform_emotion_detection(text):
-    if not text or len(text.strip()) < 3:
-        return {}, "neutral", {}, "NEUTRAL"
     try:
-        results = emotion_classifier(text)[0]
-        emotions_dict = {r['label']: r['score'] for r in results}
-        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
-        top_emotion = max(filtered_emotions, key=filtered_emotions.get, default="neutral")
         positive_emotions = ["joy"]
         negative_emotions = ["anger", "disgust", "fear", "sadness"]
-        sentiment = ("POSITIVE" if top_emotion in positive_emotions else
-                    "NEGATIVE" if top_emotion in negative_emotions else "NEUTRAL")
-        emotion_map = {"joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨", "sadness": "😭", "surprise": "😲"}
         return emotions_dict, top_emotion, emotion_map, sentiment
     except Exception as e:
         st.error(f"Emotion detection failed: {str(e)}")
         return {}, "neutral", {}, "NEUTRAL"
-# Sarcasm detection
-async def perform_sarcasm_detection(text):
-    if not text or len(text.strip()) < 3:
-        return False, 0.0
     try:
         result = sarcasm_classifier(text)[0]
         is_sarcastic = result['label'] == "LABEL_1"
         sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
@@ -96,248 +110,179 @@ async def perform_sarcasm_detection(text):
         st.error(f"Sarcasm detection failed: {str(e)}")
         return False, 0.0
-# Audio validation
 def validate_audio(audio_path):
     try:
-        if USE_TORCHAUDIO:
-            waveform, sample_rate = torchaudio.load(audio_path)
-            if waveform.abs().max() < 0.01:
-                st.warning("Audio volume too low.")
-                return False
-            if waveform.shape[1] / sample_rate < 1:
-                st.warning("Audio too short.")
-                return False
-        else:
-            sound = AudioSegment.from_file(audio_path)
-            if sound.dBFS < -55:
-                st.warning("Audio volume too low.")
-                return False
-            if len(sound) < 1000:
-                st.warning("Audio too short.")
-                return False
         return True
     except Exception as e:
         st.error(f"Invalid audio file: {str(e)}")
         return False
-# Audio transcription
-@st.cache_data
 def transcribe_audio(audio_path):
     try:
-        if USE_TORCHAUDIO:
-            waveform, sample_rate = torchaudio.load(audio_path)
-            if sample_rate != 16000:
-                resampler = torchaudio.transforms.Resample(sample_rate, 16000)
-                waveform = resampler(waveform)
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                torchaudio.save(temp_file.name, waveform, 16000)
-                result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
-        else:
-            sound = AudioSegment.from_file(audio_path)
-            sound = sound.set_frame_rate(16000).set_channels(1)
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                sound.export(temp_file.name, format="wav")
-                result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
-        os.remove(temp_file.name)
         return result["text"].strip()
     except Exception as e:
         st.error(f"Transcription failed: {str(e)}")
         return ""
-# Process uploaded audio
 def process_uploaded_audio(audio_file):
     try:
         ext = audio_file.name.split('.')[-1].lower()
         if ext not in ['wav', 'mp3', 'ogg']:
-            st.error("Unsupported format. Use WAV, MP3, or OGG.")
             return None
-        with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as temp_file:
-            temp_file.write(audio_file.getvalue())
-            temp_file_path = temp_file.name
         if not validate_audio(temp_file_path):
-            os.remove(temp_file_path)
             return None
         return temp_file_path
     except Exception as e:
-        st.error(f"Error processing audio: {str(e)}")
         return None
-# Process base64 audio
-def process_base64_audio(base64_data):
-    try:
-        if not base64_data.startswith("data:audio"):
-            st.error("Invalid audio data.")
-            return None
-        base64_binary = base64_data.split(',')[1]
-        binary_data = base64.b64decode(base64_binary)
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_file.write(binary_data)
-            temp_file_path = temp_file.name
-        if not validate_audio(temp_file_path):
             os.remove(temp_file_path)
-            return None
-        return temp_file_path
-    except Exception as e:
-        st.error(f"Error processing audio data: {str(e)}")
-        return None
 # Custom audio recorder
 def custom_audio_recorder():
     audio_recorder_html = """
     <script>
-    let recorder, audioBlob, isRecording = false;
-    const recordButton = document.getElementById('record-button');
-    const audioPlayback = document.getElementById('audio-playback');
-    const audioData = document.getElementById('audio-data');
     async function startRecording() {
         try {
-            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
             recorder = new MediaRecorder(stream);
             const chunks = [];
             recorder.ondataavailable = e => chunks.push(e.data);
             recorder.onstop = () => {
-                audioBlob = new Blob(chunks, { type: 'audio/wav' });
-                audioPlayback.src = URL.createObjectURL(audioBlob);
                 const reader = new FileReader();
-                reader.readAsDataURL(audioBlob);
                 reader.onloadend = () => {
-                    audioData.value = reader.result;
                     window.parent.postMessage({type: "streamlit:setComponentValue", value: reader.result}, "*");
                 };
                 stream.getTracks().forEach(track => track.stop());
             };
             recorder.start();
-            isRecording = true;
-            recordButton.textContent = 'Stop Recording';
-            recordButton.classList.add('recording');
-        } catch (e) {
-            alert('Recording failed: ' + e.message);
-        }
     }
     function stopRecording() {
         recorder.stop();
-        isRecording = false;
-        recordButton.textContent = 'Start Recording';
-        recordButton.classList.remove('recording');
     }
-    document.getElementById('record-button').onclick = () => {
-        isRecording ? stopRecording() : startRecording();
-    };
     </script>
-    <style>
-    .recorder-container { text-align: center; padding: 15px; }
-    .record-button { background: #ff4b4b; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; }
-    .record-button.recording { background: #d32f2f; animation: pulse 1.5s infinite; }
-    @keyframes pulse { 0% { opacity: 1; } 50% { opacity: 0.7; } 100% { opacity: 1; } }
-    audio { margin-top: 10px; width: 100%; }
-    </style>
-    <div class="recorder-container">
-        <button id="record-button">Start Recording</button>
-        <audio id="audio-playback" controls></audio>
-        <input type="hidden" id="audio-data">
-    </div>
     """
-    return components.html(audio_recorder_html, height=150)
-# Display results
 def display_analysis_results(transcribed_text):
-    async def run_analyses():
-        emotion_task = perform_emotion_detection(transcribed_text)
-        sarcasm_task = perform_sarcasm_detection(transcribed_text)
-        return await asyncio.gather(emotion_task, sarcasm_task)
-    with st.spinner("Analyzing..."):
-        with ThreadPoolExecutor() as executor:
-            loop = asyncio.get_event_loop()
-            (emotions_dict, top_emotion, emotion_map, sentiment), (is_sarcastic, sarcasm_score) = loop.run_until_complete(run_analyses())
     st.header("Results")
-    st.subheader("Transcribed Text")
-    st.text_area("Text", transcribed_text, height=100, disabled=True)
-    col1, col2 = st.columns([1, 2])
     with col1:
         st.subheader("Sentiment")
-        sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
-        st.markdown(f"{sentiment_icon} **{sentiment}**")
-        st.subheader("Sarcasm")
-        sarcasm_icon = "😏" if is_sarcastic else "😐"
-        st.markdown(f"{sarcasm_icon} **{'Detected' if is_sarcastic else 'Not Detected'}** (Score: {sarcasm_score:.2f})")
     with col2:
-        st.subheader("Emotions")
-        if emotions_dict:
-            st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} **{top_emotion.capitalize()}** ({emotions_dict[top_emotion]:.2f})")
-            emotions = list(emotions_dict.keys())[:5]
-            scores = list(emotions_dict.values())[:5]
-            fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'}, color=emotions,
-                         color_discrete_sequence=px.colors.qualitative.Set2)
-            fig.update_layout(yaxis_range=[0, 1], showlegend=False, height=300)
-            st.plotly_chart(fig, use_container_width=True)
-        else:
-            st.write("No emotions detected.")
-    with st.expander("Details"):
-        st.markdown("""
-        - **Speech**: Whisper-base (fast, ~10-15% WER)
-        - **Emotions**: DistilBERT (joy, anger, etc.)
-        - **Sarcasm**: RoBERTa (irony detection)
-        - **Tips**: Clear audio, minimal noise
-        """)
-# Main app
 def main():
-    if 'debug_info' not in st.session_state:
-        st.session_state.debug_info = []
-    tab1, tab2, tab3 = st.tabs(["📁 Upload Audio", "🎙 Record Audio", "✍️ Text Input"])
     with tab1:
-        audio_file = st.file_uploader("Upload audio", type=["wav", "mp3", "ogg"])
         if audio_file:
-            st.audio(audio_file.getvalue())
-            if st.button("Analyze", key="upload_analyze"):
-                progress = st.progress(0)
-                temp_path = process_uploaded_audio(audio_file)
                 if temp_path:
-                    progress.progress(50)
                     text = transcribe_audio(temp_path)
                     if text:
-                        progress.progress(100)
                         display_analysis_results(text)
-                    else:
-                        st.error("Transcription failed.")
-                    if os.path.exists(temp_path):
-                        os.remove(temp_path)
-                progress.empty()
-    with tab2:
-        st.markdown("Record audio using your microphone.")
-        audio_data = custom_audio_recorder()
-        if audio_data and st.button("Analyze", key="record_analyze"):
-            progress = st.progress(0)
-            temp_path = process_base64_audio(audio_data)
-            if temp_path:
-                progress.progress(50)
-                text = transcribe_audio(temp_path)
-                if text:
-                    progress.progress(100)
-                    display_analysis_results(text)
-                else:
-                    st.error("Transcription failed.")
-                if os.path.exists(temp_path):
-                    os.remove(temp_path)
-            progress.empty()
-    with tab3:
-        manual_text = st.text_area("Enter text:", placeholder="Type text to analyze...")
-        if st.button("Analyze", key="text_analyze") and manual_text:
-            display_analysis_results(manual_text)
 if __name__ == "__main__":
-    main()

 import logging
 import warnings
 import whisper
+from pydub import AudioSegment
+import time
 import base64
 import io
 import streamlit.components.v1 as components
# Silence Python warnings and the tokenizers fork/parallelism notice.
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Only let CRITICAL records through from the heavy ML libraries so the
# console stays readable.
logging.getLogger("transformers").setLevel(logging.CRITICAL)
logging.getLogger("torch").setLevel(logging.CRITICAL)

# Prefer the GPU when CUDA is usable; every model below is moved to `device`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Page configuration and header.
st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
st.title("🎙 Voice Based Sentiment Analysis")
st.write("Detect emotions, sentiment, and sarcasm from your voice with optimized speed and accuracy using OpenAI Whisper.")
+# Emotion Detection Function
 @st.cache_resource
+def get_emotion_classifier():
     try:
+        tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion", use_fast=True)
+        model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion").to(device)
+        if torch.cuda.is_available():
+            model = model.half()  # Use fp16 on GPU
+        classifier = pipeline("text-classification",
+                             model=model,
+                             tokenizer=tokenizer,
+                             top_k=None,
+                             device=0 if torch.cuda.is_available() else -1)
+        return classifier
     except Exception as e:
+        st.error(f"Failed to load emotion model: {str(e)}")
+        return None
def perform_emotion_detection(text):
    """Classify the emotions expressed in ``text`` and derive a sentiment.

    Args:
        text: The text to analyse. ``None``, empty, or shorter than three
            non-whitespace-trimmed characters is treated as "nothing to analyse".

    Returns:
        A 4-tuple ``(emotions_dict, top_emotion, emotion_map, sentiment)``:
        label -> score for every label returned by the classifier, the
        highest-scoring label, a label -> emoji map, and one of
        ``"POSITIVE"``, ``"NEGATIVE"`` or ``"NEUTRAL"``. When there is nothing
        to analyse, the classifier is unavailable, or classification fails,
        ``({}, "neutral", {}, "NEUTRAL")`` is returned.
    """
    try:
        if not text or len(text.strip()) < 3:
            return {}, "neutral", {}, "NEUTRAL"
        emotion_classifier = get_emotion_classifier()
        if not emotion_classifier:
            return {}, "neutral", {}, "NEUTRAL"
        emotion_results = emotion_classifier(text)[0]
        # "love" is included because the emotion model is documented to emit
        # it as a label; without it, affectionate text was reported as NEUTRAL.
        # "disgust" is kept for backward compatibility with other label sets.
        emotion_map = {
            "joy": "😊", "love": "🥰", "anger": "😡", "disgust": "🤢",
            "fear": "😨", "sadness": "😭", "surprise": "😲"
        }
        positive_emotions = ["joy", "love"]
        negative_emotions = ["anger", "disgust", "fear", "sadness"]
        emotions_dict = {result['label']: result['score'] for result in emotion_results}
        # Ignore near-zero scores when picking the winner, but fall back to
        # the full set if everything was filtered out.
        filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
        if not filtered_emotions:
            filtered_emotions = emotions_dict
        top_emotion = max(filtered_emotions, key=filtered_emotions.get)
        if top_emotion in positive_emotions:
            sentiment = "POSITIVE"
        elif top_emotion in negative_emotions:
            sentiment = "NEGATIVE"
        else:
            # Anything else (e.g. "surprise") carries no clear polarity.
            sentiment = "NEUTRAL"
        return emotions_dict, top_emotion, emotion_map, sentiment
    except Exception as e:
        st.error(f"Emotion detection failed: {str(e)}")
        return {}, "neutral", {}, "NEUTRAL"
+# Sarcasm Detection Function
@st.cache_resource
def _load_sarcasm_classifier():
    """Build the irony/sarcasm text-classification pipeline (cached across reruns).

    Raises whatever the underlying loaders raise. Letting the exception
    propagate keeps a failed load out of the resource cache, so the next
    call retries instead of receiving a cached ``None``.
    """
    model_name = "cardiffnlp/twitter-roberta-base-irony"
    use_gpu = torch.cuda.is_available()
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
    if use_gpu:
        model = model.half()  # Use fp16 on GPU
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=0 if use_gpu else -1,
    )


def get_sarcasm_classifier():
    """Return the shared sarcasm classifier, or ``None`` if it cannot be loaded.

    Load failures are reported with ``st.error``. Only successful loads are
    cached, so a transient failure does not disable sarcasm detection for the
    remainder of the process.
    """
    try:
        return _load_sarcasm_classifier()
    except Exception as e:
        st.error(f"Failed to load sarcasm model: {str(e)}")
        return None
+def perform_sarcasm_detection(text):
     try:
+        if not text or len(text.strip()) < 3:
+            return False, 0.0
+        sarcasm_classifier = get_sarcasm_classifier()
+        if not sarcasm_classifier:
+            return False, 0.0
         result = sarcasm_classifier(text)[0]
         is_sarcastic = result['label'] == "LABEL_1"
         sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
         st.error(f"Sarcasm detection failed: {str(e)}")
         return False, 0.0
+# Validate audio quality
 def validate_audio(audio_path):
     try:
+        sound = AudioSegment.from_file(audio_path)
+        if sound.dBFS < -55:
+            st.warning("Audio volume is too low.")
+            return False
+        if len(sound) < 1000:
+            st.warning("Audio is too short.")
+            return False
         return True
     except Exception as e:
         st.error(f"Invalid audio file: {str(e)}")
         return False
+# Speech Recognition with Whisper
@st.cache_resource
def _load_whisper_base():
    """Load the Whisper "base" model onto ``device`` (cached across reruns).

    Raises on failure so that an unsuccessful load is not stored in the
    resource cache and the next call can retry.
    """
    return whisper.load_model("base").to(device)


def load_whisper_model():
    """Return the shared Whisper model, or ``None`` if it cannot be loaded.

    Load failures are reported with ``st.error``. Only successful loads are
    cached, so a transient failure does not disable transcription for the
    remainder of the process.
    """
    try:
        return _load_whisper_base()
    except Exception as e:
        st.error(f"Failed to load Whisper model: {str(e)}")
        return None
 def transcribe_audio(audio_path):
+    temp_wav_path = None
     try:
+        sound = AudioSegment.from_file(audio_path).set_frame_rate(16000).set_channels(1)
+        temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_{int(time.time())}.wav")
+        sound.export(temp_wav_path, format="wav")
+        model = load_whisper_model()
+        if not model:
+            return ""
+        result = model.transcribe(temp_wav_path, language="en", fp16=torch.cuda.is_available())
         return result["text"].strip()
     except Exception as e:
         st.error(f"Transcription failed: {str(e)}")
         return ""
+    finally:
+        if temp_wav_path and os.path.exists(temp_wav_path):
+            os.remove(temp_wav_path)
+# Process uploaded audio files
 def process_uploaded_audio(audio_file):
+    if not audio_file:
+        return None
+    temp_file_path = None
     try:
         ext = audio_file.name.split('.')[-1].lower()
         if ext not in ['wav', 'mp3', 'ogg']:
+            st.error("Unsupported audio format. Use WAV, MP3, or OGG.")
             return None
+        temp_file_path = os.path.join(tempfile.gettempdir(), f"uploaded_{int(time.time())}.{ext}")
+        with open(temp_file_path, "wb") as f:
+            f.write(audio_file.getvalue())
         if not validate_audio(temp_file_path):
             return None
         return temp_file_path
     except Exception as e:
+        st.error(f"Error processing uploaded audio: {str(e)}")
         return None
+    finally:
+        if temp_file_path and os.path.exists(temp_file_path) and not st.session_state.get('keep_temp', False):
             os.remove(temp_file_path)
+# Show model information
def show_model_info():
    """Render a sidebar section listing the models used by the app."""
    model_details = """
        - *Emotion*: DistilBERT (bhadresh-savani/distilbert-base-uncased-emotion)
        - *Sarcasm*: RoBERTa (cardiffnlp/twitter-roberta-base-irony)
        - *Speech*: OpenAI Whisper (base)
        """
    sidebar = st.sidebar
    sidebar.header("🧠 About the Models")
    with sidebar.expander("Model Details"):
        st.markdown(model_details)
 # Custom audio recorder
 def custom_audio_recorder():
+    st.warning("Recording requires microphone access and a modern browser.")
     audio_recorder_html = """
     <script>
+    let recorder, stream;
     async function startRecording() {
         try {
+            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
             recorder = new MediaRecorder(stream);
             const chunks = [];
             recorder.ondataavailable = e => chunks.push(e.data);
             recorder.onstop = () => {
+                const blob = new Blob(chunks, { type: 'audio/wav' });
                 const reader = new FileReader();
                 reader.onloadend = () => {
                     window.parent.postMessage({type: "streamlit:setComponentValue", value: reader.result}, "*");
                 };
+                reader.readAsDataURL(blob);
                 stream.getTracks().forEach(track => track.stop());
             };
             recorder.start();
+            document.getElementById('record-btn').textContent = 'Stop Recording';
+        } catch (e) { alert('Recording failed: ' + e.message); }
     }
     function stopRecording() {
         recorder.stop();
+        document.getElementById('record-btn').textContent = 'Start Recording';
+    }
+    function toggleRecording() {
+        if (!recorder || recorder.state === 'inactive') startRecording();
+        else stopRecording();
     }
     </script>
+    <button id="record-btn" onclick="toggleRecording()">Start Recording</button>
     """
+    return components.html(audio_recorder_html, height=100)
+# Display analysis results
 def display_analysis_results(transcribed_text):
+    emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
+    is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
     st.header("Results")
+    st.text_area("Transcribed Text", transcribed_text, height=100, disabled=True)
+    col1, col2 = st.columns(2)
     with col1:
         st.subheader("Sentiment")
+        st.write(f"{sentiment} ({top_emotion})")
     with col2:
+        st.subheader("Sarcasm")
+        st.write(f"{'Detected' if is_sarcastic else 'Not Detected'} (Score: {sarcasm_score:.2f})")
+    if emotions_dict:
+        fig = px.bar(x=list(emotions_dict.keys()), y=list(emotions_dict.values()), labels={'x': 'Emotion', 'y': 'Score'})
+        st.plotly_chart(fig)
+# Process base64 audio
def process_base64_audio(base64_data):
    """Decode a base64 data URL into a temporary audio file and validate it.

    Args:
        base64_data: A string of the form ``"<header>,<base64 payload>"``.
            A falsy value yields ``None`` without reporting an error.

    Returns:
        The path of the temporary ``.wav``-suffixed file when decoding and
        ``validate_audio`` succeed; otherwise ``None``. On success the file is
        left on disk and the CALLER is responsible for deleting it. On failure
        it is removed here.
    """
    if not base64_data:
        return None
    temp_file_path = None
    succeeded = False
    try:
        if not isinstance(base64_data, str):
            st.error("Invalid audio data.")
            return None
        _header, separator, payload = base64_data.partition(',')
        if not separator or not payload:
            st.error("Invalid audio data.")
            return None
        audio_bytes = base64.b64decode(payload)
        # delete=False: the file must outlive this function so the caller can
        # transcribe it. NamedTemporaryFile also guarantees a unique name.
        with tempfile.NamedTemporaryFile(prefix="rec_", suffix=".wav", delete=False) as f:
            temp_file_path = f.name
            f.write(audio_bytes)
        if not validate_audio(temp_file_path):
            return None
        succeeded = True
        return temp_file_path
    except Exception as e:
        st.error(f"Error processing recorded audio: {str(e)}")
        return None
    finally:
        # Only clean up on failure: deleting unconditionally would hand the
        # caller a path to a file that no longer exists.
        if temp_file_path and not succeeded and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
+# Main App Logic
 def main():
+    tab1, tab2 = st.tabs(["Upload Audio", "Record Audio"])
     with tab1:
+        audio_file = st.file_uploader("Upload Audio", type=["wav", "mp3", "ogg"])
         if audio_file:
+            st.audio(audio_file)
+            if st.button("Analyze Uploaded Audio"):
+                with st.spinner("Analyzing..."):
+                    temp_path = process_uploaded_audio(audio_file)
+                    if temp_path:
+                        text = transcribe_audio(temp_path)
+                        if text:
+                            display_analysis_results(text)
+    with tab2:
+        audio_data = custom_audio_recorder()
+        if audio_data and st.button("Analyze Recorded Audio"):
+            with st.spinner("Analyzing..."):
+                temp_path = process_base64_audio(audio_data)
                 if temp_path:
                     text = transcribe_audio(temp_path)
                     if text:
                         display_analysis_results(text)
+    show_model_info()
 if __name__ == "__main__":
+    main()