Spaces:

YashsharmaPhD
/

Single_Call_Sentiment_Plot

Sleeping

App Files Community

YashsharmaPhD committed on Mar 4, 2025

Commit

8941721

verified ·

1 Parent(s): e78469c

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -127

app.py CHANGED Viewed

@@ -6,16 +6,20 @@ import seaborn as sns
 from pydub import AudioSegment
 from transformers import pipeline
 import os
-import matplotlib.patches as patches
 import librosa.display
-import whisper  # Import Whisper for speech-to-text
 # Load pre-trained sentiment analysis model
 sentiment_analyzer = pipeline("sentiment-analysis")
-# Load Whisper model (small for faster performance)
 whisper_model = whisper.load_model("base")
 # Streamlit UI
 st.title("🎤 Audio Sentiment & Feature Analysis")
 st.write("Upload an MP3 file to analyze its sentiment and audio features.")
@@ -23,145 +27,45 @@ st.write("Upload an MP3 file to analyze its sentiment and audio features.")
 # Upload audio file
 uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
-# Function to analyze sentiment over time
-def analyze_sentiment_over_time(y, sr, chunk_duration=10):
-    """
-    Splits audio into chunks, transcribes, and performs sentiment analysis.
-    Args:
-        y: The audio waveform.
-        sr: Sample rate.
-        chunk_duration: Duration of each chunk in seconds (default: 10 sec).
-    Returns:
-        A list of sentiment labels over time.
-    """
-    chunk_length = chunk_duration * sr  # Convert chunk duration to samples
-    total_chunks = len(y) // chunk_length  # Number of chunks
-    sentiment_labels = []
-    for i in range(total_chunks):
-        start_sample = i * chunk_length
-        end_sample = start_sample + chunk_length
-        chunk_audio = y[start_sample:end_sample]
-        # Convert chunk to WAV
-        temp_wav_path = f"temp_chunk_{i}.wav"
-        librosa.output.write_wav(temp_wav_path, chunk_audio, sr)
-        # Transcribe with Whisper
-        result = whisper_model.transcribe(temp_wav_path)
-        os.remove(temp_wav_path)  # Remove temporary WAV file
-        transcribed_text = result["text"]
-        # Run sentiment analysis on transcribed text (if available)
-        if transcribed_text.strip():
-            sentiment_result = sentiment_analyzer(transcribed_text)
-            sentiment_labels.append(sentiment_result[0]["label"])
-        else:
-            sentiment_labels.append("NEUTRAL")
-    return sentiment_labels
-# Function to process audio and get sentiment
-def analyze_audio(file_path):
     # Convert MP3 to WAV
     audio = AudioSegment.from_mp3(file_path)
     wav_path = file_path.replace(".mp3", ".wav")
     audio.export(wav_path, format="wav")
-    # Load audio
-    y, sr = librosa.load(wav_path, sr=None)
-    # Extract MFCCs (Mel-frequency cepstral coefficients)
-    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-    mfccs_mean = np.mean(mfccs, axis=1)
-    # Transcribe audio
     result = whisper_model.transcribe(wav_path)
     transcribed_text = result["text"]
-    # Run sentiment analysis
-    if transcribed_text.strip():
-        sentiment_result = sentiment_analyzer(transcribed_text)
-    else:
-        sentiment_result = [{"label": "NEUTRAL", "score": 0.0}]
-    os.remove(wav_path)  # Remove WAV file after processing
-    return y, sr, sentiment_result[0], mfccs, mfccs_mean, transcribed_text
-# Function to extract words from audio using Whisper
-def extract_words_from_audio(file_path):
-    # Convert MP3 to WAV
-    audio = AudioSegment.from_mp3(file_path)
-    wav_path = file_path.replace(".mp3", ".wav")
-    audio.export(wav_path, format="wav")
-    # Transcribe audio using Whisper
-    result = whisper_model.transcribe(wav_path, word_timestamps=True)
-    # Extract words and timestamps
-    words = []
-    for segment in result['segments']:
-        for word_info in segment['words']:
-            words.append({"word": word_info['word'], "start_time": word_info['start'], "end_time": word_info['end']})
-    os.remove(wav_path)
-    return words, result['text']
-# Process and plot if a file is uploaded
-if uploaded_file:
-    file_path = f"temp/{uploaded_file.name}"
-    os.makedirs("temp", exist_ok=True)  # Ensure temp directory exists
-    with open(file_path, "wb") as f:
-        f.write(uploaded_file.getbuffer())
-    # Analyze sentiment & extract features
-    y, sr, sentiment, mfccs, mfccs_mean, transcribed_text = analyze_audio(file_path)
-    # Extract words from audio
-    words_from_audio, _ = extract_words_from_audio(file_path)
-    # Categorize words
-    good_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['good', 'excellent', 'positive', 'great', 'happy', 'success']]
-    negative_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['bad', 'negative', 'poor', 'angry', 'sad', 'failure']]
-    # Determine sentiment color
-    sentiment_label = sentiment['label']
-    sentiment_color = "green" if sentiment_label == "POSITIVE" else "red"
-    # Display sentiment
-    st.subheader("📊 Sentiment Analysis Result")
-    st.markdown(f"**Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment_label}</span>", unsafe_allow_html=True)
-    st.write(f"**Confidence:** {sentiment['score']:.2f}")
-    # Analyze sentiment over time
-    sentiment_scores = analyze_sentiment_over_time(y, sr)
-    sentiment_numeric = [1 if s == "POSITIVE" else -1 for s in sentiment_scores]
-    # Plot sentiment trend
-    fig, ax = plt.subplots(figsize=(8, 4))
-    ax.scatter(range(len(sentiment_numeric)), sentiment_numeric, c=sentiment_numeric, cmap="coolwarm")
-    ax.set_title("Sentiment Trend (10-sec intervals)")
-    ax.set_xticks(range(0, len(sentiment_scores), max(1, len(sentiment_scores)//5)))
-    ax.set_yticks([-1, 1], labels=["Negative", "Positive"])
-    st.pyplot(fig)
-    # Display positive & negative words
-    st.subheader("🗣️ Positive and Negative Words in Audio")
     col1, col2 = st.columns(2)
     with col1:
-        st.markdown("### Good Words")
         st.write(", ".join(good_words) if good_words else "No good words detected.")
-    with col2:
-        st.markdown("### Negative Words")
-        st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
-    # Display full transcription
-    st.subheader("📝 Full Transcription")
-    st.write(transcribed_text)
-    # Clean up temp file
     os.remove(file_path)

 from pydub import AudioSegment
 from transformers import pipeline
 import os
 import librosa.display
+import whisper
+import textwrap
 # Load pre-trained sentiment analysis model
 sentiment_analyzer = pipeline("sentiment-analysis")
+# Load Whisper model
 whisper_model = whisper.load_model("base")
+# Positive & Negative Word Lists
+positive_words = ["good", "excellent", "happy", "positive", "great", "success", "love", "joy", "fantastic"]
+negative_words = ["bad", "poor", "angry", "negative", "sad", "failure", "hate", "terrible", "awful"]
 # Streamlit UI
 st.title("🎤 Audio Sentiment & Feature Analysis")
 st.write("Upload an MP3 file to analyze its sentiment and audio features.")
 # Upload audio file
 uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
+def extract_words_from_text(text):
+    """Extracts words and categorizes them as positive or negative."""
+    words = text.lower().split()
+    good_words = [word for word in words if word in positive_words]
+    bad_words = [word for word in words if word in negative_words]
+    return good_words, bad_words
+if uploaded_file:
+    file_path = f"temp/{uploaded_file.name}"
+    os.makedirs("temp", exist_ok=True)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
     # Convert MP3 to WAV
     audio = AudioSegment.from_mp3(file_path)
     wav_path = file_path.replace(".mp3", ".wav")
     audio.export(wav_path, format="wav")
+    # Transcribe with Whisper
     result = whisper_model.transcribe(wav_path)
     transcribed_text = result["text"]
+    # Extract words and categorize them
+    good_words, bad_words = extract_words_from_text(transcribed_text)
+    # Display Positive & Negative Words in a Table
+    st.subheader("🗣️ Positive & Negative Words in Transcription")
     col1, col2 = st.columns(2)
     with col1:
+        st.markdown("### ✅ Good Words")
         st.write(", ".join(good_words) if good_words else "No good words detected.")
+    with col2:
+        st.markdown("### ❌ Bad Words")
+        st.write(", ".join(bad_words) if bad_words else "No bad words detected.")
+    # Clean up temp files
+    os.remove(wav_path)
     os.remove(file_path)