Spaces:

YashsharmaPhD
/

Single_Call_Sentiment_Plot

Sleeping

App Files Files Community

YashsharmaPhD committed on Mar 6, 2025

Commit

828e49c

verified ·

1 Parent(s): d791332

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -94

app.py CHANGED Viewed

@@ -4,50 +4,35 @@ import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 from pydub import AudioSegment
-from transformers import pipeline
 import os
 import librosa.display
 import whisper
-import textwrap
 from collections import Counter
 from wordcloud import WordCloud
-# Load pre-trained sentiment analysis model
-sentiment_analyzer = pipeline("sentiment-analysis")
 # Load Whisper model
 whisper_model = whisper.load_model("base")
-# Positive & Negative Word Lists
-positive_words = ["good", "excellent", "happy", "positive", "great", "success", "love", "joy", "fantastic"]
-negative_words = ["bad", "poor", "angry", "negative", "sad", "failure", "hate", "terrible", "awful"]
 # Streamlit UI
 st.title("🎤 Audio Sentiment & Feature Analysis")
 st.write("Upload an MP3 file to analyze its sentiment and audio features.")
-# Upload audio file
 uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
-# Function to split transcriptions into chunks of ≤512 whitespace-separated words (a word count, not model tokens)
-def split_text_into_chunks(text, max_length=512):
-    """Splits text into smaller chunks for sentiment analysis."""
-    words = text.split()  # whitespace split; max_length below counts these words
-    chunks = []
-    while words:  # consume up to max_length words per pass until none remain
-        chunk = words[:max_length]
-        chunks.append(" ".join(chunk))  # rejoined with single spaces, so original spacing is normalized
-        words = words[max_length:]
-    return chunks  # empty list when text contains no words
-# Function to extract words and categorize them
-def extract_words_from_text(text):
-    """Extracts words and categorizes them as positive or negative."""
-    words = text.lower().split()  # lowercase + whitespace split; punctuation stays attached to each word
-    good_words = [word for word in words if word in positive_words]  # exact matches only; repeats are kept
-    bad_words = [word for word in words if word in negative_words]  # same exact-match rule as above
-    return good_words, bad_words  # two lists, in order of appearance in text
 if uploaded_file:
     file_path = f"temp/{uploaded_file.name}"
     os.makedirs("temp", exist_ok=True)
@@ -63,93 +48,39 @@ if uploaded_file:
     # Load audio
     y, sr = librosa.load(wav_path, sr=None)
-    # Extract MFCCs (Mel-frequency cepstral coefficients)
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-    mfccs_mean = np.mean(mfccs, axis=1)
     # Transcribe with Whisper
     result = whisper_model.transcribe(wav_path)
     transcribed_text = result["text"]
-    # Split transcription into smaller chunks for sentiment analysis
-    text_chunks = split_text_into_chunks(transcribed_text)
-    # Analyze sentiment for each chunk and determine overall sentiment
-    sentiment_labels = []
-    for chunk in text_chunks:
-        sentiment_result = sentiment_analyzer(chunk)
-        sentiment_labels.append(sentiment_result[0]["label"])
-    # Majority voting for overall sentiment
-    sentiment_counts = Counter(sentiment_labels)
-    overall_sentiment = max(sentiment_counts, key=sentiment_counts.get)
-    sentiment_color = "green" if overall_sentiment == "POSITIVE" else "red"
-    # Extract words and categorize them
-    good_words, bad_words = extract_words_from_text(transcribed_text)
-    # Display sentiment result
     st.subheader("📊 Sentiment Analysis Result")
-    st.markdown(f"**Overall Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{overall_sentiment}</span>", unsafe_allow_html=True)
-    # Display Positive & Negative Words in a Table
-    st.subheader("🗣️ Positive & Negative Words in Transcription")
-    col1, col2 = st.columns(2)
-    with col1:
-        st.markdown("### ✅ Good Words")
-        st.write(", ".join(good_words) if good_words else "No good words detected.")
-    with col2:
-        st.markdown("### ❌ Bad Words")
-        st.write(", ".join(bad_words) if bad_words else "No bad words detected.")
     # Display full transcription
     st.subheader("📝 Full Transcription")
     st.write(transcribed_text)
-    # 1️⃣ Sentiment Trend Over Time
-    sentiment_numeric = [1 if s == "POSITIVE" else -1 for s in sentiment_labels]
-    fig, ax = plt.subplots(figsize=(8, 4))
-    ax.scatter(range(len(sentiment_numeric)), sentiment_numeric, c=sentiment_numeric, cmap="coolwarm")
-    ax.set_title("Sentiment Trend (Per Chunk)")
-    ax.set_xticks(range(0, len(sentiment_labels), max(1, len(sentiment_labels)//5)))
-    ax.set_yticks([-1, 1], labels=["Negative", "Positive"])
-    st.pyplot(fig)
-    # 2️⃣ MFCC Heatmap
     fig, ax = plt.subplots(figsize=(10, 4))
     sns.heatmap(mfccs, cmap="coolwarm", xticklabels=False, yticklabels=False)
     ax.set_title("MFCC Heatmap")
     st.pyplot(fig)
-    # 3️⃣ Waveform Plot
-    fig, ax = plt.subplots(figsize=(10, 4))
-    librosa.display.waveshow(y, sr=sr, alpha=0.5)
-    ax.set_title("Waveform of Audio")
-    st.pyplot(fig)
-    # 4️⃣ Spectrogram
-    fig, ax = plt.subplots(figsize=(10, 4))
-    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
-    librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log", cmap="coolwarm")
-    ax.set_title("Spectrogram")
-    st.pyplot(fig)
-    # 5️⃣ Positive vs Negative Word Count Bar Chart
-    fig, ax = plt.subplots(figsize=(6, 4))
-    ax.bar(["Positive Words", "Negative Words"], [len(good_words), len(bad_words)], color=["green", "red"])
-    ax.set_title("Positive vs Negative Word Count")
-    st.pyplot(fig)
-    # 6️⃣ Word Cloud of Transcription
     wordcloud = WordCloud(width=800, height=400, background_color="white").generate(transcribed_text)
     fig, ax = plt.subplots(figsize=(10, 5))
     ax.imshow(wordcloud, interpolation="bilinear")
     ax.axis("off")
     ax.set_title("Word Cloud of Transcription")
     st.pyplot(fig)
     # Clean up temp files
     os.remove(wav_path)
     os.remove(file_path)

 import matplotlib.pyplot as plt
 import seaborn as sns
 from pydub import AudioSegment
+from transformers import T5Tokenizer, T5ForConditionalGeneration
 import os
 import librosa.display
 import whisper
 from collections import Counter
 from wordcloud import WordCloud
+import torch
+# Load T5 model and tokenizer
+tokenizer = T5Tokenizer.from_pretrained("t5-small")
+model = T5ForConditionalGeneration.from_pretrained("t5-small")
+def analyze_sentiment_t5(text, max_length=512):
+    """Classify *text* as "POSITIVE" or "NEGATIVE" using T5 with the SST-2 task prefix.
+
+    Args:
+        text: The text to classify (here, a full call transcription).
+        max_length: Maximum number of input tokens passed to the model;
+            anything beyond this is truncated. Defaults to 512.
+
+    Returns:
+        "POSITIVE" if the decoded model output contains "positive"
+        (case-insensitive), otherwise "NEGATIVE".
+    """
+    input_text = f"sst2 sentence: {text}"  # Formatting input for T5 model
+    # Long transcriptions can exceed the model's intended input length, so cap
+    # the encoded sequence instead of feeding an over-long input to generate().
+    input_ids = tokenizer.encode(
+        input_text, return_tensors="pt", truncation=True, max_length=max_length
+    )
+    output = model.generate(input_ids)
+    sentiment = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Any output that does not explicitly say "positive" is reported as NEGATIVE.
+    return "POSITIVE" if "positive" in sentiment.lower() else "NEGATIVE"
 # Load Whisper model
 whisper_model = whisper.load_model("base")
 # Streamlit UI
 st.title("🎤 Audio Sentiment & Feature Analysis")
 st.write("Upload an MP3 file to analyze its sentiment and audio features.")
 uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
 if uploaded_file:
     file_path = f"temp/{uploaded_file.name}"
     os.makedirs("temp", exist_ok=True)
     # Load audio
     y, sr = librosa.load(wav_path, sr=None)
+    # Extract MFCCs
     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
     # Transcribe with Whisper
     result = whisper_model.transcribe(wav_path)
     transcribed_text = result["text"]
+    # Analyze sentiment
+    sentiment = analyze_sentiment_t5(transcribed_text)
+    sentiment_color = "green" if sentiment == "POSITIVE" else "red"
+    # Display results
     st.subheader("📊 Sentiment Analysis Result")
+    st.markdown(f"**Overall Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment}</span>", unsafe_allow_html=True)
     # Display full transcription
     st.subheader("📝 Full Transcription")
     st.write(transcribed_text)
+    # 1️⃣ MFCC Heatmap
     fig, ax = plt.subplots(figsize=(10, 4))
     sns.heatmap(mfccs, cmap="coolwarm", xticklabels=False, yticklabels=False)
     ax.set_title("MFCC Heatmap")
     st.pyplot(fig)
+    # 2️⃣ Word Cloud
     wordcloud = WordCloud(width=800, height=400, background_color="white").generate(transcribed_text)
     fig, ax = plt.subplots(figsize=(10, 5))
     ax.imshow(wordcloud, interpolation="bilinear")
     ax.axis("off")
     ax.set_title("Word Cloud of Transcription")
     st.pyplot(fig)
     # Clean up temp files
     os.remove(wav_path)
     os.remove(file_path)