"""Streamlit app that sends an audio conversation to Gemini for sentiment analysis.

The user uploads a WAV file or picks the bundled default example; the audio is
uploaded to Gemini, analysed (with a customer-focused second pass when the
model classifies it as a customer support call), and the report plus a
sentiment timeline chart are rendered.
"""

import io
import json
import os
import tempfile
from pathlib import Path

import pandas as pd
import plotly.express as px
import streamlit as st
import streamlit.components.v1 as components
from google import genai
from pydub import AudioSegment  # Used to ensure WAV format if needed
from streamlit_mic_recorder import mic_recorder

# --- Configuration ---
# set_page_config must stay the first Streamlit command executed by the script.
st.set_page_config(layout="wide", page_title="Audio Sentiment Analysis")

APP_DIR = Path(__file__).parent
GEMINI_MODEL = "gemini-2.0-flash"

# Radio option labels, shared by the widget and by every comparison against it
# so the two can never drift apart.
UPLOAD_OPTION = "Upload WAV file"
DEFAULT_OPTION = "Use Default Example (Customer support call)"


def load_css(file_name):
    """Inject a CSS file that lives next to this script into the Streamlit app.

    Args:
        file_name: Name of the stylesheet, resolved relative to this script's
            directory.

    Errors are reported in the UI via ``st.error`` instead of being raised, so
    a missing stylesheet does not stop the app.
    """
    try:
        css_path = APP_DIR / file_name
        with open(css_path, encoding="utf-8") as f:
            # Streamlit only applies raw CSS when it is wrapped in a <style>
            # tag and rendered with unsafe_allow_html.
            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
    except FileNotFoundError:
        st.error(f"CSS file not found: {file_name}. Make sure it's in the same directory as app.py.")
    except Exception as e:
        st.error(f"Error loading CSS file {file_name}: {e}")


# NOTE(review): the markup payload of this call is blank in this copy of the
# file - confirm against the original before relying on it.
st.markdown(""" """, unsafe_allow_html=True)
load_css("style.css")

# Inject a script that sends height to the parent window
# NOTE(review): only two newlines remain of this payload in this copy of the
# file - confirm the intended script markup.
st.markdown("\n\n", unsafe_allow_html=True)

st.title("🗣️ Audio Sentiment Analysis")
st.markdown(""" Upload a WAV file, record new audio, or use the default example. The app will analyse the sentiment, focusing on the customer if it detects a support call. """)

# --- Default File Configuration ---
DEFAULT_AUDIO_FILENAME = "default_audio.wav"  # MAKE SURE THIS FILE EXISTS!


# --- API Key Handling ---
def _configured_api_key():
    """Return the Gemini API key from the environment or Streamlit secrets, else None."""
    env_key = os.getenv("GOOGLE_API_KEY")
    if env_key:
        return env_key
    try:
        return st.secrets["GOOGLE_API_KEY"]
    except Exception:
        # Indexing st.secrets raises when the secrets file or the key is
        # missing, and the exception type differs between Streamlit versions.
        # Either way the caller falls back to asking the user for the key.
        return None


api_key = _configured_api_key()
if not api_key:
    api_key = st.text_input("Enter your Google Gemini API Key:", type="password")
    if not api_key:
        st.warning("Please enter your Gemini API Key to proceed.")
        st.stop()

client = genai.Client(api_key=api_key)

# --- Prompt text ---
# The prompt fragments are assembled from adjacent string literals so the text
# sent to the model stays exactly as it was, character for character.
_BASE_PROMPT = (
    " Analyze the sentiment of the provided audio conversation in detail."
    " Consider the following aspects:"
    " 1. **Voice Tone:** (e.g., calm, agitated, happy, sad, sarcastic, urgent, monotone)"
    " 2. **Voice Intensity:** (e.g., loud, quiet, normal, shouting, whispering)"
    " 3. **Speaking Pace:** (e.g., fast, slow, normal, rushed, hesitant)"
    " 4. **Specific Emotions:** Identify primary emotions expressed"
    " (e.g., frustration, relief, anger, confusion, satisfaction, politeness, impatience)."
    " First, determine if this sounds like a customer support interaction"
    " (e.g., someone calling a company for help)."
    " Respond 'Customer Support: Yes' or 'Customer Support: No'. "
)

_INITIAL_INSTRUCTIONS = (
    " Based on your determination above, proceed with the sentiment analysis."
    " **Sentiment Timeline:** Provide a timeline of the overall sentiment"
    " throughout the conversation."
    " Divide the audio into logical segments"
    " (e.g., every 15-20 seconds or by speaker turn if discernible)"
    " and assign a sentiment score from -10 (very negative) to +10 (very positive)"
    " for each segment."
    " \n**Output Format:** Structure your entire response strictly as a JSON object"
    " with the following keys:"
    ' - "is_customer_support":'
    " (boolean, true if it's customer support, false otherwise)"
    ' - "analysis_target": (string, "customer only" or "full conversation")'
    ' - "detailed_report": (string, a comprehensive text report covering tone,'
    " intensity, pace, emotions, and overall sentiment trends based on the analysis target)"
    ' - "sentiment_timeline": (array of numbers, e.g., [2, 1, -5, -3, 0, 4, 6]) '
)

_CUSTOMER_INSTRUCTIONS = (
    " **Focus:** Since this is identified as a customer support call,"
    " focus your analysis *exclusively* on the speech segments likely belonging"
    " to the **customer**."
    " Ignore the agent's speech for sentiment scoring and detailed analysis"
    " unless it directly influences the customer's reaction."
    " **Sentiment Timeline:** Provide a timeline of the **customer's** sentiment"
    " throughout the conversation."
    " Divide the customer's speaking parts into logical segments"
    " and assign a sentiment score from -10 (very negative) to +10 (very positive)"
    " for each segment."
    " **Output Format:** Structure your entire response strictly as a JSON object"
    " with the following keys:"
    ' - "is_customer_support": true'
    ' - "analysis_target": "customer only"'
    ' - "detailed_report": (string, a comprehensive text report covering the'
    " *customer's* tone, intensity, pace, emotions, and overall sentiment trends)"
    ' - "sentiment_timeline": (array of numbers, representing the'
    " *customer's* sentiment scores, e.g., [-5, -6, -2, 1, 5]) "
)

_FULL_CONVERSATION_INSTRUCTIONS = (
    " **Focus:** Analyze the sentiment of the **entire conversation**,"
    " considering all speakers."
    " **Sentiment Timeline:** Provide a timeline of the overall sentiment"
    " throughout the conversation."
    " \nDivide the audio into logical segments"
    " (e.g., every 15-20 seconds or by speaker turn)"
    " and assign a sentiment score from -10 (very negative) to +10 (very positive)"
    " for each segment."
    " **Output Format:** Structure your entire response strictly as a JSON object"
    " with the following keys:"
    ' - "is_customer_support": false'
    ' - "analysis_target": "full conversation"'
    ' - "detailed_report": (string, a comprehensive text report covering tone,'
    " intensity, pace, emotions, and overall sentiment trends for the *whole conversation*)"
    ' - "sentiment_timeline": (array of numbers, representing the *overall*'
    " sentiment scores, e.g., [2, 1, -5, -3, 0, 4, 6]) "
)


def detailed_sentiment_prompt(is_customer_support=None, customer_focus=False):
    """Build the Gemini prompt for the requested analysis phase.

    Args:
        is_customer_support: ``None`` selects the initial prompt, in which the
            model itself decides whether the audio is a support call. A truthy
            value combined with ``customer_focus`` selects the customer-only
            prompt. Anything else selects the full-conversation prompt.
        customer_focus: Whether to restrict the analysis to the customer.

    Returns:
        The complete prompt text.
    """
    if is_customer_support is None:
        # Initial analysis phase
        return _BASE_PROMPT + _INITIAL_INSTRUCTIONS
    if is_customer_support and customer_focus:
        return _BASE_PROMPT + _CUSTOMER_INSTRUCTIONS
    # Not customer support, or explicitly analyze full conversation
    return _BASE_PROMPT + _FULL_CONVERSATION_INSTRUCTIONS


def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (three backticks, optional json tag).

    ``str.lstrip``/``str.rstrip`` take a *set of characters*, not a prefix or
    suffix, so the fence is removed explicitly here instead.
    """
    fence = "`" * 3
    cleaned = text.strip()
    if cleaned.startswith(fence):
        cleaned = cleaned[len(fence):]
        if cleaned.startswith("json"):
            cleaned = cleaned[len("json"):]
    if cleaned.endswith(fence):
        cleaned = cleaned[:-len(fence)]
    return cleaned.strip()


def analyze_audio(audio_bytes, filename="uploaded_audio.wav"):
    """Send audio to Gemini and turn the JSON reply into report data.

    The audio is written to a temporary ``.wav`` file, uploaded, and analysed
    with the initial prompt. When the model reports a customer support call, a
    second request with the customer-focused prompt supplies the final result;
    otherwise the first result is reused.

    Args:
        audio_bytes: Raw bytes of the audio to analyse.
        filename: Accepted for the caller's convenience; not used when talking
            to Gemini.

    Returns:
        A ``(report, timeline, target, is_customer_support)`` tuple. On any
        failure the error is shown in the UI and the tuple is
        ``(message, [], "Error", False)``.
    """
    temp_file_path = None
    myfile = None
    try:
        # The upload call is given a file path, so persist the bytes first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
            tmpfile.write(audio_bytes)
            temp_file_path = tmpfile.name

        # Upload the file to Gemini
        myfile = client.files.upload(file=temp_file_path)
        print(f"{myfile=}")

        # --- Initial Analysis Phase (Determine if Customer Support) ---
        initial_response = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[myfile, detailed_sentiment_prompt()],
        )
        print(f"{initial_response.text=}")

        # Stays None unless the second pass runs; used by the error report.
        final_response = None
        try:
            initial_data = json.loads(_strip_code_fence(initial_response.text))
            is_customer_support = initial_data.get("is_customer_support", False)

            # --- Second Analysis Phase (Refined based on support type) ---
            if is_customer_support:
                refined_prompt = detailed_sentiment_prompt(
                    is_customer_support=True, customer_focus=True
                )
                final_response = client.models.generate_content(
                    model=GEMINI_MODEL,
                    contents=[myfile, refined_prompt],
                )
                analysis_data = json.loads(_strip_code_fence(final_response.text))
            else:
                # Not a support call: the first pass already covers everything.
                analysis_data = initial_data

            # Tolerate missing keys in the model's reply.
            report = analysis_data.get("detailed_report", "Report not found in response.")
            timeline = analysis_data.get("sentiment_timeline", [])
            target = analysis_data.get("analysis_target", "unknown")
            return report, timeline, target, is_customer_support
        except json.JSONDecodeError:
            st.error("Error: Could not parse Gemini's response as JSON. Raw response:")
            st.code(initial_response.text)
            if final_response is not None:
                st.code(final_response.text)
            return "Error parsing response.", [], "Error", False
        except Exception as e:
            st.error(f"An error occurred during analysis: {e}")
            return f"Error: {e}", [], "Error", False
    except Exception as e:
        st.error(f"An error occurred during file processing or API call: {e}")
        return f"Error: {e}", [], "Error", False
    finally:
        # Clean up the uploaded file on Gemini and the local temp file
        if myfile:
            try:
                client.files.delete(name=myfile.name)
            except Exception as del_err:
                st.warning(f"Could not delete uploaded file from Gemini: {del_err}")
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)


def plot_sentiment_timeline(timeline_data):
    """Build a Plotly line chart of the sentiment scores per segment.

    Args:
        timeline_data: List of sentiment scores; entries that cannot be
            converted to ``float`` are skipped with a UI warning.

    Returns:
        The Plotly figure, or ``None`` when there is nothing numeric to plot.
    """
    if not timeline_data or not isinstance(timeline_data, list):
        st.warning("No valid sentiment timeline data to plot.")
        return None

    # The model may return numbers as strings; coerce what can be coerced and
    # skip the rest rather than substituting a placeholder value.
    numeric_timeline = []
    for item in timeline_data:
        try:
            numeric_timeline.append(float(item))
        except (ValueError, TypeError):
            st.warning(f"Skipping non-numeric value in timeline: {item}")

    if not numeric_timeline:
        st.warning("No numeric sentiment data available after filtering.")
        return None

    df = pd.DataFrame({
        'Segment': range(1, len(numeric_timeline) + 1),
        'Sentiment Score': numeric_timeline,
    })
    fig = px.line(
        df,
        x='Segment',
        y='Sentiment Score',
        title="Sentiment Progression Over Conversation Segments",
        markers=True,
        range_y=[-10.5, 10.5],  # Slight padding around the -10..+10 score range
    )
    fig.update_layout(
        xaxis_title="Conversation Segment / Time Progression",
        yaxis_title="Sentiment Score (-10 to +10)",
    )
    return fig


# --- Streamlit UI Elements ---
audio_bytes = None
file_name = None

# The default example is looked up next to this script, matching load_css and
# the guidance shown to the user when the file is missing.
default_file_path = APP_DIR / DEFAULT_AUDIO_FILENAME

input_method = st.radio(
    "Choose audio input method:",
    (UPLOAD_OPTION, DEFAULT_OPTION),
    index=0,
    key="input_method",
)

if input_method == UPLOAD_OPTION:
    uploaded_file = st.file_uploader("Choose a WAV file", type=['wav'], key="uploader")
    if uploaded_file is not None:
        file_name = uploaded_file.name
        audio_bytes = uploaded_file.getvalue()
        st.audio(audio_bytes, format='audio/wav')
elif input_method == DEFAULT_OPTION:
    if default_file_path.exists():
        st.info(f"Using default example file: '{DEFAULT_AUDIO_FILENAME}'")
        try:
            with open(default_file_path, "rb") as f:
                audio_bytes = f.read()
            file_name = DEFAULT_AUDIO_FILENAME
            # Display the audio player for the default file
            st.audio(audio_bytes, format='audio/wav')
        except Exception as e:
            st.error(f"Error reading default file '{DEFAULT_AUDIO_FILENAME}': {e}")
            # Reset so the analysis button is not offered for unreadable audio.
            audio_bytes = None
            file_name = None
    else:
        st.error(f"Default file not found: '{DEFAULT_AUDIO_FILENAME}'.")
        st.markdown(f"Please make sure a file named `{DEFAULT_AUDIO_FILENAME}` exists in the same directory as the Streamlit script (`app.py`).")
        # Ensure analysis button doesn't appear if file is missing
        audio_bytes = None
        file_name = None

# --- Analysis Trigger ---
# Runs for any input method, as long as both the audio and its name are set.
if audio_bytes and file_name:
    if st.button(f"Analyze Sentiment for '{file_name}'", key="analyze_button"):
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("📊 Sentiment Analysis Report")
            with st.spinner("Analyzing audio... This may take a minute or two depending on length."):
                report, timeline, target, is_cs = analyze_audio(audio_bytes, file_name)
            st.text_area("Detailed Report", report, height=400)
        with col2:
            st.subheader("📈 Sentiment Timeline Plot")
            if timeline:
                fig = plot_sentiment_timeline(timeline)
                if fig:
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.info("Could not generate plot.")
            else:
                st.info("No sentiment timeline data available to plot.")
# Don't show the hint when the default example is selected but its file is
# missing: the error above already explains what to do.
elif input_method != DEFAULT_OPTION or default_file_path.exists():
    st.info("Please provide audio via one of the methods above to begin analysis.")

# --- Footer/Info ---
st.markdown("---")
st.markdown("Powered by ZEGA AI")

# NOTE(review): the HTML payload of this component is blank in this copy of
# the file - confirm against the original.
components.html(""" """)