# app.py — Advanced Speech Intelligence Prototype (Gradio)
# Origin: Hugging Face Space by DineshJ96, commit bfb0d04 ("Update app.py", verified)
import gradio as gr
import whisper
from transformers import pipeline
import tempfile
import os
from pydub import AudioSegment
import json
import re
import shutil # For checking ffmpeg
import time # For tracking processing time
# Import load_dotenv from python-dotenv
from dotenv import load_dotenv
# Load environment variables from a local .env file so the os.getenv() calls
# below can pick up GEMINI_API_KEY / OPENAI_API_KEY during development.
load_dotenv()
# Optional: For Google Gemini API.
# gemini_model stays None unless the library is installed AND a key is configured.
gemini_model = None
try:
    import google.generativeai as genai

    # API key comes from the environment (populated by .env above).
    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
    if not GEMINI_API_KEY:
        print("GEMINI_API_KEY not found in .env. Google Gemini API will not be used.")
    else:
        genai.configure(api_key=GEMINI_API_KEY)
        # gemini-1.5-flash: generally more accessible and capable than gemini-pro.
        gemini_model = genai.GenerativeModel('gemini-1.5-flash')
        print("Google Gemini API loaded with gemini-1.5-flash.")
except ImportError:
    gemini_model = None
    print("google-generativeai not installed. Google Gemini API will not be used.")
except Exception as e:
    print(f"Could not load Google Gemini API: {e}")
    gemini_model = None
# Optional: For OpenAI GPT API.
# openai_client stays None unless the library is installed AND a key is configured.
openai_client = None
try:
    from openai import OpenAI

    # API key comes from the environment (populated by .env above).
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        print("OPENAI_API_KEY not found in .env. OpenAI API will not be used.")
    else:
        openai_client = OpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI API loaded.")
except ImportError:
    openai_client = None
    print("openai library not installed. OpenAI API will not be used.")
except Exception as e:
    print(f"Could not load OpenAI API: {e}")
    openai_client = None
# Global variables to store loaded models.
# Populated once by load_models() at startup; each remains None if loading
# fails, and downstream code feature-detects with a truthiness check.
whisper_model = None
summarization_pipeline = None
sentiment_pipeline = None
# --- Model Loading Function ---
def load_models():
    """Load the Whisper ASR, summarization, and sentiment models once at startup.

    Each global is set to the loaded model on success and to None on failure,
    so callers can feature-detect with a simple truthiness check.
    """
    global whisper_model, summarization_pipeline, sentiment_pipeline

    def _attempt(start_msg, ok_msg, err_prefix, factory):
        # Shared load-with-logging pattern: returns the model, or None on failure.
        print(start_msg)
        try:
            model = factory()
            print(ok_msg)
            return model
        except Exception as exc:
            print(f"{err_prefix}{exc}")
            return None

    # "tiny" keeps CPU inference fast. For better accuracy on noisy audio or
    # varied accents consider "base.en", "small.en", or "medium.en"/"large"
    # (progressively slower; the largest may need a GPU).
    whisper_model = _attempt(
        "Loading Whisper ASR model (tiny)...",
        "Whisper model (tiny) loaded successfully.",
        "Error loading Whisper model: ",
        lambda: whisper.load_model("tiny"),
    )

    # General-purpose summarizer; used as the fallback when cloud LLMs fail.
    summarization_pipeline = _attempt(
        "Loading Summarization pipeline (sshleifer/distilbart-cnn-12-6)...",
        "Summarization pipeline loaded successfully.",
        "Error loading Summarization pipeline: ",
        lambda: pipeline("summarization", model="sshleifer/distilbart-cnn-12-6"),
    )

    # Compact, efficient binary sentiment classifier.
    sentiment_pipeline = _attempt(
        "Loading Sentiment Analysis pipeline (distilbert-base-uncased-finetuned-sst-2-english)...",
        "Sentiment Analysis pipeline loaded successfully.",
        "Error loading Sentiment Analysis pipeline: ",
        lambda: pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"),
    )
# Load models when the script starts (module import has this side effect;
# failures are logged and leave the corresponding global as None).
load_models()
# --- Helper Functions for Analysis ---
def analyze_grammar_and_fluency(text):
    """
    Analyze *text* for grammar and spoken-English issues using heuristic regex rules.

    Args:
        text (str): The transcript to analyze.

    Returns:
        tuple: (corrections, tips) where corrections is a list of
        {"original_phrase": str, "suggestion": str} dicts — one entry per
        distinct match, in rule order — and tips is a Markdown string of
        general clarity/fluency advice.
    """
    # Match against the lowercased text but report phrases in original casing.
    lower_text = text.lower()

    # (pattern, suggestion_template) pairs. Templates containing regex
    # backreferences are expanded with re.sub; plain templates are advice text.
    correction_rules = [
        # Article usage errors (a/an) — purely orthographic heuristic
        # (e.g. "a hour"/"an user" edge cases are not handled).
        (r'\b(a)\s+([aeiouhAEIOUH]\w*)', r'an \2'),      # "a apple" -> "an apple"
        (r'\b(an)\s+([^aeiouhAEIOUH\s]\w*)', r'a \2'),   # "an book" -> "a book"
        # Common subject-verb agreement (simplified)
        (r'\b(he|she|it|this|that)\s+(have)\b', r'\1 has'),   # "he have" -> "he has"
        (r'\b(he|she|it|this|that)\s+(do)\b', r'\1 does'),    # "he do" -> "he does"
        (r'\b(we|you|they)\s+(has)\b', r'\1 have'),           # "we has" -> "we have"
        (r'\b(we|you|they)\s+(does)\b', r'\1 do'),            # "we does" -> "we do"
        # Double negatives (simplified)
        (r'\b(don\'t|doesn\'t|didn\'t)\s+no\b', r'\1 any'),   # "don't no" -> "don't any"
        # Repeated adjacent words (e.g. "the the", "I I")
        (r'\b(\w+)\s+\1\b', r'\1'),
        # Informal contractions
        (r'\bdon\'t\b', 'do not'),
        (r'\bcan\'t\b', 'cannot'),
        (r'\bwon\'t\b', 'will not'),
        (r'\bit\'s\b', 'it is'),
        (r'\bi\'m\b', 'I am'),
        (r'\byou\'re\b', 'you are'),
        (r'\bthey\'re\b', 'they are'),
        (r'\bwe\'re\b', 'we are'),
        (r'\bhe\'s\b', 'he is'),
        (r'\bshe\'s\b', 'she is'),
        # Overuse of "very"
        (r'\bvery\s+(good|nice|bad|big|small)', 'consider a stronger adjective like "excellent", "pleasant", "terrible", "large", "tiny"'),
        # Very rough comma-splice heuristic: a comma joining what looks like a
        # second independent clause without a coordinating conjunction.
        (r'(\w+),\s*(\w+[^,;]*?\s*(?:i|you|he|she|it|we|they)\s+\w+)', r'\1. \2'),
        # Common informal phrases/fillers that could be more formal or concise
        (r'\bat the end of the day\b', 'ultimately'),
        (r'\bto be honest\b', 'honestly'),
        (r'\byou know\b', 'consider rephrasing for conciseness'),
        (r'\bi mean\b', 'consider rephrasing for conciseness'),
        (r'\bkind of\b', 'somewhat'),
        (r'\bsort of\b', 'somewhat'),
    ]

    corrections = []
    seen = set()  # dedupe identical (phrase, suggestion) pairs across matches
    for pattern, template in correction_rules:
        # BUG FIX: use finditer so EVERY occurrence is reported — re.search
        # previously reported only the first match per rule, silently missing
        # later errors in the transcript.
        for match in re.finditer(pattern, lower_text, re.IGNORECASE):
            start_idx, end_idx = match.span()
            # Slice the original text so the report keeps the user's casing.
            # str.lower() preserves length for ASCII; guard against rare
            # multibyte case-folding length changes by falling back to the
            # lowercased match itself.
            if end_idx <= len(text):
                original_phrase = text[start_idx:end_idx]
            else:
                original_phrase = match.group(0)
            # Templates with backreferences are rewrites; plain strings are advice.
            if '\\' in template:
                suggestion = re.sub(pattern, template, original_phrase, flags=re.IGNORECASE)
            else:
                suggestion = template
            key = (original_phrase, suggestion)
            if key not in seen:
                seen.add(key)
                corrections.append({
                    "original_phrase": original_phrase,
                    "suggestion": suggestion,
                })

    # General tips are always returned, regardless of specific corrections.
    general_tips = [
        "\n**General Tips for Improving Clarity and Fluency:**",
        "- **Pace Yourself:** Speak at a moderate speed, allowing for natural pauses.",
        "- **Vary Sentence Structure:** Use a mix of simple, compound, and complex sentences to keep listeners engaged.",
        "- **Active Voice:** Prefer active voice over passive voice for stronger and clearer statements.",
        "- **Conciseness:** Avoid unnecessary words or phrases that don't add meaning.",
        "- **Practice Intonation:** Pay attention to the rise and fall of your voice to convey emotion and emphasize key points.",
        "- **Enunciate Clearly:** Speak clearly and articulate your words well.",
    ]
    return corrections, "\n".join(general_tips)
def detect_filler_words(text):
    """
    Detect common speech filler words and awkward phrases in *text*.

    Args:
        text (str): The transcript to scan.

    Returns:
        str: A Markdown report listing each detected filler/phrase with its
        count, followed by general recommendations for reducing them.
    """
    filler_words = [
        "um", "uh", "like", "you know", "so", "actually", "basically",
        "literally", "I mean", "right", "okay", "well", "kind of", "sort of"
    ]
    awkward_phrases = [
        "at the end of the day", "to be honest", "just saying", "it is what it is",
        "long story short", "believe it or not", "for all intents and purposes"
    ]
    detected_fillers = {}
    detected_awkward_phrases = {}

    # Normalize: lowercase and strip punctuation so "I mean," matches "i mean".
    normalized_text = re.sub(r'[^\w\s]', '', text.lower())

    # Detect filler words with word boundaries.
    # BUG FIX: compare in lowercase — the text is lowercased above, so the
    # mixed-case list entry "I mean" previously never matched anything.
    for filler in filler_words:
        count = len(re.findall(r'\b' + re.escape(filler.lower()) + r'\b', normalized_text))
        if count > 0:
            detected_fillers[filler] = count

    # Detect awkward phrases (multi-word, already lowercase; boundaries unneeded).
    for phrase in awkward_phrases:
        count = len(re.findall(re.escape(phrase), normalized_text))
        if count > 0:
            detected_awkward_phrases[phrase] = count

    output = []
    if detected_fillers:
        output.append("**Detected Filler Words:**")
        for word, count in detected_fillers.items():
            output.append(f"- '{word}': {count} time(s)")
    else:
        output.append("- No common filler words detected by this analysis.")
    if detected_awkward_phrases:
        output.append("\n**Detected Awkward Phrases:**")
        for phrase, count in detected_awkward_phrases.items():
            output.append(f"- '{phrase}': {count} time(s)")
    else:
        output.append("- No common awkward phrases detected by this analysis.")

    output.append("\n**Recommendations for Reducing Filler Words and Awkward Phrasing:**")
    output.append("- **Pause Instead:** Replace filler words with brief, intentional pauses to collect your thoughts. This makes you sound more confident and thoughtful.")
    output.append("- **Slow Down:** Speaking at a slightly slower, more deliberate pace can reduce the perceived need for fillers and improve overall clarity.")
    output.append("- **Practice Self-Awareness:** Record yourself speaking and listen back to identify your own filler habits. Awareness is the first step to change.")
    output.append("- **Pre-plan Key Points:** For important conversations or presentations, outline your main points to reduce hesitation and the urge to use fillers.")
    output.append("- **Breathe:** Use natural pauses to take a breath, which also helps regulate your speaking pace and reduces the likelihood of rushing.")
    output.append("- **Focus on the Message:** Concentrate on conveying your message clearly and concisely rather than feeling the need to fill every silence.")
    return "\n".join(output)
# --- LLM Integration Functions ---
def summarize_with_llm(text, model_type="gemini"):
    """
    Summarize *text* with a cloud LLM (Google Gemini or OpenAI GPT).

    Args:
        text (str): The text to summarize.
        model_type (str): 'gemini' or 'openai' — selects which LLM to call.

    Returns:
        str: The summary, or None when the requested model is unavailable
        or the API call fails.
    """
    prompt = f"Please summarize the following text concisely and clearly, focusing on the main points:\n\n{text}"

    if model_type == "gemini" and gemini_model:
        try:
            print("Calling Gemini for summarization...")
            response = gemini_model.generate_content(prompt)
            return response.text
        except Exception as exc:
            print(f"Error summarizing with Gemini: {exc}")
            return None

    if model_type == "openai" and openai_client:
        try:
            print("Calling OpenAI GPT for summarization...")
            completion = openai_client.chat.completions.create(
                model="gpt-3.5-turbo",  # or "gpt-4" if available and desired
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that summarizes text concisely."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=200,  # bounds summary length
                temperature=0.7,
            )
            return completion.choices[0].message.content
        except Exception as exc:
            print(f"Error summarizing with OpenAI: {exc}")
            return None

    # Requested model not configured (or unknown model_type).
    return None
def analyze_grammar_with_llm(text, model_type="gemini"):
    """
    Analyze grammar and spoken-English issues in *text* with a cloud LLM.

    Args:
        text (str): The text to analyze.
        model_type (str): 'gemini' or 'openai' — selects which LLM to call.

    Returns:
        str: The formatted analysis, or None when the requested model is
        unavailable or the API call fails.
    """
    prompt = f"Please analyze the following text for grammar, spelling, and spoken English issues. Provide specific, actionable correction suggestions and tips for improving clarity, conciseness, and fluency in spoken communication. Format your response clearly with bullet points for each suggestion and tip:\n\n{text}"

    if model_type == "gemini" and gemini_model:
        try:
            print("Calling Gemini for grammar analysis...")
            response = gemini_model.generate_content(prompt)
            return response.text
        except Exception as exc:
            print(f"Error analyzing grammar with Gemini: {exc}")
            return None

    if model_type == "openai" and openai_client:
        try:
            print("Calling OpenAI GPT for grammar analysis...")
            completion = openai_client.chat.completions.create(
                model="gpt-3.5-turbo",  # or "gpt-4" if available and desired
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that analyzes text for grammar, spelling, and spoken English issues."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=400,  # room for a detailed bullet-point analysis
                temperature=0.7,
            )
            return completion.choices[0].message.content
        except Exception as exc:
            print(f"Error analyzing grammar with OpenAI: {exc}")
            return None

    # Requested model not configured (or unknown model_type).
    return None
# --- Main Processing Function ---
def process_audio(audio_file_path):
    """
    Run the full speech-intelligence pipeline on one audio file.

    Steps: convert to WAV (pydub/FFmpeg), transcribe with Whisper, summarize,
    analyze grammar/fluency, detect filler words, and score sentiment. Cloud
    LLMs (Gemini/OpenAI) are preferred when configured, with local pipelines
    and heuristics as fallbacks.

    Args:
        audio_file_path (str): Filesystem path of the uploaded/recorded audio,
            or None when the user clicked before providing input.

    Returns:
        str: A Markdown formatted report. Every return path — including error
        paths — ends with the "Processing Complete" timing footer.
    """
    start_time = time.time()
    output_sections = []
    temp_audio_file_name = None  # temp WAV produced by the conversion step

    def _finish():
        # Append the timing footer and build the final report string.
        # BUG FIX: the footer used to be appended in `finally`, but early
        # `return "\n".join(...)` statements evaluated the report BEFORE
        # `finally` ran, so error reports silently lost the footer.
        processing_time = time.time() - start_time
        output_sections.append("\n## ✅ Processing Complete")
        output_sections.append(f"**Total Processing Time:** `{processing_time:.2f} seconds`")
        return "\n".join(output_sections)

    try:
        current_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time))
        output_sections.append(f"## Processing Started: {current_time_str}")
        print(f"Processing started at: {current_time_str}")

        # Guard: Gradio passes None when the user clicks before recording/uploading.
        if audio_file_path is None:
            print("ERROR: No audio file path received. Input was None.")
            output_sections.append("### ❌ Error: No audio input received.")
            output_sections.append("Please ensure you've recorded audio or uploaded a file, and then click the 'Process Audio' button.")
            return _finish()

        print(f"Received audio file path: {audio_file_path}")
        output_sections.append(f"**Input Audio File:** `{os.path.basename(audio_file_path)}`")

        # pydub shells out to FFmpeg; fail fast with a clear message if missing.
        if shutil.which("ffmpeg") is None:
            output_sections.append("**Error:** FFmpeg is not found in your system's PATH. `pydub` (used for audio conversion) requires FFmpeg. Please install FFmpeg and add it to your system's PATH environmental variable to use this application. You can download it from https://ffmpeg.org/download.html")
            print("FFmpeg not found in PATH.")
            return _finish()
        print("FFmpeg found in PATH.")

        # 1. Audio handling: always convert to WAV so downstream steps see one
        # consistent format (microphone input from browsers is often WebM).
        ext = os.path.splitext(audio_file_path)[1]
        print(f"Original file extension: {ext}")
        output_sections.append(f"Original audio format detected: `{ext}`")

        # delete=False + close: pydub needs to open the path itself (Windows-safe).
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_audio_file_name = temp_audio_file.name
        temp_audio_file.close()

        output_sections.append("Attempting to convert audio to WAV for consistent processing...")
        print(f"Attempting to convert {audio_file_path} to WAV at {temp_audio_file_name}")
        try:
            audio = AudioSegment.from_file(audio_file_path)
            audio.export(temp_audio_file_name, format="wav")
            audio_to_process = temp_audio_file_name
            output_sections.append("Successfully converted audio to WAV.")
            print(f"Converted audio to temporary WAV file: {temp_audio_file_name}")
        except Exception as e:
            output_sections.append("**Error during audio conversion with pydub:** Could not convert audio file to WAV. This might be due to an unsupported input format, a corrupt file, or an issue with FFmpeg.")
            output_sections.append(f"Details: `{e}`")
            print(f"Error during audio conversion with pydub: {e}")
            import traceback
            print(traceback.format_exc())
            # The partially-created temp file is removed in the `finally` block.
            return _finish()

        # 2. Transcribe audio (ASR).
        output_sections.append("\n## 🎤 Audio Transcription")
        transcript = None  # None == transcription unavailable or failed
        if whisper_model:
            try:
                print(f"Starting transcription of {audio_to_process}...")
                result = whisper_model.transcribe(audio_to_process)
                transcript = result["text"]
                output_sections.append(f"```\n{transcript}\n```")
                print("Transcription complete.")
            except Exception as e:
                output_sections.append(f"Error during transcription: {e}")
                print(f"Transcription error: {e}")
        else:
            output_sections.append("Whisper ASR model not loaded. Cannot transcribe audio.")

        # Every later step needs a transcript; stop here if we don't have one.
        # BUG FIX: failure used to be detected by comparing against sentinel
        # strings that did not cover all failure paths (the "model not loaded"
        # placeholder never matched), so the placeholder text itself was
        # summarized and analyzed.
        if transcript is None:
            return _finish()

        # 3. Summarize transcript (cloud LLM preferred, local model fallback).
        output_sections.append("\n## 📝 Summary")
        summary = None
        if gemini_model:
            summary = summarize_with_llm(transcript, model_type="gemini")
            if summary:
                output_sections.append(f"**Generated by Google Gemini (Advanced Model):**\n```\n{summary}\n```")
            else:
                output_sections.append("*(Note: Google Gemini summarization failed. Please check your API key and quota in Hugging Face Space secrets. Falling back to open-source model.)*")
        if not summary and openai_client:  # try OpenAI if Gemini unused or failed
            summary = summarize_with_llm(transcript, model_type="openai")
            if summary:
                output_sections.append(f"**Generated by OpenAI GPT (Advanced Model):**\n```\n{summary}\n```")
            else:
                output_sections.append("*(Note: OpenAI GPT summarization failed. Please check your API key and quota in Hugging Face Space secrets. Falling back to open-source model.)*")
        if not summary and summarization_pipeline:
            try:
                # The local model truncates internally at its token limit;
                # pre-truncate by characters (~2 chars/token heuristic) so we
                # don't feed it an enormous string.
                max_input_length = summarization_pipeline.model.config.max_position_embeddings
                if len(transcript) > max_input_length:
                    truncated_transcript = transcript[:max_input_length * 2]
                    output_sections.append(f"**Note:** Transcript might be truncated internally by the open-source summarization model due to token limit ({max_input_length} tokens).")
                else:
                    truncated_transcript = transcript
                summaries = summarization_pipeline(truncated_transcript, max_length=150, min_length=30, do_sample=False)
                summary = summaries[0]['summary_text']
                output_sections.append(f"**Generated by Open-Source Model:**\n```\n{summary}\n```")
            except Exception as e:
                output_sections.append(f"Error during open-source summarization: {e}")
        elif not summary:
            output_sections.append("Summarization model not loaded or failed. Cannot provide a summary.")

        # 4. Grammar and spoken-English analysis (cloud LLM preferred, heuristics fallback).
        output_sections.append("\n## 🗣️ Grammar & Spoken English Analysis")
        grammar_analysis_output = None
        if gemini_model:
            grammar_analysis_output = analyze_grammar_with_llm(transcript, model_type="gemini")
            if grammar_analysis_output:
                output_sections.append(f"**Generated by Google Gemini (Advanced Model):**\n{grammar_analysis_output}")
            else:
                output_sections.append("*(Note: Google Gemini grammar analysis failed. Please check your API key and quota in Hugging Face Space secrets. Falling back to heuristic rules.)*")
        if not grammar_analysis_output and openai_client:  # try OpenAI if Gemini unused or failed
            grammar_analysis_output = analyze_grammar_with_llm(transcript, model_type="openai")
            if grammar_analysis_output:
                output_sections.append(f"**Generated by OpenAI GPT (Advanced Model):**\n{grammar_analysis_output}")
            else:
                output_sections.append("*(Note: OpenAI GPT grammar analysis failed. Please check your API key and quota in Hugging Face Space secrets. Falling back to heuristic rules.)*")
        if not grammar_analysis_output:
            output_sections.append("**Generated by Heuristic Rules (Basic Analysis):**")
            corrections, general_tips = analyze_grammar_and_fluency(transcript)
            if corrections:
                output_sections.append("\n**Specific Suggestions:**")
                for item in corrections:
                    output_sections.append(f"- **Original:** `{item['original_phrase']}`\n **Suggestion:** `{item['suggestion']}`")
            else:
                output_sections.append("- No specific grammar or fluency issues detected by basic heuristics.")
            output_sections.append(general_tips)

        # 5. Filler-word detection (pure-Python heuristics; always available).
        output_sections.append("\n## 🚫 Filler Word & Awkward Phrasing Detection")
        output_sections.append(detect_filler_words(transcript))

        # 6. Sentiment analysis.
        output_sections.append("\n## 😊 Sentiment Analysis")
        if sentiment_pipeline:
            try:
                sentiments = sentiment_pipeline(transcript, truncation=True)
                for sent in sentiments:
                    label = sent['label']
                    score = sent['score']
                    explanation = ""
                    if label == "POSITIVE":
                        explanation = "The overall tone of the speech is positive, expressing approval, agreement, or optimism."
                    elif label == "NEGATIVE":
                        explanation = "The overall tone of the speech is negative, expressing disapproval, disagreement, or pessimism."
                    elif label == "NEUTRAL":
                        # NOTE(review): the sst-2 checkpoint appears to emit only
                        # POSITIVE/NEGATIVE; this branch is kept defensively —
                        # confirm if a 3-class model is ever substituted.
                        explanation = "The overall tone of the speech is neutral, indicating no strong positive or negative emotion."
                    output_sections.append(f"- **Label:** `{label}` (Confidence Score: `{score:.2f}`)")
                    output_sections.append(f" - **Explanation:** {explanation}")
            except Exception as e:
                output_sections.append(f"Error during sentiment analysis: {e}")
        else:
            output_sections.append("Sentiment analysis model not loaded. Cannot perform sentiment analysis.")
    except Exception as e:
        # Top-level boundary: surface any unexpected failure in the report itself.
        output_sections.append(f"\n## ❌ An unexpected error occurred during processing: {e}")
        import traceback
        output_sections.append(f"```\n{traceback.format_exc()}\n```")
    finally:
        # Always clean up the temporary converted WAV. (Gradio manages the
        # original uploaded file itself.)
        if temp_audio_file_name and os.path.exists(temp_audio_file_name):
            os.remove(temp_audio_file_name)
            print(f"Cleaned up temporary converted audio file: {temp_audio_file_name}")

    return _finish()
# --- Gradio Interface ---
# Custom CSS for aesthetics — injected into the Gradio page via gr.Blocks(css=...).
# NOTE: the string below is runtime data (CSS), not Python; edit with care.
custom_css: str = """
body {
font-family: 'Inter', sans-serif;
background-color: #f0f2f5; /* Light gray background */
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
margin: 0;
padding: 20px;
box-sizing: border-box;
}
.gradio-container {
max-width: 900px;
width: 100%;
background-color: #ffffff; /* White card background */
border-radius: 15px; /* Rounded corners */
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1); /* Soft shadow */
padding: 30px;
box-sizing: border-box;
}
h1, h2 {
color: #2c3e50; /* Dark blue-gray for headings */
text-align: center;
margin-bottom: 20px;
}
.gr-button {
background-color: #3498db !important; /* Blue button */
color: white !important;
border-radius: 8px !important;
padding: 10px 20px !important;
font-weight: bold !important;
transition: background-color 0.3s ease, transform 0.2s ease !important;
box-shadow: 0 4px 10px rgba(52, 152, 219, 0.3);
}
.gr-button:hover {
background-color: #2980b9 !important; /* Darker blue on hover */
transform: translateY(-2px);
}
.gr-audio-input {
border: 2px dashed #a0a0a0 !important; /* Dashed border for audio input */
border-radius: 10px !important;
padding: 20px !important;
background-color: #f9f9f9;
}
.gr-markdown {
background-color: #ecf0f1; /* Light background for markdown output */
border-radius: 10px;
padding: 20px;
margin-top: 20px;
border: 1px solid #dcdcdc;
}
/* Ensure elements are responsive */
.gradio-container, .gr-audio-input, .gr-markdown {
max-width: 100%;
overflow-x: auto; /* Allow horizontal scrolling for code blocks if needed */
}
"""
# Define the Gradio interface using gr.Blocks for layout control.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Page title and usage instructions shown above the controls.
    gr.Markdown("# Advanced Speech Intelligence Prototype")
    gr.Markdown(
        "Upload an audio file (MP3, WAV, M4A, FLAC, OGG, WebM recommended) or record directly from your microphone "
        "to get a detailed speech intelligence report. **After recording, ensure the audio waveform appears, then click 'Process Audio'.** "
        "Features include: Automatic Speech Recognition (ASR) transcription, summarization, "
        "grammar and spoken English analysis, common filler word detection, and sentiment analysis. "
    )
    # type="filepath" makes Gradio hand process_audio a filesystem path;
    # format="wav" asks the component to deliver WAV where possible.
    audio_input = gr.Audio(type="filepath", label="Upload Audio File or Record from Microphone", sources=["microphone", "upload"], format="wav")
    process_button = gr.Button("Process Audio")
    # process_audio returns Markdown, rendered directly in this component.
    output_report = gr.Markdown(label="Speech Intelligence Report")
    # Link the button click to the processing function.
    process_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=output_report
    )
# Launch the Gradio app when this file is executed directly (e.g. `python app.py`).
if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.launch()