Spaces:

rodunia
/

interview-copilot

Sleeping

App Files Files Community

interview-copilot / app.py

rodunia

Update app.py

58e2ca7 verified 8 months ago

raw

history blame contribute delete

62.1 kB

	import os
	import gradio as gr
	import json
	from datetime import datetime
	from typing import List, Dict, Tuple
	from dotenv import load_dotenv
	import shutil
	import tempfile
	import google.generativeai as genai
	import traceback
	import numpy as np
	import scipy.io.wavfile as wavfile

	# Load environment variables (e.g. from a local .env file) before any key
	# is read below.
	load_dotenv()

	# Import OpenAI for Whisper transcription
	from openai import OpenAI

	# Initialize the OpenAI client used for Whisper transcription.
	# NOTE: per the openai-python v1 client, constructing OpenAI() with
	# api_key=None and no OPENAI_API_KEY in the environment raises at
	# construction time, which would crash the app on import. Falling back to
	# an empty string lets the module load; `transcribe_audio` then sees a
	# falsy `openai_client.api_key` and reports the missing key itself.
	openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))

	# Configure Gemini for analysis.
	#
	# After this section runs, `gemini_model` is either a `genai.GenerativeModel`
	# instance or None (no API key, or no model could be initialized).
	gemini_api_key = os.getenv("GEMINI_API_KEY")

	# Generation settings shared by every explicitly preferred model below.
	GEMINI_GENERATION_CONFIG = {
	    'temperature': 0.7,  # Balance creativity and consistency
	    'top_p': 0.95,
	    'top_k': 40,
	    'max_output_tokens': 8192,  # Increased for detailed analysis
	}

	gemini_model = None
	if gemini_api_key:
	    genai.configure(api_key=gemini_api_key)
	    # Try to use the best available Gemini model
	    try:
	        # List available models that support text generation
	        available_models = genai.list_models()
	        print("📋 Available Gemini models:")
	        gemini_models = []
	        for model in available_models:
	            if 'generateContent' in model.supported_generation_methods:
	                print(f" - {model.name}")
	                gemini_models.append(model.name)

	        # Priority order: Try the best models first
	        model_priority = [
	            'models/gemini-1.5-pro-latest',  # Latest 1.5 Pro
	            'models/gemini-1.5-pro',  # Stable 1.5 Pro
	            'models/gemini-1.5-pro-002',  # Specific version
	            'models/gemini-1.5-flash',  # Faster but still good
	            'models/gemini-pro',  # Original Pro
	        ]

	        gemini_model = None
	        for model_name in model_priority:
	            if model_name in gemini_models:
	                try:
	                    gemini_model = genai.GenerativeModel(
	                        model_name.replace('models/', ''),
	                        generation_config=dict(GEMINI_GENERATION_CONFIG),
	                    )
	                    print(f"✅ Using {model_name} - Best available model!")
	                    break
	                except Exception as e:
	                    print(f" Could not initialize {model_name}: {e}")

	        # Fallback if none of the preferred models work: take the first
	        # listed model, with the library's default generation settings.
	        if not gemini_model and gemini_models:
	            model_name = gemini_models[0].replace('models/', '')
	            gemini_model = genai.GenerativeModel(model_name)
	            print(f"✅ Using {model_name}")

	        if not gemini_model:
	            print("❌ No suitable Gemini models found!")

	    except Exception as e:
	        print(f"⚠️ Error listing Gemini models: {e}")
	        # Listing failed; try direct initialization with the best model.
	        try:
	            gemini_model = genai.GenerativeModel(
	                'gemini-1.5-pro',
	                generation_config=dict(GEMINI_GENERATION_CONFIG),
	            )
	            print("✅ Gemini 1.5 Pro initialized (direct)")
	        except Exception:
	            # `except Exception` (not a bare `except`) so Ctrl-C and
	            # SystemExit are not swallowed during startup.
	            try:
	                gemini_model = genai.GenerativeModel('gemini-pro')
	                print("✅ Gemini Pro initialized (fallback)")
	            except Exception:
	                print("❌ Could not initialize any Gemini model!")
	                gemini_model = None
	else:
	    print("⚠️ No Gemini API key found!")
	    gemini_model = None


	class InterviewCoPilot:
	def __init__(self):
	self.transcript_history = []
	self.research_questions = []
	self.interview_protocol = []
	self.detected_codes = []
	self.coverage_status = {
	"rq_covered": [],
	"protocol_covered": []
	}
	# Add file tracking
	self.processed_files = []
	self.current_file_info = {}
	self.current_audio_path = None # Store the current audio path

	# Enhanced framework support - Initialize all attributes
	self.theoretical_framework = ""
	self.predefined_codes = {} # {category: [codes]}
	self.analysis_focus = []
	self.is_continuation = False # Initialize here
	self.segment_number = 1 # Initialize here

	# Session memory for Phase 1
	self.session_segments = [] # List of processed segments
	self.session_name = f"Interview_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
	self.framework_loaded = False

	# Create a persistent temp directory for this session
	self.temp_dir = tempfile.mkdtemp(prefix="interview_copilot_")
	print(f"📁 Created temp directory: {self.temp_dir}")

	# Multi-view analysis support
	self.segment_analyses = {} # Store individual segment analyses

	def __del__(self):
	"""Cleanup temp directory on exit"""
	if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):
	try:
	shutil.rmtree(self.temp_dir)
	print(f"🧹 Cleaned up temp directory: {self.temp_dir}")
	except:
	pass

	def setup_research_context(self, research_questions: str, interview_protocol: str,
	                           theoretical_framework: str = "", predefined_codes: str = "",
	                           analysis_focus: str = ""):
	    """Store the research framework and return a status message.

	    Research questions, protocol questions and focus areas are read one
	    per line (blank lines ignored). Predefined codes are read from lines
	    shaped like ``Category: code1, code2``; lines without a colon are
	    skipped. Returns an error message, leaving state untouched, when no
	    research questions are supplied.
	    """
	    if not research_questions.strip():
	        return "❌ Please provide at least research questions"

	    def non_blank_lines(block):
	        # Trimmed, non-empty lines of a multi-line text field.
	        trimmed = (raw.strip() for raw in block.split('\n'))
	        return [entry for entry in trimmed if entry]

	    self.research_questions = non_blank_lines(research_questions)
	    self.interview_protocol = non_blank_lines(interview_protocol)
	    self.theoretical_framework = theoretical_framework.strip()

	    # Parse "Category: code1, code2, code3" lines
	    parsed_codes = {}
	    if predefined_codes.strip():
	        for entry in predefined_codes.split('\n'):
	            category, colon, code_list = entry.partition(':')
	            if not colon:
	                continue
	            stripped = (item.strip() for item in code_list.split(','))
	            parsed_codes[category.strip()] = [item for item in stripped if item]
	    self.predefined_codes = parsed_codes

	    self.analysis_focus = non_blank_lines(analysis_focus)

	    # Fresh coverage flags, one per question
	    rq_count = len(self.research_questions)
	    protocol_count = len(self.interview_protocol)
	    self.coverage_status = {
	        "rq_covered": [False] * rq_count,
	        "protocol_covered": [False] * protocol_count
	    }

	    # Assemble the status report
	    report = [
	        "✅ Setup complete!",
	        f"📋 Research Questions: {rq_count}",
	        f"📝 Protocol Questions: {protocol_count}"
	    ]
	    if self.theoretical_framework:
	        report.append("📚 Theoretical Framework: Yes")
	    if self.predefined_codes:
	        total_codes = sum(map(len, self.predefined_codes.values()))
	        report.append(f"🏷️ Predefined Codes: {total_codes} codes in {len(self.predefined_codes)} categories")
	    if self.analysis_focus:
	        report.append(f"🎯 Analysis Focus Areas: {len(self.analysis_focus)}")

	    self.framework_loaded = True
	    return "\n".join(report)

	def add_segment_to_session(self, file_name, duration, transcript_length):
	    """Record a processed segment in the session and return its record.

	    The record is numbered sequentially, stamped with the current
	    wall-clock time (HH:MM:SS) and notes how many codes have been
	    detected so far.
	    """
	    next_number = len(self.session_segments) + 1
	    recorded_at = datetime.now().strftime("%H:%M:%S")
	    record = dict(
	        number=next_number,
	        file_name=file_name,
	        duration=duration,
	        transcript_length=transcript_length,
	        timestamp=recorded_at,
	        codes_found=len(self.detected_codes),
	    )
	    self.session_segments.append(record)
	    return record

	def get_session_summary(self):
	    """Return a Markdown summary of the segments processed this session."""
	    segments = self.session_segments
	    if not segments:
	        return "No segments processed yet"

	    # Missing fields count as zero in the totals
	    minutes = sum(entry.get("duration", 0) for entry in segments)
	    characters = sum(entry.get("transcript_length", 0) for entry in segments)
	    segment_count = len(segments)
	    unique_codes = len(set(self.detected_codes))

	    overview = f"""### 📊 Current Session: {self.session_name}

	Segments Processed: {segment_count}
	Total Duration: {minutes:.1f} minutes
	Total Transcript: {characters:,} characters
	Unique Codes Found: {unique_codes}

	Processed Files:
	"""
	    file_lines = "".join(
	        f"\n✓ Segment {entry['number']} - {entry['file_name']} ({entry['timestamp']})"
	        for entry in segments
	    )
	    return overview + file_lines

	def reset_session(self, keep_framework=True):
	    """Clear session data, optionally discarding the research framework too.

	    With ``keep_framework=True`` the questions, codes and focus areas
	    stay and only their coverage flags go back to False. Otherwise the
	    framework is emptied as well. A new session name is generated either
	    way. Returns a short confirmation message.
	    """
	    # Per-session state is always dropped
	    self.session_segments = []
	    self.transcript_history = []
	    self.detected_codes = []
	    self.processed_files = []
	    self.segment_number = 1
	    self.is_continuation = False
	    self.segment_analyses = {}

	    if keep_framework:
	        # Same framework, coverage starts over
	        self.coverage_status = {
	            "rq_covered": [False for _ in self.research_questions],
	            "protocol_covered": [False for _ in self.interview_protocol]
	        }
	        outcome = "Framework kept."
	    else:
	        self.research_questions = []
	        self.interview_protocol = []
	        self.theoretical_framework = ""
	        self.predefined_codes = {}
	        self.analysis_focus = []
	        self.framework_loaded = False
	        self.coverage_status = {"rq_covered": [], "protocol_covered": []}
	        outcome = "Everything cleared."

	    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	    self.session_name = f"Interview_{stamp}"
	    return "✅ Session reset. " + outcome

	def save_uploaded_file(self, audio_path):
	    """Copy an uploaded file into the session temp directory.

	    Args:
	        audio_path: Path of the uploaded file.

	    Returns:
	        The path of the copy inside ``self.temp_dir``, or None when
	        ``audio_path`` is empty, does not exist, or the copy fails.

	    An existing file of the same name is never overwritten: the copy
	    gets a ``_HHMMSS`` suffix and, if that name is taken too (several
	    saves within the same second), an extra ``_N`` counter.
	    """
	    if not audio_path or not os.path.exists(audio_path):
	        return None

	    try:
	        file_name = os.path.basename(audio_path)
	        saved_path = os.path.join(self.temp_dir, file_name)

	        if os.path.exists(saved_path):
	            name, ext = os.path.splitext(file_name)
	            timestamp = datetime.now().strftime("%H%M%S")
	            saved_path = os.path.join(self.temp_dir, f"{name}_{timestamp}{ext}")
	            # The timestamp only has one-second resolution, so keep
	            # looking until the name is genuinely free.
	            attempt = 1
	            while os.path.exists(saved_path):
	                saved_path = os.path.join(
	                    self.temp_dir, f"{name}_{timestamp}_{attempt}{ext}")
	                attempt += 1

	        shutil.copy2(audio_path, saved_path)
	        print(f"💾 Saved file to: {saved_path}")
	        return saved_path

	    except Exception as e:
	        print(f"❌ Error saving file: {str(e)}")
	        return None

	def check_audio_file(self, audio_path):
	    """Store an uploaded file and report whether it can be processed.

	    Returns a ``(usable_path, status, saved_path)`` triple.
	    ``usable_path`` is None when nothing was selected, saving failed, an
	    error occurred, or the file is larger than 25 MB; in the last case
	    ``saved_path`` still points at the stored copy.
	    """
	    if not audio_path:
	        return None, "No file selected", None

	    try:
	        # Keep our own copy in the session temp directory
	        stored_path = self.save_uploaded_file(audio_path)
	        if not stored_path:
	            return None, "❌ Error saving uploaded file", None

	        size_mb = os.path.getsize(stored_path) / (1024 * 1024)
	        base_name = os.path.basename(stored_path)

	        self.current_file_info = dict(
	            name=base_name,
	            size_mb=size_mb,
	            path=stored_path,
	            original_path=audio_path,
	        )

	        # Debug info
	        print("📊 File check:")
	        print(f" - Original path: {audio_path}")
	        print(f" - Saved path: {stored_path}")
	        print(f" - Size: {size_mb:.2f} MB")
	        print(f" - Exists: {os.path.exists(stored_path)}")

	        if size_mb <= 25:
	            # Good to go
	            ready = f"""✅ File ready for processing
	- File: {base_name}
	- Size: {size_mb:.1f} MB
	- Status: Within limits
	- Saved to: {os.path.basename(self.temp_dir)}/"""
	            return stored_path, ready, stored_path

	        oversized = f"""⚠️ File too large for direct processing
	- File: {base_name}
	- Size: {size_mb:.1f} MB
	- Maximum: 25 MB

	Options:
	1. Compress the file using the compression tool below
	2. Split into smaller segments
	3. Use a different recording with lower quality settings"""
	        return None, oversized, stored_path

	    except Exception as e:
	        print(f"❌ Error in check_audio_file: {traceback.format_exc()}")
	        return None, f"❌ Error checking file: {str(e)}", None

	def compress_audio(self, audio_path, quality="medium"):
	    """Re-encode an audio file as mono at a lower bitrate using ffmpeg.

	    Args:
	        audio_path: A file path, or a ``(sample_rate, audio_data)`` tuple
	            that is first written to a WAV file in the session temp
	            directory.
	        quality: ``"high"``, ``"medium"`` or ``"low"``; unknown values
	            fall back to the medium preset.

	    Returns:
	        ``(output_path, message)`` on success, ``(None, message)`` on any
	        failure. On success ``self.current_file_info`` is pointed at the
	        compressed file.
	    """
	    actual_path = None

	    # If it's a tuple (sample_rate, audio_data), save it first
	    if isinstance(audio_path, tuple) and len(audio_path) == 2:
	        sample_rate, audio_data = audio_path
	        temp_path = os.path.join(self.temp_dir, f"temp_audio_{datetime.now().strftime('%H%M%S')}.wav")
	        wavfile.write(temp_path, sample_rate, audio_data)
	        actual_path = temp_path
	    elif isinstance(audio_path, str):
	        actual_path = audio_path
	    else:
	        return None, "No valid audio file to compress"

	    if not actual_path or not os.path.exists(actual_path):
	        return None, "No file to compress or file not found"

	    import subprocess

	    try:
	        # Quality presets
	        quality_settings = {
	            "high": {"bitrate": "128k", "sample_rate": "44100"},
	            "medium": {"bitrate": "64k", "sample_rate": "22050"},
	            "low": {"bitrate": "32k", "sample_rate": "16000"}
	        }
	        settings = quality_settings.get(quality, quality_settings["medium"])

	        # Output lives in our temp directory, next to the other session files
	        input_name = os.path.basename(actual_path)
	        name, ext = os.path.splitext(input_name)
	        output_path = os.path.join(self.temp_dir, f"{name}_compressed{ext}")

	        cmd = [
	            'ffmpeg', '-i', actual_path,
	            '-b:a', settings["bitrate"],
	            '-ar', settings["sample_rate"],
	            '-ac', '1',  # Mono
	            '-y', output_path
	        ]

	        try:
	            result = subprocess.run(cmd, capture_output=True, text=True)
	        except FileNotFoundError as e:
	            # A missing ffmpeg executable surfaces as FileNotFoundError,
	            # which is not a SubprocessError; report it with the same
	            # installation hint instead of a generic error.
	            return None, f"❌ FFmpeg error: {str(e)}\n\nMake sure ffmpeg is installed."

	        if result.returncode != 0:
	            return None, f"❌ Compression failed: {result.stderr}"

	        # Compare sizes (in MB)
	        new_size = os.path.getsize(output_path) / (1024 * 1024)
	        old_size = os.path.getsize(actual_path) / (1024 * 1024)
	        # Guard the percentage against an empty original file
	        reduction = (old_size - new_size) / old_size * 100 if old_size else 0.0

	        # Update file info
	        self.current_file_info["path"] = output_path
	        self.current_file_info["size_mb"] = new_size

	        return output_path, f"""✅ Compression successful!
	- Original size: {old_size:.1f} MB
	- Compressed size: {new_size:.1f} MB
	- Reduction: {reduction:.0f}%
	- Quality setting: {quality}
	- Saved to: {os.path.basename(output_path)}"""

	    except subprocess.SubprocessError as e:
	        return None, f"❌ FFmpeg error: {str(e)}\n\nMake sure ffmpeg is installed."
	    except Exception as e:
	        return None, f"❌ Error: {str(e)}"

	def transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
	    """Transcribe an audio file with the Whisper API.

	    Returns the transcript text. Every failure is reported as a string
	    starting with ``"Error:"`` rather than raised. ``progress_callback``,
	    when given, is called once with a status line before the upload.
	    """
	    if not audio_path:
	        return "Error: No audio file provided"
	    if not os.path.exists(audio_path):
	        return f"Error: Audio file not found at path: {audio_path}"
	    if not openai_client.api_key:
	        return "Error: OpenAI API key not found (needed for transcription)"

	    try:
	        size_bytes = os.path.getsize(audio_path)
	        size_mb = size_bytes / (1024 * 1024)
	        print(f"📊 Transcribing file: {audio_path}")
	        print(f"📊 File size: {size_mb:.2f} MB ({size_bytes} bytes)")

	        # 25 MB is the upload limit enforced here before calling the API
	        if size_mb > 25:
	            return f"Error: Audio file too large. File size: {size_mb:.1f} MB (limit: 25 MB)"

	        if progress_callback:
	            progress_callback(f"🎵 Transcribing {size_mb:.1f} MB file with OpenAI Whisper...")

	        with open(audio_path, "rb") as handle:
	            print("📊 Sending to OpenAI Whisper API...")
	            response = openai_client.audio.transcriptions.create(
	                model="whisper-1",
	                file=handle,
	                response_format="text"
	            )

	        # Normalise the response to a plain string
	        text = response if isinstance(response, str) else str(response)

	        # Remember which file this transcript came from
	        source_name = self.current_file_info.get("name", "unknown")
	        if source_name not in self.processed_files:
	            self.processed_files.append(source_name)

	        print(f"✅ Transcription successful! Length: {len(text)} characters")
	        return text

	    except Exception as e:
	        error_msg = str(e)
	        print(f"❌ OpenAI API error: {error_msg}")

	        # Map known failure texts to friendlier messages
	        if "Invalid file format" in error_msg:
	            return "Error: Invalid audio file format. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm"
	        if "too large" in error_msg.lower():
	            return "Error: Audio file too large. Please use files under 25MB."
	        if "Incorrect API key" in error_msg or "Authentication" in error_msg:
	            return "Error: Invalid OpenAI API key. Please check your .env file."
	        if "Rate limit" in error_msg:
	            return "Error: OpenAI rate limit reached. Please wait a moment and try again."
	        return f"Error: {error_msg}"

	def analyze_transcript_with_gemini(self, text: str) -> Dict:
	    """Analyze ``text`` as the current segment.

	    Thin wrapper that forwards to
	    ``analyze_transcript_with_gemini_enhanced`` with the instance's
	    current ``segment_number``.
	    """
	    active_segment = self.segment_number
	    return self.analyze_transcript_with_gemini_enhanced(text, segment_num=active_segment)

	def analyze_transcript_with_gemini_enhanced(self, text: str, segment_num: int = None) -> Dict:
	    """Run a Gemini analysis of one segment or of the combined transcript.

	    ``segment_num=None`` requests a combined analysis; any other value
	    names the segment being analysed. Returns the parsed analysis dict,
	    or ``{"error": ...}`` when preconditions fail or the request raises.
	    If the reply is not valid JSON, a default "analysis failed" structure
	    is used instead. On success, per-segment results are cached and the
	    coverage flags and detected-code list are updated.
	    """
	    if not text or len(text.strip()) < 10:
	        return {"error": "Text too short to analyze"}
	    if not self.research_questions:
	        return {"error": "Please set up research questions first"}
	    if not gemini_model:
	        return {"error": "Gemini API not configured"}

	    # None means "analyse everything together"
	    is_combined = segment_num is None
	    current_segment = segment_num if segment_num else self.segment_number

	    # Context: where this text sits in the interview
	    if is_combined:
	        context_lines = [
	            "This is a COMBINED ANALYSIS of all segments.",
	            f"Total segments: {len(self.session_segments)}",
	        ]
	    else:
	        context_lines = [f"This is Segment {current_segment} of the interview."]
	        if current_segment > 1:
	            context_lines.append("Previous segments have covered:")
	            already_covered = [
	                f"RQ{idx + 1}"
	                for idx, done in enumerate(self.coverage_status["rq_covered"])
	                if done
	            ]
	            if already_covered:
	                context_lines.append(f"- Research Questions: {', '.join(already_covered)}")
	    context_section = "\n".join(context_lines)

	    # Optional framework details
	    framework_bits = []
	    if self.theoretical_framework:
	        framework_bits.append(f"\nTHEORETICAL FRAMEWORK:\n{self.theoretical_framework}\n")
	    if self.predefined_codes:
	        framework_bits.append("\nPREDEFINED CODES:\n")
	        for category, codes in self.predefined_codes.items():
	            framework_bits.append(f"- {category}: {', '.join(codes)}\n")
	    if self.analysis_focus:
	        framework_bits.append("\nANALYSIS FOCUS:\n")
	        framework_bits.append("\n".join(f"- {focus}" for focus in self.analysis_focus))
	    framework_section = "".join(framework_bits)

	    analysis_type = "COMBINED TRANSCRIPT" if is_combined else f"SEGMENT {current_segment}"

	    # Pieces of the prompt that differ between combined and individual runs
	    rq_listing = chr(10).join(
	        f" RQ{idx + 1}: {question}" for idx, question in enumerate(self.research_questions))
	    protocol_listing = chr(10).join(
	        f" Q{idx + 1}: {question}" for idx, question in enumerate(self.interview_protocol))
	    if is_combined:
	        task_six = "6. Identify patterns across segments"
	        task_seven = "7. Note evolution of themes"
	        segment_number_json = '"combined"'
	        analysis_kind = "combined"
	        summary_scope = "all segments combined"
	        patterns_field = ', "cross_segment_patterns": ["Pattern 1", "Pattern 2"],'
	        evolution_field = '"theme_evolution": "Description of how themes evolved across segments"'
	    else:
	        task_six = ""
	        task_seven = ""
	        segment_number_json = current_segment
	        analysis_kind = "individual"
	        summary_scope = "this segment's contribution"
	        patterns_field = ""
	        evolution_field = ""

	    prompt = f"""You are a Qualitative Research Analysis Assistant.

	{context_section}

	{analysis_type}: "{text}"

	RESEARCH FRAMEWORK:
	- Research Questions:
	{rq_listing}

	- Interview Protocol:
	{protocol_listing}

	{framework_section}

	ANALYSIS TASKS:
	1. Apply predefined codes where relevant
	2. Identify emergent codes not in the framework
	3. Track research question coverage
	4. Note theoretical alignments or challenges
	5. Consider the analysis focus areas
	{task_six}
	{task_seven}

	PROVIDE YOUR ANALYSIS IN THIS EXACT JSON FORMAT:
	{{
	"segment_number": {segment_number_json},
	"analysis_type": "{analysis_kind}",
	"alerts": [
	{{"type": "supports", "code": "Code Name", "text": "✅ Supports [Theory/Concept]: ..."}},
	{{"type": "challenges", "text": "⚠️ Challenges [Framework]: ..."}},
	{{"type": "missing", "text": "🔍 Missing [Dimension]: ..."}},
	{{"type": "emergent", "code": "New Code", "text": "✳️ Emergent theme: ..."}},
	{{"type": "noteworthy", "text": "📌 Noteworthy: ..."}}
	],
	"rq_addressed": [1, 2],
	"codes_applied": ["Code 1", "Code 2"],
	"emergent_codes": ["New Theme 1"],
	"coverage": {{
	"protocol_covered": [1, 3, 5],
	"completion_percent": 40,
	"missing_topics": ["Topic A", "Topic B"]
	}},
	"follow_ups": [
	"🧭 To explore [concept], ask: 'Question?'",
	"🧭 RQ3 needs data on [topic]"
	],
	"insights": [
	"Key pattern or finding",
	"Theoretical implication"
	],
	"segment_summary": "Brief summary of {summary_scope}"{patterns_field}{evolution_field}
	}}

	Return ONLY the JSON."""

	    try:
	        print(f"🤖 Analyzing {analysis_type} with Gemini...")
	        reply = gemini_model.generate_content(prompt)
	        content = reply.text.strip()

	        # Parse the outermost {...} span if there is one, else the whole reply
	        try:
	            open_at = content.find('{')
	            close_at = content.rfind('}') + 1
	            has_object = open_at >= 0 and close_at > open_at
	            payload = content[open_at:close_at] if has_object else content
	            analysis = json.loads(payload)
	        except json.JSONDecodeError:
	            print(f"JSON parsing error. Raw response: {content[:200]}...")
	            # Fall back to an empty, clearly-marked result
	            analysis = {
	                "segment_number": "combined" if is_combined else current_segment,
	                "analysis_type": "combined" if is_combined else "individual",
	                "alerts": [],
	                "rq_addressed": [],
	                "codes_applied": [],
	                "emergent_codes": [],
	                "coverage": {
	                    "protocol_covered": [],
	                    "completion_percent": 0,
	                    "missing_topics": []
	                },
	                "follow_ups": ["Please try again"],
	                "insights": ["Unable to parse response"],
	                "segment_summary": "Analysis failed"
	            }

	        # Cache per-segment results for the multi-view reports
	        if not is_combined:
	            self.segment_analyses[current_segment] = analysis

	        # Mark reported research questions as covered (1-based numbers)
	        rq_flags = self.coverage_status["rq_covered"]
	        rq_total = len(self.research_questions)
	        for number in analysis.get("rq_addressed", []):
	            if isinstance(number, int) and 0 < number <= rq_total:
	                rq_flags[number - 1] = True

	        # Same for protocol questions
	        protocol_total = len(self.interview_protocol)
	        for number in analysis.get("coverage", {}).get("protocol_covered", []):
	            if isinstance(number, int) and 0 < number <= protocol_total:
	                self.coverage_status["protocol_covered"][number - 1] = True

	        # Add codes to master list
	        for key in ("codes_applied", "emergent_codes"):
	            self.detected_codes.extend(analysis.get(key, []))

	        return analysis

	    except Exception as e:
	        print(f"❌ Gemini error: {type(e).__name__}: {str(e)}")
	        return {"error": f"Analysis error: {str(e)}"}

	def format_analysis_output(self, analysis: Dict, show_segment_info: bool = True) -> str:
	    """Render an analysis dict as Markdown-style text.

	    Args:
	        analysis: Result of an analysis run; a dict containing ``"error"``
	            is rendered as a single error line.
	        show_segment_info: Include the segment summary / segment count
	            line under the header.

	    Returns:
	        The formatted report.

	    The dict originates from model-generated JSON, so list fields and
	    ``coverage`` may be missing or ``null``; both are treated as empty,
	    and list items are converted with ``str`` before joining.
	    """
	    if "error" in analysis:
	        return f"❌ {analysis['error']}"

	    def items_of(mapping, key):
	        # List stored under `key`, or [] when it is absent or null.
	        return mapping.get(key) or []

	    coverage_info = analysis.get("coverage") or {}
	    is_combined = analysis.get("analysis_type") == "combined"
	    segment_num = analysis.get("segment_number", "Unknown")

	    # Alerts section (header shown whenever the key is present)
	    alerts_text = ""
	    if "alerts" in analysis:
	        alerts_text = "### 📢 Analysis Alerts:\n"
	        for alert in items_of(analysis, "alerts"):
	            alert_line = alert.get('text', '') if isinstance(alert, dict) else alert
	            alerts_text += f"{alert_line}\n"

	    # Codes section
	    codes_section = ""
	    applied_codes = [str(code) for code in items_of(analysis, "codes_applied")]
	    emergent_codes = [str(code) for code in items_of(analysis, "emergent_codes")]
	    if applied_codes:
	        codes_section += f"Applied Codes: {', '.join(applied_codes)}\n"
	    if emergent_codes:
	        codes_section += f"✳️ Emergent Codes: {', '.join(emergent_codes)}\n"

	    # Header depends on the analysis type
	    if is_combined:
	        header = "### 📊 Combined Analysis Results (All Segments)"
	        segment_info = f"Total Segments Analyzed: {len(self.session_segments)}\n"
	    else:
	        header = f"### 📊 Analysis Results - Segment {segment_num}"
	        segment_info = f"📍 Segment {segment_num} Summary: {analysis.get('segment_summary', 'Analysis of this segment')}\n"

	    # File name of the segment. The number is 1-based; requiring >= 1
	    # stops 0 or a negative value from wrapping around to another segment.
	    file_info = ""
	    if (not is_combined and isinstance(segment_num, int)
	            and not isinstance(segment_num, bool)
	            and 1 <= segment_num <= len(self.session_segments)):
	        file_info = f"File: {self.session_segments[segment_num - 1].get('file_name', 'unknown')}\n"

	    lead_in = segment_info if show_segment_info else ""
	    rq_list = ', '.join(f"RQ{n}" for n in items_of(analysis, 'rq_addressed'))
	    protocol_list = ', '.join(f"Q{n}" for n in items_of(coverage_info, 'protocol_covered'))
	    completion = coverage_info.get('completion_percent', 0)
	    insight_lines = chr(10).join(f"• {insight}" for insight in items_of(analysis, 'insights'))

	    analysis_text = f"""{header}

	{lead_in}{file_info}Research Questions Addressed: {rq_list}

	{alerts_text}

	Codes/Themes:
	{codes_section}

	Protocol Coverage: {protocol_list}
	Completion: {completion}% of protocol addressed

	Key Insights:
	{insight_lines}"""

	    # Combined-only sections
	    if is_combined:
	        if "cross_segment_patterns" in analysis:
	            analysis_text += "\n\nCross-Segment Patterns:\n"
	            analysis_text += chr(10).join(
	                f"• {pattern}" for pattern in items_of(analysis, 'cross_segment_patterns'))

	        if "theme_evolution" in analysis:
	            analysis_text += f"\n\nTheme Evolution:\n{analysis.get('theme_evolution', '')}"

	    missing_topics = items_of(coverage_info, 'missing_topics')
	    if missing_topics:
	        analysis_text += f"\n\nMissing Topics:\n{chr(10).join(f'• {topic}' for topic in missing_topics)}"

	    return analysis_text

	def generate_multi_view_analysis(self):
	    """Build the individual, combined and comparison reports.

	    Returns a ``(individual, combined, comparison)`` tuple of strings.
	    The combined report triggers a fresh analysis of all transcripts
	    and needs at least two analysed segments; with fewer, a short notice
	    takes its place.
	    """
	    analyses = getattr(self, 'segment_analyses', None)
	    if not analyses:
	        return "No segments analyzed yet", "", ""

	    # One formatted report per segment, in segment order
	    divider = '=' * 50
	    sections = ["## 📑 Individual Segment Analyses\n\n"]
	    for number in sorted(analyses.keys()):
	        rendered = self.format_analysis_output(analyses[number], show_segment_info=True)
	        sections.append(f"{rendered}\n\n{divider}\n\n")
	    individual_analyses = "".join(sections)

	    if len(self.segment_analyses) > 1:
	        # Analyse every transcript together
	        joined_transcripts = "\n\n".join(self.transcript_history)
	        combined_result = self.analyze_transcript_with_gemini_enhanced(
	            joined_transcripts, segment_num=None)
	        combined_analysis = (
	            "## 🔗 Combined Analysis (All Segments Together)\n\n"
	            + self.format_analysis_output(combined_result, show_segment_info=True)
	        )
	    else:
	        combined_analysis = "Combined analysis requires at least 2 segments"

	    comparison_view = self.generate_comparison_view()

	    return individual_analyses, combined_analysis, comparison_view

	def generate_comparison_view(self):
	    """Return a Markdown comparison of all analysed segments.

	    The report holds a table with one row per segment (research
	    questions addressed, number of applied and emergent codes,
	    completion percentage) followed by a per-code list of how often
	    each code appears in each segment.
	    """
	    if not hasattr(self, 'segment_analyses') or not self.segment_analyses:
	        return "No segments to compare"

	    comparison = "## 📊 Segment Comparison\n\n"

	    # Table header. Plain "|" separators: "\|" is an invalid Python
	    # escape and would emit escaped pipes, which Markdown does not treat
	    # as column separators.
	    comparison += "| Segment | RQs Addressed | Codes Applied | Emergent Codes | Completion % |\n"
	    comparison += "|---------|---------------|---------------|----------------|-------------|\n"

	    for seg_num in sorted(self.segment_analyses.keys()):
	        analysis = self.segment_analyses[seg_num]
	        rqs = ', '.join([f"RQ{n}" for n in analysis.get('rq_addressed', [])])
	        applied = len(analysis.get('codes_applied', []))
	        emergent = len(analysis.get('emergent_codes', []))
	        completion = analysis.get('coverage', {}).get('completion_percent', 0)

	        comparison += f"| {seg_num} | {rqs} | {applied} | {emergent} | {completion}% |\n"

	    # Theme tracking
	    comparison += "\n### 📈 Theme Frequency Across Segments\n\n"

	    # code -> {segment number -> occurrences}
	    code_by_segment = {}
	    for seg_num, analysis in self.segment_analyses.items():
	        all_codes = analysis.get('codes_applied', []) + analysis.get('emergent_codes', [])
	        for code in all_codes:
	            per_segment = code_by_segment.setdefault(code, {})
	            per_segment[seg_num] = per_segment.get(seg_num, 0) + 1

	    for code, segments in sorted(code_by_segment.items()):
	        seg_info = ', '.join([f"Seg{s}: {count}x" for s, count in sorted(segments.items())])
	        comparison += f"- {code}: {seg_info}\n"

	    return comparison

	def process_interview_segment(self, audio_path, progress_callback=None):
	    """Transcribe one audio segment and analyse the transcript.

	    Args:
	        audio_path: A file path, a ``(sample_rate, audio_data)`` recording
	            tuple (written to a WAV file in the session temp directory),
	            or None to reuse the path stored in ``current_file_info``.
	        progress_callback: Optional callable that receives progress text.

	    Returns:
	        A 5-tuple ``(transcript, analysis_text, follow_ups, coverage,
	        progress)`` of strings. Failures are reported through these
	        strings rather than raised.
	    """
	    print(f"\n🎯 Starting process_interview_segment")
	    print(f" Audio path provided: {audio_path}")
	    print(f" Type of audio_path: {type(audio_path)}")

	    # Handle different types of audio input
	    actual_audio_path = None
	    is_recording = isinstance(audio_path, tuple) and len(audio_path) == 2

	    if is_recording:
	        # Case 1: (sample_rate, audio_data) from a live recording
	        print(" Detected audio data tuple (recording)")
	        sample_rate, audio_data = audio_path
	        temp_path = os.path.join(self.temp_dir, f"recorded_{datetime.now().strftime('%H%M%S')}.wav")
	        wavfile.write(temp_path, sample_rate, audio_data)
	        actual_audio_path = temp_path
	        print(f" Saved recording to: {temp_path}")
	    elif isinstance(audio_path, str):
	        # Case 2: a file path
	        actual_audio_path = audio_path
	    elif audio_path is None and self.current_file_info:
	        # Case 3: nothing passed, fall back to the last checked file
	        actual_audio_path = self.current_file_info.get("path")
	        print(f" Using saved path: {actual_audio_path}")

	    # Validate we have a valid path
	    if not actual_audio_path or not os.path.exists(actual_audio_path):
	        return "", "❌ No audio file found. Please upload a file or record audio first.", "", "", "No file to process"

	    # Get file info
	    if is_recording:
	        # Use the name of the file actually written; recomputing the
	        # timestamp here could yield a different name if the clock ticked
	        # over to the next second in between.
	        file_name = os.path.basename(actual_audio_path)
	        file_size = os.path.getsize(actual_audio_path) / (1024 * 1024)
	        self.current_file_info = {
	            "name": file_name,
	            "size_mb": file_size,
	            "path": actual_audio_path
	        }
	    else:
	        file_name = self.current_file_info.get("name", os.path.basename(actual_audio_path))
	        file_size = self.current_file_info.get("size_mb", os.path.getsize(actual_audio_path) / (1024 * 1024))

	    # Progress update
	    progress = f"""🔄 Processing: {file_name} ({file_size:.1f} MB)

	📊 Current Step: Transcribing audio with Whisper...
	⏱️ Estimated time: {int(file_size * 0.5)}-{int(file_size * 1)} minutes for transcription

	💡 Tip: Larger files take longer. A 10MB file typically takes 5-10 minutes."""

	    if progress_callback:
	        progress_callback(progress)

	    # Transcribe with Whisper
	    print(f"🎵 Starting transcription of {file_size:.1f} MB file...")
	    start_time = datetime.now()
	    transcript = self.transcribe_audio(actual_audio_path, progress_callback)
	    transcription_time = (datetime.now() - start_time).total_seconds()
	    print(f"✅ Transcription completed in {transcription_time:.1f} seconds")

	    if transcript.startswith("Error:"):
	        return transcript, "❌ Transcription failed", "", "", progress + "\n\n❌ Transcription failed"

	    # Add to history with file info
	    timestamp = datetime.now().strftime("%H:%M:%S")

	    # Safely check for continuation attributes
	    is_continuation = getattr(self, 'is_continuation', False)
	    segment_number = getattr(self, 'segment_number', 1)

	    segment_label = f"Segment {segment_number}" if is_continuation else "Segment 1"
	    self.transcript_history.append(f"[{timestamp}] [{file_name}] [{segment_label}] {transcript}")

	    # Without research questions there is nothing to analyse against
	    if not self.research_questions:
	        full_transcript = "\n\n".join(self.transcript_history)
	        return full_transcript, "⚠️ Please set up research questions first", "", "", progress

	    # Update progress for analysis phase
	    progress = f"""✅ Transcription complete! ({transcription_time:.1f} seconds)

	📊 Current Step: Analyzing with Gemini 1.5 Pro...
	🔍 Analyzing {segment_label}
	⏱️ This usually takes 10-30 seconds..."""

	    if progress_callback:
	        progress_callback(progress)

	    # Analyze with Gemini
	    print(f"🤖 Starting Gemini analysis...")
	    analysis_start = datetime.now()
	    analysis = self.analyze_transcript_with_gemini(transcript)
	    analysis_time = (datetime.now() - analysis_start).total_seconds()
	    print(f"✅ Analysis completed in {analysis_time:.1f} seconds")

	    # Format outputs
	    full_transcript = "\n\n".join(self.transcript_history)

	    if "error" in analysis:
	        analysis_text = f"❌ {analysis['error']}"
	        follow_ups = "Unable to generate follow-ups"
	        coverage = "Unable to calculate coverage"
	        progress = f"❌ Failed: {file_name}"
	        return full_transcript, analysis_text, follow_ups, coverage, progress

	    analysis_text = self.format_analysis_output(analysis)

	    follow_ups = "### 💡 Suggested Follow-ups:\n" + \
	        '\n'.join(analysis.get('follow_ups', []))

	    # Share of research / protocol questions covered so far
	    rq_done = sum(self.coverage_status["rq_covered"])
	    rq_total = len(self.research_questions)
	    protocol_done = sum(self.coverage_status["protocol_covered"])
	    protocol_total = len(self.interview_protocol)
	    rq_coverage = rq_done / rq_total * 100 if self.research_questions else 0
	    protocol_coverage = protocol_done / protocol_total * 100 if self.interview_protocol else 0

	    # Track unique codes
	    all_codes = list(set(self.detected_codes))
	    applied_unique = list(set(analysis.get("codes_applied", [])))
	    emergent_unique = list(set(analysis.get("emergent_codes", [])))

	    coverage = f"""### 📈 Overall Progress:
	- Research Questions: {rq_coverage:.0f}% ({rq_done}/{rq_total})
	- Protocol Questions: {protocol_coverage:.0f}% ({protocol_done}/{protocol_total})
	- Total Unique Codes: {len(all_codes)}
	- Framework Codes: {len(applied_unique)}
	- Emergent Codes: {len(emergent_unique)}
	- Segments Processed: {len(self.processed_files)}"""

	    progress = f"✅ Completed: {file_name} ({segment_label})"

	    return full_transcript, analysis_text, follow_ups, coverage, progress


	# Initialize
	copilot = InterviewCoPilot()

	# Create improved interface
	# Root Blocks container bound to `app`: Soft theme plus a small inline
	# stylesheet (status colour classes, centred h1, 1200px max-width container).
	with gr.Blocks(title="Research Interview Co-Pilot", theme=gr.themes.Soft(), css="""
	.file-info { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }
	.success { color: #28a745; }
	.warning { color: #ffc107; }
	.error { color: #dc3545; }
	h1 { text-align: center; }
	.contain { max-width: 1200px; margin: auto; }
	""") as app:
	# Page header text.
	gr.Markdown("""
	# 🎙️ Research Interview Co-Pilot - Enhanced with Multi-View Analysis

	Transcription: OpenAI Whisper \| Analysis: Google Gemini Pro

	Now with individual segment analysis, combined analysis, and segment comparison!
	""")

	# Setup tab: free-text inputs describing the research context.
	with gr.Tab("📋 Setup"):
	gr.Markdown("### Set up your research context")

	with gr.Row():
	with gr.Column():
	# The label marks research questions with "*"; the other inputs are
	# labelled optional or carry no marker.
	rq_input = gr.Textbox(
	label="Research Questions (one per line) *",
	placeholder="What pedagogical strategies are evident in AI educators?\nHow do AI tools emphasize practical applications?\nWhat are the differences between various AI approaches?",
	lines=6
	)

	protocol_input = gr.Textbox(
	label="Interview Protocol Questions (one per line)",
	placeholder="Tell me about your experience with AI\nHow do you use AI tools?\nWhat challenges have you faced?",
	lines=6
	)

	with gr.Column():
	framework_input = gr.Textbox(
	label="Theoretical Framework (optional)",
	placeholder="e.g., Technology Acceptance Model (TAM)\nGrounded Theory approach\nActivity Theory lens",
	lines=3
	)

	codes_input = gr.Textbox(
	label="Predefined Codes (optional - format: 'Category: code1, code2')",
	placeholder="Pedagogical: Scaffolding, Direct Instruction, Guided Practice\nPractical: Application, Implementation, Real-world Use\nEthical: Privacy Concerns, Bias Awareness, Transparency",
	lines=6
	)

	focus_input = gr.Textbox(
	label="Analysis Focus Areas (optional - one per line)",
	placeholder="Look for emotional responses\nPay attention to metaphors used\nNote any resistance or enthusiasm",
	lines=3
	)

	# Segment continuation option
	with gr.Row():
	continue_interview = gr.Checkbox(
	label="This is a continuation of a previous interview segment",
	value=False
	)
	# Read-only label; its text is produced by update_segment_info.
	segment_info = gr.Textbox(
	label="Segment Info",
	value="Segment 1",
	interactive=False
	)

	setup_btn = gr.Button("Setup Research Context", variant="primary", size="lg")
	setup_output = gr.Textbox(label="Setup Status", interactive=False, lines=6)

	# Save/Load framework buttons
	with gr.Row():
	save_framework_btn = gr.Button("💾 Save Framework", size="sm")
	load_framework_btn = gr.Button("📂 Load Framework", size="sm")
	# Starts hidden and only accepts .json files; revealed by the button handlers.
	framework_file = gr.File(label="Framework File", visible=False, file_types=[".json"])


	def update_segment_info(is_continuation):
	    """Sync the copilot's segment counter with the continuation checkbox.

	    Args:
	        is_continuation: Checkbox state; truthy when the upcoming audio
	            continues a previous interview segment.

	    Returns:
	        The label text to show in the "Segment Info" box.
	    """
	    copilot.is_continuation = bool(is_continuation)

	    # Unticking the box starts over at the first segment.
	    if not is_continuation:
	        copilot.segment_number = 1
	        return "Segment 1"

	    # Each time the box is ticked the counter advances by one.
	    copilot.segment_number += 1
	    return f"Segment {copilot.segment_number} (Continuing from previous)"


	def save_framework(rq, protocol, framework, codes, focus):
	    """Write the current setup fields to a timestamped JSON file.

	    Args:
	        rq: Text of the research-questions box.
	        protocol: Text of the interview-protocol box.
	        framework: Text of the theoretical-framework box.
	        codes: Text of the predefined-codes box.
	        focus: Text of the analysis-focus box.

	    Returns:
	        A Gradio update that makes the file component visible and points it
	        at the file just written inside ``copilot.temp_dir``.
	    """
	    payload = dict(
	        research_questions=rq,
	        interview_protocol=protocol,
	        theoretical_framework=framework,
	        predefined_codes=codes,
	        analysis_focus=focus,
	        saved_date=datetime.now().isoformat(),
	    )

	    # The file name carries a second-resolution timestamp.
	    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	    target = os.path.join(copilot.temp_dir, f"framework_{stamp}.json")

	    with open(target, 'w') as handle:
	        handle.write(json.dumps(payload, indent=2))

	    return gr.update(visible=True, value=target)


	def load_framework(file):
	    """Load a saved research framework from a JSON file.

	    Args:
	        file: The selected file. May be ``None``/empty (nothing selected), a
	            filesystem path (``str`` or ``os.PathLike``), or any object that
	            exposes its path through a ``name`` attribute.

	    Returns:
	        A 6-tuple ``(research_questions, interview_protocol,
	        theoretical_framework, predefined_codes, analysis_focus, status)``.
	        Keys missing from the file come back as ``""``. When no file is
	        given, or the file cannot be read or parsed, the five fields are
	        ``""`` and ``status`` carries the reason.
	    """
	    blank = ("", "", "", "", "")
	    if not file:
	        return (*blank, "No file selected")

	    # This is a UI callback, so every failure is reported in the status
	    # field instead of being raised.
	    try:
	        # Accept plain paths as well as file-like wrappers carrying `.name`.
	        if isinstance(file, (str, os.PathLike)):
	            path = os.fspath(file)
	        else:
	            path = file.name

	        # JSON is UTF-8 by convention; do not depend on the platform default.
	        with open(path, 'r', encoding='utf-8') as f:
	            data = json.load(f)

	        # A list/number/string would otherwise fail later with an obscure
	        # "no attribute 'get'" message.
	        if not isinstance(data, dict):
	            raise ValueError("expected a JSON object at the top level")

	        return (
	            data.get("research_questions", ""),
	            data.get("interview_protocol", ""),
	            data.get("theoretical_framework", ""),
	            data.get("predefined_codes", ""),
	            data.get("analysis_focus", ""),
	            f"✅ Loaded framework from {os.path.basename(path)}"
	        )
	    except Exception as e:
	        return (*blank, f"❌ Error loading file: {str(e)}")


	# Keep the read-only segment label in sync with the continuation checkbox.
	continue_interview.change(
	    update_segment_info,
	    inputs=[continue_interview],
	    outputs=[segment_info]
	)

	# Push the five setup fields into the copilot and show its status message.
	setup_btn.click(
	    fn=copilot.setup_research_context,
	    inputs=[rq_input, protocol_input, framework_input, codes_input, focus_input],
	    outputs=setup_output
	)

	# Saving reveals the file component with the freshly written JSON so it can
	# be downloaded. No `.change` handler hides the component afterwards: a
	# change event also fires when a handler sets the value, so hiding on change
	# would make the download disappear as soon as it was produced.
	save_framework_btn.click(
	    save_framework,
	    inputs=[rq_input, protocol_input, framework_input, codes_input, focus_input],
	    outputs=[framework_file]
	)

	# The Load button only reveals the file picker. Running load_framework
	# straight away would see no file yet and overwrite every setup field with
	# an empty string.
	load_framework_btn.click(
	    lambda: gr.update(visible=True),
	    outputs=[framework_file]
	)

	# The actual load happens once the user has uploaded a file.
	framework_file.upload(
	    load_framework,
	    inputs=[framework_file],
	    outputs=[rq_input, protocol_input, framework_input, codes_input, focus_input, setup_output]
	)

	# Interview Processing tab: session summary and controls on top, then an
	# input column (scale=1) next to a results column (scale=2).
	with gr.Tab("🎤 Interview Processing"):
	gr.Markdown("### Process interview audio with multi-view analysis")

	# Session info at the top
	with gr.Row():
	# Initial text is the summary at build time; handlers refresh it later.
	session_info = gr.Markdown(copilot.get_session_summary())

	with gr.Row():
	# Session control buttons
	new_file_btn = gr.Button("📁 New File, Keep Setup", variant="secondary")
	reset_session_btn = gr.Button("🔄 Reset Session", variant="secondary")
	reset_all_btn = gr.Button("🗑️ Reset Everything", variant="stop")

	with gr.Row():
	with gr.Column(scale=1):
	# File upload with preview
	# type="filepath": handlers receive a path string rather than audio samples.
	audio_input = gr.Audio(
	sources=["upload", "microphone"],
	type="filepath",
	label="📁 Upload Audio File or 🎤 Record",
	interactive=True
	)

	file_status = gr.Markdown("Upload a file to see its status")

	# Compression tool
	with gr.Accordion("🔧 Audio Compression Tool", open=False):
	gr.Markdown("Compress large audio files")

	quality_select = gr.Radio(
	choices=["high", "medium", "low"],
	value="medium",
	label="Compression Quality"
	)

	compress_btn = gr.Button("Compress Audio", variant="secondary")
	compress_output = gr.Markdown()
	# Hidden until a compression result is available.
	compressed_audio = gr.Audio(
	label="Compressed Audio",
	visible=False
	)

	process_btn = gr.Button("🔍 Process & Analyze", variant="primary", size="lg")

	# Add visual processing indicator
	processing_status = gr.Markdown(
	value="",
	visible=True
	)

	# Add progress bar
	with gr.Row():
	# NOTE(review): this Progress instance is not referenced anywhere else in
	# the visible code; the process handler takes its own gr.Progress() default
	# argument - confirm whether this line is needed.
	progress_bar = gr.Progress()
	progress_status = gr.Textbox(
	label="Progress",
	interactive=False,
	lines=4,
	value="Ready to process audio..."
	)

	# Add multi-view analysis button AFTER progress status
	# Starts visible; the process handler's last output updates its visibility.
	generate_multiview_btn = gr.Button(
	"📊 Generate Multi-View Analysis",
	variant="secondary",
	size="lg",
	visible=True # Always visible for now
	)

	with gr.Column(scale=2):
	# Results area with enhanced tabs
	with gr.Tabs():
	with gr.Tab("📝 Transcript"):
	transcript_output = gr.Textbox(
	label="Full Transcript",
	lines=15,
	max_lines=25,
	interactive=False
	)

	with gr.Tab("🔍 Current Segment"):
	current_analysis_output = gr.Markdown(
	value="Process a segment to see analysis"
	)

	# The next three views are filled by the multi-view button.
	with gr.Tab("📑 All Segments"):
	all_segments_output = gr.Markdown(
	value="Individual analyses will appear here"
	)

	with gr.Tab("🔗 Combined Analysis"):
	combined_analysis_output = gr.Markdown(
	value="Combined analysis will appear here after 2+ segments"
	)

	with gr.Tab("📊 Comparison"):
	comparison_output = gr.Markdown(
	value="Segment comparison will appear here"
	)

	with gr.Tab("💡 Follow-ups"):
	followup_output = gr.Markdown()

	with gr.Tab("📈 Coverage"):
	coverage_output = gr.Markdown()

	# Hidden state to store file path
	audio_state = gr.State()


	# Session management functions
	def new_file_keep_setup():
	    """Prepare the UI for the next audio file without touching the setup.

	    Marks the copilot as continuing an interview and numbers the upcoming
	    segment one past the segments already stored in the session.

	    Returns:
	        ``(audio_value, file_status, progress_text, session_summary)`` where
	        ``audio_value`` is ``None`` so the audio widget is cleared.
	    """
	    copilot.is_continuation = True
	    next_segment = len(copilot.session_segments) + 1
	    copilot.segment_number = next_segment

	    ready_text = f"Ready for Segment {next_segment}"
	    summary = copilot.get_session_summary()
	    return None, "Upload a new file to continue the interview", ready_text, summary


	def reset_session():
	    """Reset the interview session while keeping the research framework.

	    Returns:
	        ``(audio_value, file_status, progress_text, session_summary,
	        transcript_text)``; the audio value is ``None`` and the transcript is
	        an empty string so both widgets are cleared.
	    """
	    # The value returned by the copilot is not used by the UI.
	    copilot.reset_session(keep_framework=True)

	    summary = copilot.get_session_summary()
	    return None, "Session reset. Framework kept.", "Ready to process audio...", summary, ""


	def reset_everything():
	    """Reset the interview session together with the research framework.

	    Returns:
	        ``(audio_value, file_status, progress_text, session_summary,
	        transcript_text, analysis_text)``; the audio value is ``None`` and the
	        transcript is an empty string so both widgets are cleared, and the
	        last item tells the user to redo the setup.
	    """
	    # The value returned by the copilot is not used by the UI.
	    copilot.reset_session(keep_framework=False)

	    summary = copilot.get_session_summary()
	    return (
	        None,
	        "Everything reset. Please set up framework again.",
	        "Ready to process audio...",
	        summary,
	        "",
	        "❌ Framework cleared. Please go to Setup tab.",
	    )


	# File status update - store the path in state
	# check_audio_file's three return values feed the audio widget, the status
	# text and audio_state, in that order.
	audio_input.change(
	fn=copilot.check_audio_file,
	inputs=[audio_input],
	outputs=[audio_input, file_status, audio_state]
	)

	# Compression - update state with compressed file
	# After compress_audio has run, the follow-up step reads the compressed
	# player's value: when it is truthy the player is shown and the value is
	# copied into audio_state; otherwise the player is hidden and audio_state is
	# reset to None. `msg` is accepted but unused.
	# NOTE(review): the failure branch discards the previously stored path, so
	# the uploaded file presumably has to be selected again - confirm intended.
	compress_btn.click(
	fn=copilot.compress_audio,
	inputs=[audio_state, quality_select],
	outputs=[compressed_audio, compress_output]
	).then(
	fn=lambda x, msg: (gr.update(visible=True), x) if x else (gr.update(visible=False), None),
	inputs=[compressed_audio, compress_output],
	outputs=[compressed_audio, audio_state]
	)


	# Modified process function to handle multi-view
	def process_and_update_session_multiview(audio_path, progress=gr.Progress()):
	    """Run one audio segment through the copilot and refresh the session UI.

	    Args:
	        audio_path: Value of the audio state; handed unchanged to
	            ``copilot.process_interview_segment``.
	        progress: Gradio progress tracker (supplied via the default argument).

	    Returns:
	        A 7-tuple: transcript, current-segment analysis, follow-ups, coverage,
	        progress text, session summary, and a visibility update for the
	        multi-view button.
	    """

	    def relay(message):
	        # Intermediate messages from the copilot are shown at the 50% mark.
	        progress(0.5, desc=message)
	        return message

	    progress(0, desc="Starting audio processing...")
	    results = copilot.process_interview_segment(audio_path, progress_callback=relay)
	    progress(1.0, desc="Processing complete!")

	    # A progress text starting with the check mark signals success; only then
	    # is the segment recorded in the session.
	    if results[4].startswith("✅"):
	        info = copilot.current_file_info
	        copilot.add_segment_to_session(
	            info.get("name", "unknown"),
	            info.get("size_mb", 0) * 0.5,  # Rough estimate
	            len(results[0]),
	        )

	    # The multi-view button is shown only once two or more segments exist.
	    enough_segments = len(copilot.session_segments) >= 2

	    return (
	        *results[:5],  # transcript, analysis, follow-ups, coverage, progress
	        copilot.get_session_summary(),
	        gr.update(visible=enough_segments),
	    )


	# Multi-view generation function
	def generate_all_views():
	    """Produce the three multi-view analysis texts.

	    Returns:
	        ``(individual, combined, comparison)`` exactly as delivered by
	        ``copilot.generate_multi_view_analysis``.
	    """
	    views = copilot.generate_multi_view_analysis()
	    per_segment, merged, side_by_side = views
	    return per_segment, merged, side_by_side


	# Connect the process button with loading state
	# Three chained steps: show a "processing" banner, run the segment handler
	# (its seven return values map onto the seven outputs below, in order), then
	# clear the banner again.
	process_btn.click(
	fn=lambda: gr.update(
	value="🔄 Processing in progress... Please wait, this may take several minutes for large files."),
	outputs=[processing_status]
	).then(
	fn=process_and_update_session_multiview,
	inputs=[audio_state],
	outputs=[
	transcript_output,
	current_analysis_output,
	followup_output,
	coverage_output,
	progress_status,
	session_info,
	generate_multiview_btn
	]
	).then(
	fn=lambda: gr.update(value=""),
	outputs=[processing_status]
	)

	# Connect the multi-view button
	# Fills the "All Segments", "Combined Analysis" and "Comparison" views.
	generate_multiview_btn.click(
	fn=generate_all_views,
	outputs=[
	all_segments_output,
	combined_analysis_output,
	comparison_output
	]
	)

	# Session control buttons
	# Each handler returns one value per listed output, in the same order.
	new_file_btn.click(
	fn=new_file_keep_setup,
	outputs=[audio_input, file_status, progress_status, session_info]
	)

	reset_session_btn.click(
	fn=reset_session,
	outputs=[audio_input, file_status, progress_status, session_info, transcript_output]
	)

	reset_all_btn.click(
	fn=reset_everything,
	outputs=[audio_input, file_status, progress_status, session_info, transcript_output,
	current_analysis_output]
	)

	# Summary & Export tab: one button produces Markdown, CSV and JSON views of
	# the data held by the copilot.
	with gr.Tab("📊 Summary & Export"):
	gr.Markdown("### Generate comprehensive summary with multi-view analysis")


	def generate_enhanced_summary():
	    """Build the Markdown, CSV and JSON exports for the current session.

	    Reads the state accumulated on the module-level ``copilot`` object.

	    Returns:
	        A ``(markdown_summary, csv_codes, json_export)`` tuple of strings.
	        When no transcript has been recorded yet the tuple is
	        ``("No interview data yet.", "", "")``.
	    """
	    # Function-scope imports so this block brings its own stdlib helpers.
	    import csv
	    import io
	    from collections import Counter

	    if not copilot.transcript_history:
	        return "No interview data yet.", "", ""

	    # dict.fromkeys de-duplicates while keeping first-seen order, so repeated
	    # exports list the codes in the same order (a set has no defined order).
	    unique_codes = list(dict.fromkeys(copilot.detected_codes))
	    # One tolerant lookup shared by the Markdown and the JSON section.
	    segment_analyses = getattr(copilot, "segment_analyses", None) or {}
	    generated_at = datetime.now()

	    rq_lines = [
	        f"- {'✅' if covered else '❌'} {question}"
	        for question, covered in zip(
	            copilot.research_questions, copilot.coverage_status["rq_covered"]
	        )
	    ]
	    # f-string formatting also copes with codes that are not plain strings.
	    code_lines = [f"- {code}" for code in unique_codes]
	    segment_note = (
	        "Included in multi-view analysis - see Interview Processing tab"
	        if segment_analyses
	        else "No individual analyses yet"
	    )

	    # Assembled line by line so the Markdown does not depend on how a
	    # multi-line literal happens to be indented in the source file.
	    markdown_summary = "\n".join([
	        "# Interview Summary Report",
	        "",
	        f"Generated: {generated_at.strftime('%Y-%m-%d %H:%M')}",
	        "Analysis Engine: Google Gemini Pro",
	        f"Files Processed: {', '.join(copilot.processed_files)}",
	        f"Total Segments: {len(copilot.session_segments)}",
	        "",
	        "## Research Question Coverage",
	        "\n".join(rq_lines),
	        "",
	        f"## Detected Codes/Themes ({len(unique_codes)} unique)",
	        "\n".join(code_lines),
	        "",
	        "## Segment-by-Segment Analysis",
	        segment_note,
	        "",
	        "## Full Transcript",
	        "\n".join(copilot.transcript_history),
	    ])

	    # CSV format for codes. The csv module doubles embedded quotes, which the
	    # plain string interpolation it replaces did not. most_common() orders by
	    # descending count and keeps first-seen order among ties.
	    csv_buffer = io.StringIO()
	    csv_buffer.write("Code,Frequency\n")
	    csv_writer = csv.writer(
	        csv_buffer, quoting=csv.QUOTE_NONNUMERIC, lineterminator="\n"
	    )
	    csv_writer.writerows(
	        (str(code), freq)
	        for code, freq in Counter(copilot.detected_codes).most_common()
	    )
	    csv_codes = csv_buffer.getvalue()

	    # JSON format with segment analyses. ensure_ascii=False keeps non-ASCII
	    # transcript text readable instead of \uXXXX escapes.
	    json_export = json.dumps({
	        "metadata": {
	            "date": generated_at.isoformat(),
	            "files": copilot.processed_files,
	            "total_segments": len(copilot.transcript_history),
	            "analysis_engine": "Gemini Pro"
	        },
	        "research_questions": {
	            "questions": copilot.research_questions,
	            "coverage": copilot.coverage_status["rq_covered"]
	        },
	        "codes": unique_codes,
	        "transcripts": copilot.transcript_history,
	        "segment_analyses": {str(k): v for k, v in segment_analyses.items()}
	    }, indent=2, ensure_ascii=False)

	    return markdown_summary, csv_codes, json_export


	# Trigger button for all three export formats.
	with gr.Row():
	summary_btn = gr.Button("Generate All Formats", variant="primary", size="lg")

	# One column for the rendered Markdown preview, one for the raw CSV/JSON
	# text, which is left editable (interactive=True).
	with gr.Row():
	with gr.Column():
	summary_display = gr.Markdown(label="Summary Preview")

	with gr.Column():
	with gr.Accordion("📥 Export Options", open=True):
	csv_export = gr.Textbox(
	label="CSV Export (Codes)",
	lines=10,
	interactive=True
	)

	json_export = gr.Textbox(
	label="JSON Export (Complete Data)",
	lines=10,
	interactive=True
	)

	# The handler's three return values map onto preview, CSV box and JSON box.
	summary_btn.click(
	fn=generate_enhanced_summary,
	outputs=[summary_display, csv_export, json_export]
	)

	# Help tab: static reference text. It is an f-string, so the copilot's temp
	# directory is interpolated once, when the interface is built.
	with gr.Tab("ℹ️ Help"):
	gr.Markdown(f"""
	### System Information

	Temp Directory: {copilot.temp_dir}

	Transcription Engine: OpenAI Whisper
	- Requires: OPENAI_API_KEY in .env file
	- Max file size: 25 MB
	- Supported formats: MP3, WAV, M4A, OGG, WEBM, MP4, MPEG, MPGA

	Analysis Engine: Google Gemini Pro
	- Requires: GEMINI_API_KEY in .env file
	- Free tier: 60 requests per minute
	- No file size limits (only processes text)

	### Multi-View Analysis Features

	Current Segment View: Shows analysis of the just-processed segment
	All Segments View: Shows individual analyses for each segment
	Combined Analysis: Analyzes all segments together to find patterns
	Comparison View: Side-by-side comparison of all segments

	### File Handling Tips

	To reduce file size:
	1. Use the built-in compression tool
	2. Record at lower quality (16kHz, mono)
	3. Split long recordings into segments

	Best practices:
	- Process 3-5 minute segments for optimal results
	- Use clear file names for easy tracking
	- Check file size before processing

	### Troubleshooting

	If recording doesn't work:
	- Check browser permissions for microphone
	- Try a different browser (Chrome/Edge work best)
	- Use upload instead of recording

	If processing fails:
	- Check the console for detailed error messages
	- Verify your API keys are correct
	- Ensure the audio file format is supported

	### Required API Keys

	Add to your `.env` file:
	```
	OPENAI_API_KEY=sk-your-openai-key
	GEMINI_API_KEY=your-gemini-key
	```
	""")

	# Launch
	# Script entry point: print a startup banner and a series of environment
	# diagnostics (temp directory, ffmpeg, API keys), then start the Gradio app
	# with request queuing enabled. The checks only print; none of them aborts
	# the launch.
	if __name__ == "__main__":
	print("\n" + "=" * 50)
	print("🚀 Starting Enhanced Research Interview Co-Pilot with Multi-View Analysis")
	print("=" * 50)

	# Check temp directory
	# Free space is measured on the system temp directory and shown in GiB.
	print(f"📁 Temp directory: {copilot.temp_dir}")
	print(f" - Free space: {shutil.disk_usage(tempfile.gettempdir()).free / (1024 ** 3):.1f} GB")

	# Check dependencies
	# shutil.which looks for an ffmpeg executable on PATH.
	if shutil.which('ffmpeg'):
	print("✅ FFmpeg found - compression available")
	else:
	print("⚠️ FFmpeg not found - compression unavailable")

	# Check API keys
	if not os.getenv("OPENAI_API_KEY"):
	print("❌ No OpenAI API key found (required for transcription)")
	else:
	print("✅ OpenAI API key loaded (Whisper transcription)")
	# Test OpenAI client initialization
	# Only constructs a client object; test_client is not used afterwards.
	try:
	test_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
	print("✅ OpenAI client initialized successfully")
	except Exception as e:
	print(f"❌ Error initializing OpenAI client: {e}")

	if not os.getenv("GEMINI_API_KEY"):
	print("❌ No Gemini API key found (required for analysis)")
	else:
	print("✅ Gemini API key loaded (analysis)")

	# Final verdict: both keys must be present for the "ready" message.
	if not os.getenv("OPENAI_API_KEY") or not os.getenv("GEMINI_API_KEY"):
	print("\n⚠️ Please add missing API keys to your .env file")
	else:
	print("\n✅ All systems ready!")

	print("\n📌 Launching application...")
	app.queue().launch()