""" Gradio UI for Misr Italia Properties Speech-to-Text Pipeline """ import os import sys import json import logging from pathlib import Path # Add root to python path root = Path(__file__).parent.parent sys.path.insert(0, str(root)) import gradio as gr from dotenv import load_dotenv from src.inference.transcribe import WhisperTranscriber from src.inference.analyze_call import CallAnalyzer, clean_transcript logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Load env variables load_dotenv(root / ".env") # Initialize models globally so they load on server startup instead of per-request logger.info("Loading Whisper Model...") DEFAULT_MODEL = "outputs/checkpoints/merged_model" model_path = str(root / DEFAULT_MODEL) if not Path(model_path).exists(): model_path = "openai/whisper-large-v3" transcriber = WhisperTranscriber(model_path=model_path, device=None) logger.info("Initializing CallAnalyzer with OpenAI...") analyzer = None try: analyzer = CallAnalyzer() except Exception as e: logger.error("Failed to init CallAnalyzer: %s", e) def process_call(audio_file, enable_analysis): if not audio_file: return "No audio uploaded.", "No audio uploaded.", *[None] * 11 # Run transcription (VAD + Whisper; OpenAI handles speaker separation) try: transcript = transcriber.transcribe(audio_file) except Exception as e: logger.error("Transcription error: %s", e) err = f"Transcription error: {str(e)}" return err, err, *[None] * 11 # Analysis Defaults parsed = { "cleaned_transcript": transcript, "agent_name": "", "customer_name": "", "unit_number": [], "project_name": "", "department_mentioned": "", "call_type": "", "customer_satisfaction": 0, "is_urgent": False, "pain_points": [], "action_items_promised": [], "next_steps": [] } # Keep a clean copy of the raw Whisper output before OpenAI touches it raw_transcript = transcript openai_transcript = transcript # fallback: same as raw if analysis disabled if enable_analysis and analyzer: try: analysis = analyzer.analyze(transcript) dump = analysis.model_dump() parsed.update(dump) openai_transcript = parsed.get("cleaned_transcript", transcript) except Exception as e: logger.error("OpenAI Analysis error: %s", e) parsed["agent_name"] = f"Error: {e}" openai_transcript = transcript return ( raw_transcript, openai_transcript, parsed.get("agent_name"), parsed.get("customer_name"), ", ".join(parsed.get("unit_number", [])) if isinstance(parsed.get("unit_number"), list) else str(parsed.get("unit_number", "")), parsed.get("project_name"), parsed.get("department_mentioned"), parsed.get("call_type"), parsed.get("customer_satisfaction"), parsed.get("is_urgent"), "\n".join([f"- {x}" for x in parsed.get("pain_points", [])]), "\n".join([f"- {x}" for x in parsed.get("action_items_promised", [])]), "\n".join([f"- {x}" for x in parsed.get("next_steps", [])]) ) def build_ui(): theme = gr.themes.Monochrome( primary_hue="slate", neutral_hue="slate" ) js_func = """ function refresh() { const url = new URL(window.location); if (url.searchParams.get('__theme') !== 'dark') { url.searchParams.set('__theme', 'dark'); window.location.href = url.href; } } """ with gr.Blocks(title="Misr Italia Properties - Call Analyzer") as demo: gr.Markdown("# 🏢 Misr Italia Properties - Call Center AI") gr.Markdown("Upload an audio recording of a customer call to automatically transcribe, perform intelligent speaker separation, and extract business intelligence via OpenAI GPT-4o-mini.") with gr.Row(): with gr.Column(scale=1): audio_input = gr.Audio(type="filepath", label="Upload Call Recording (WAV/MP3)") with gr.Row(): analyze_cb = gr.Checkbox(label="Enable OpenAI Analysis", value=True) submit_btn = gr.Button("Analyze Call", variant="primary") with gr.Tabs(): with gr.TabItem("🎙️ Raw Whisper"): whisper_output = gr.Textbox( label="Raw Whisper Transcript", lines=18, placeholder="Raw output from Whisper will appear here…", ) with gr.TabItem("✨ OpenAI Cleaned"): openai_output = gr.Textbox( label="OpenAI Cleaned Transcript (with speaker turns)", lines=18, placeholder="OpenAI-separated and corrected transcript will appear here…", ) with gr.Column(scale=1): gr.Markdown("### 📊 Extracted Call Insights") with gr.Row(): is_urgent_output = gr.Checkbox(label="🚨 IS URGENT") satisfaction_output = gr.Number(label="⭐ Customer Satisfaction (1-5)") call_type_output = gr.Textbox(label="📞 Call Type") with gr.Row(): agent_name_output = gr.Textbox(label="Agent Name") customer_name_output = gr.Textbox(label="Customer Name") with gr.Row(): project_name_output = gr.Textbox(label="Project Name") unit_number_output = gr.Textbox(label="Unit Number(s)") dept_output = gr.Textbox(label="Department Mentioned") pain_points_output = gr.Textbox(label="💥 Pain Points", lines=3) action_items_output = gr.Textbox(label="✅ Action Items", lines=3) next_steps_output = gr.Textbox(label="⏭️ Next Steps", lines=3) submit_btn.click( fn=process_call, inputs=[audio_input, analyze_cb], outputs=[ whisper_output, openai_output, agent_name_output, customer_name_output, unit_number_output, project_name_output, dept_output, call_type_output, satisfaction_output, is_urgent_output, pain_points_output, action_items_output, next_steps_output ] ) return demo, theme, js_func if __name__ == "__main__": app, theme_obj, js_func = build_ui() app.launch(server_name="0.0.0.0", server_port=7860, share=False, theme=theme_obj, js=js_func)