File size: 6,992 Bytes
0db822c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""
Gradio UI for Misr Italia Properties Speech-to-Text Pipeline
"""
import os
import sys
import json
import logging
from pathlib import Path

# Add root to python path
root = Path(__file__).parent.parent
sys.path.insert(0, str(root))

import gradio as gr
from dotenv import load_dotenv

from src.inference.transcribe import WhisperTranscriber
from src.inference.analyze_call import CallAnalyzer, clean_transcript

# Configure logging once for the process and grab this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read environment variables from the .env file next to the project root.
load_dotenv(root / ".env")

# Models are built at import time so they load on server startup rather than
# on every request.
DEFAULT_MODEL = "outputs/checkpoints/merged_model"
logger.info("Loading Whisper Model...")
_merged_checkpoint = root / DEFAULT_MODEL
# Use the local merged checkpoint when it exists on disk; otherwise fall back
# to the stock "openai/whisper-large-v3" identifier.
model_path = (
    str(_merged_checkpoint)
    if _merged_checkpoint.exists()
    else "openai/whisper-large-v3"
)
transcriber = WhisperTranscriber(model_path=model_path, device=None)

logger.info("Initializing CallAnalyzer with OpenAI...")
try:
    analyzer = CallAnalyzer()
except Exception as e:
    # Leave the analyzer unset so the module still imports; process_call
    # checks for a falsy analyzer before running analysis.
    logger.error("Failed to init CallAnalyzer: %s", e)
    analyzer = None


def process_call(audio_file, enable_analysis):
    """Transcribe an uploaded call and optionally extract structured insights.

    Args:
        audio_file: Path of the uploaded recording (the Gradio audio input is
            configured with ``type="filepath"``). A falsy value means nothing
            was uploaded.
        enable_analysis: When truthy, and the module-level ``analyzer`` was
            created successfully, the transcript is passed to
            ``analyzer.analyze`` and the resulting fields are displayed.

    Returns:
        A 13-tuple in the order of the ``outputs`` list wired in ``build_ui``:
        raw transcript, cleaned transcript, agent name, customer name,
        unit number(s) as a comma-separated string, project name, department,
        call type, customer satisfaction, urgency flag, and three bullet-list
        strings (pain points, action items, next steps). When no audio is
        supplied or transcription fails, the first two entries carry the
        message and the remaining eleven are ``None``.
    """
    if not audio_file:
        message = "No audio uploaded."
        return (message, message) + (None,) * 11

    # Transcription step; any failure is reported in both transcript boxes.
    try:
        transcript = transcriber.transcribe(audio_file)
    except Exception as e:
        logger.exception("Transcription error: %s", e)
        err = f"Transcription error: {e}"
        return (err, err) + (None,) * 11

    # Defaults shown when analysis is disabled, unavailable, or fails.
    parsed = {
        "cleaned_transcript": transcript,
        "agent_name": "",
        "customer_name": "",
        "unit_number": [],
        "project_name": "",
        "department_mentioned": "",
        "call_type": "",
        "customer_satisfaction": 0,
        "is_urgent": False,
        "pain_points": [],
        "action_items_promised": [],
        "next_steps": [],
    }

    if enable_analysis and analyzer:
        try:
            parsed.update(analyzer.analyze(transcript).model_dump())
        except Exception as e:
            logger.exception("OpenAI Analysis error: %s", e)
            # Surface the failure in the UI via the agent-name field.
            parsed["agent_name"] = f"Error: {e}"

    # Fall back to the raw transcript if the analysis returned an empty or
    # missing cleaned transcript, so the second tab is never blank.
    cleaned_transcript = parsed.get("cleaned_transcript") or transcript

    return (
        transcript,
        cleaned_transcript,
        parsed.get("agent_name"),
        parsed.get("customer_name"),
        _format_units(parsed.get("unit_number")),
        parsed.get("project_name"),
        parsed.get("department_mentioned"),
        parsed.get("call_type"),
        parsed.get("customer_satisfaction"),
        parsed.get("is_urgent"),
        _format_bullets(parsed.get("pain_points")),
        _format_bullets(parsed.get("action_items_promised")),
        _format_bullets(parsed.get("next_steps")),
    )


def _as_items(value):
    """Normalise an analysis field into a list of display strings.

    ``None`` becomes an empty list, a scalar (including a bare string) becomes
    a one-element list, and ``None`` entries inside a list are dropped.
    """
    if value is None:
        return []
    if not isinstance(value, (list, tuple)):
        value = [value]
    return [str(item) for item in value if item is not None]


def _format_units(value):
    """Render unit number(s) as a comma-separated string."""
    return ", ".join(_as_items(value))


def _format_bullets(value):
    """Render a list-like field as one ``- item`` line per entry."""
    return "\n".join(f"- {item}" for item in _as_items(value))


def build_ui():
    """Assemble the Gradio Blocks layout for the call analyzer.

    Returns:
        A ``(demo, theme, js)`` tuple: the Blocks app, the Monochrome theme
        object, and a JavaScript snippet string. Applying the theme and the
        snippet is left to the caller.
    """
    # JavaScript source for a function that adds ``__theme=dark`` to the page
    # URL and navigates to it when that query parameter is not already 'dark'.
    dark_mode_js = """
    function refresh() {
        const url = new URL(window.location);
        if (url.searchParams.get('__theme') !== 'dark') {
            url.searchParams.set('__theme', 'dark');
            window.location.href = url.href;
        }
    }
    """
    mono_theme = gr.themes.Monochrome(primary_hue="slate", neutral_hue="slate")

    intro_text = "Upload an audio recording of a customer call to automatically transcribe, perform intelligent speaker separation, and extract business intelligence via OpenAI GPT-4o-mini."

    with gr.Blocks(title="Misr Italia Properties - Call Analyzer") as ui:
        gr.Markdown("# 🏢 Misr Italia Properties - Call Center AI")
        gr.Markdown(intro_text)

        with gr.Row():
            # Left column: upload controls and the two transcript views.
            with gr.Column(scale=1):
                recording = gr.Audio(type="filepath", label="Upload Call Recording (WAV/MP3)")
                with gr.Row():
                    analysis_toggle = gr.Checkbox(label="Enable OpenAI Analysis", value=True)

                run_button = gr.Button("Analyze Call", variant="primary")

                with gr.Tabs():
                    with gr.TabItem("🎙️ Raw Whisper"):
                        raw_box = gr.Textbox(
                            label="Raw Whisper Transcript",
                            lines=18,
                            placeholder="Raw output from Whisper will appear here…",
                        )
                    with gr.TabItem("✨ OpenAI Cleaned"):
                        cleaned_box = gr.Textbox(
                            label="OpenAI Cleaned Transcript (with speaker turns)",
                            lines=18,
                            placeholder="OpenAI-separated and corrected transcript will appear here…",
                        )

            # Right column: structured fields extracted from the call.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 Extracted Call Insights")
                with gr.Row():
                    urgent_flag = gr.Checkbox(label="🚨 IS URGENT")
                    satisfaction_score = gr.Number(label="⭐ Customer Satisfaction (1-5)")
                    call_type_box = gr.Textbox(label="📞 Call Type")
                with gr.Row():
                    agent_box = gr.Textbox(label="Agent Name")
                    customer_box = gr.Textbox(label="Customer Name")
                with gr.Row():
                    project_box = gr.Textbox(label="Project Name")
                    unit_box = gr.Textbox(label="Unit Number(s)")

                department_box = gr.Textbox(label="Department Mentioned")

                pain_points_box = gr.Textbox(label="💥 Pain Points", lines=3)
                action_items_box = gr.Textbox(label="✅ Action Items", lines=3)
                next_steps_box = gr.Textbox(label="⏭️ Next Steps", lines=3)

        # Order must mirror the tuple returned by process_call.
        result_components = [
            raw_box,
            cleaned_box,
            agent_box,
            customer_box,
            unit_box,
            project_box,
            department_box,
            call_type_box,
            satisfaction_score,
            urgent_flag,
            pain_points_box,
            action_items_box,
            next_steps_box,
        ]
        run_button.click(
            fn=process_call,
            inputs=[recording, analysis_toggle],
            outputs=result_components,
        )

    return ui, mono_theme, dark_mode_js

if __name__ == "__main__":
    # Build the interface, then serve it on all interfaces at port 7860
    # without a public share link, applying the theme and JS at launch.
    demo_app, ui_theme, startup_js = build_ui()
    demo_app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        theme=ui_theme,
        js=startup_js,
    )