MIP-Tech's picture
Deploy to HF Spaces
0db822c
"""
Gradio UI for Misr Italia Properties Speech-to-Text Pipeline
"""
import os
import sys
import json
import logging
from pathlib import Path
# Add root to python path
root = Path(__file__).parent.parent
sys.path.insert(0, str(root))
import gradio as gr
from dotenv import load_dotenv
from src.inference.transcribe import WhisperTranscriber
from src.inference.analyze_call import CallAnalyzer, clean_transcript
# Configure logging once at import time and grab this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick up environment settings from the .env file next to the project root.
load_dotenv(root / ".env")

# Models are created at import time so they are loaded once on server startup
# rather than on every request.
logger.info("Loading Whisper Model...")
DEFAULT_MODEL = "outputs/checkpoints/merged_model"
model_path = str(root / DEFAULT_MODEL)
# Use the local merged checkpoint when it exists on disk; otherwise fall back
# to the stock "openai/whisper-large-v3" identifier.
model_path = model_path if Path(model_path).exists() else "openai/whisper-large-v3"
transcriber = WhisperTranscriber(model_path=model_path, device=None)

logger.info("Initializing CallAnalyzer with OpenAI...")
try:
    analyzer = CallAnalyzer()
except Exception as init_error:
    # Keep the app usable for transcription only: process_call skips the
    # analysis step whenever `analyzer` is None.
    analyzer = None
    logger.error("Failed to init CallAnalyzer: %s", init_error)
def _format_bullets(items):
    """Render *items* as one "- item" line per entry; None or empty gives ""."""
    if not items:
        return ""
    if isinstance(items, str):
        # A bare string would otherwise be iterated character by character.
        items = [items]
    return "\n".join(f"- {item}" for item in items)


def _format_units(units):
    """Render unit number(s) as a comma-separated string; None gives ""."""
    if units is None:
        return ""
    if isinstance(units, (list, tuple)):
        # str() each entry so numeric unit numbers do not break str.join.
        return ", ".join(str(unit) for unit in units)
    return str(units)


def process_call(audio_file, enable_analysis):
    """Transcribe an uploaded call and optionally extract insights from it.

    Args:
        audio_file: Path of the uploaded recording; a falsy value means
            nothing was uploaded.
        enable_analysis: When true, and the module-level ``analyzer`` was
            created successfully, the transcript is passed to
            ``analyzer.analyze``.

    Returns:
        A 13-tuple matching the ``outputs`` list wired up in ``build_ui``:
        raw transcript, cleaned transcript, agent name, customer name,
        unit number(s), project name, department, call type, customer
        satisfaction, urgency flag, pain points, action items, next steps.
        When no audio is given or transcription fails, the first two items
        carry the message and the remaining eleven are ``None``.
    """
    blank_insights = (None,) * 11

    if not audio_file:
        return ("No audio uploaded.", "No audio uploaded.", *blank_insights)

    # Run transcription (VAD + Whisper; OpenAI handles speaker separation)
    try:
        transcript = transcriber.transcribe(audio_file)
    except Exception as e:
        # logger.exception keeps the traceback for server-side debugging.
        logger.exception("Transcription error: %s", e)
        err = f"Transcription error: {e}"
        return (err, err, *blank_insights)

    # Defaults shown when analysis is disabled, unavailable, or fails.
    parsed = {
        "cleaned_transcript": transcript,
        "agent_name": "",
        "customer_name": "",
        "unit_number": [],
        "project_name": "",
        "department_mentioned": "",
        "call_type": "",
        "customer_satisfaction": 0,
        "is_urgent": False,
        "pain_points": [],
        "action_items_promised": [],
        "next_steps": [],
    }

    # Keep a clean copy of the raw Whisper output before OpenAI touches it.
    raw_transcript = transcript
    openai_transcript = transcript  # fallback: same as raw if analysis disabled

    if enable_analysis and analyzer:
        try:
            analysis = analyzer.analyze(transcript)
            parsed.update(analysis.model_dump())
            # The dump may carry None/"" for the cleaned transcript; never
            # blank the tab when a raw transcript is available.
            openai_transcript = parsed.get("cleaned_transcript") or transcript
        except Exception as e:
            logger.exception("OpenAI Analysis error: %s", e)
            # Surface the failure in the UI through the agent-name field.
            parsed["agent_name"] = f"Error: {e}"
            openai_transcript = transcript

    return (
        raw_transcript,
        openai_transcript,
        parsed.get("agent_name"),
        parsed.get("customer_name"),
        _format_units(parsed.get("unit_number")),
        parsed.get("project_name"),
        parsed.get("department_mentioned"),
        parsed.get("call_type"),
        parsed.get("customer_satisfaction"),
        parsed.get("is_urgent"),
        _format_bullets(parsed.get("pain_points")),
        _format_bullets(parsed.get("action_items_promised")),
        _format_bullets(parsed.get("next_steps")),
    )
def build_ui():
    """Assemble the Gradio Blocks interface for the call analyzer.

    Returns:
        A ``(demo, theme, js_func)`` tuple: the Blocks app, the Monochrome
        theme object, and a JavaScript snippet that reloads the page with
        ``__theme=dark`` in the URL when it is not already set.
    """
    dark_mode_js = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
    slate_theme = gr.themes.Monochrome(primary_hue="slate", neutral_hue="slate")

    # NOTE(review): the row/column grouping below follows the order of the
    # component declarations — confirm against the rendered layout.
    with gr.Blocks(title="Misr Italia Properties - Call Analyzer") as demo:
        gr.Markdown("# 🏢 Misr Italia Properties - Call Center AI")
        gr.Markdown("Upload an audio recording of a customer call to automatically transcribe, perform intelligent speaker separation, and extract business intelligence via OpenAI GPT-4o-mini.")

        with gr.Row():
            # Left column: upload, controls, and the two transcript tabs.
            with gr.Column(scale=1):
                recording = gr.Audio(type="filepath", label="Upload Call Recording (WAV/MP3)")
                with gr.Row():
                    analysis_toggle = gr.Checkbox(label="Enable OpenAI Analysis", value=True)
                    run_button = gr.Button("Analyze Call", variant="primary")
                with gr.Tabs():
                    with gr.TabItem("🎙️ Raw Whisper"):
                        raw_box = gr.Textbox(
                            label="Raw Whisper Transcript",
                            lines=18,
                            placeholder="Raw output from Whisper will appear here…",
                        )
                    with gr.TabItem("✨ OpenAI Cleaned"):
                        cleaned_box = gr.Textbox(
                            label="OpenAI Cleaned Transcript (with speaker turns)",
                            lines=18,
                            placeholder="OpenAI-separated and corrected transcript will appear here…",
                        )

            # Right column: structured fields extracted from the call.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 Extracted Call Insights")
                with gr.Row():
                    urgent_flag = gr.Checkbox(label="🚨 IS URGENT")
                    satisfaction = gr.Number(label="⭐ Customer Satisfaction (1-5)")
                    call_type = gr.Textbox(label="📞 Call Type")
                with gr.Row():
                    agent_name = gr.Textbox(label="Agent Name")
                    customer_name = gr.Textbox(label="Customer Name")
                with gr.Row():
                    project_name = gr.Textbox(label="Project Name")
                    unit_numbers = gr.Textbox(label="Unit Number(s)")
                    department = gr.Textbox(label="Department Mentioned")
                pain_points = gr.Textbox(label="💥 Pain Points", lines=3)
                action_items = gr.Textbox(label="✅ Action Items", lines=3)
                next_steps = gr.Textbox(label="⏭️ Next Steps", lines=3)

        # Order must match the tuple returned by process_call.
        result_components = [
            raw_box,
            cleaned_box,
            agent_name,
            customer_name,
            unit_numbers,
            project_name,
            department,
            call_type,
            satisfaction,
            urgent_flag,
            pain_points,
            action_items,
            next_steps,
        ]
        run_button.click(
            fn=process_call,
            inputs=[recording, analysis_toggle],
            outputs=result_components,
        )

    return demo, slate_theme, dark_mode_js
if __name__ == "__main__":
    app, theme_obj, js_func = build_ui()
    # Listen on all interfaces at port 7860, without a public share link.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        theme=theme_obj,
        js=js_func,
    )