Spaces:
Sleeping
Sleeping
File size: 6,992 Bytes
"""
Gradio UI for Misr Italia Properties Speech-to-Text Pipeline
"""
import os
import sys
import json
import logging
from pathlib import Path
# Add root to python path
root = Path(__file__).parent.parent
sys.path.insert(0, str(root))
import gradio as gr
from dotenv import load_dotenv
from src.inference.transcribe import WhisperTranscriber
from src.inference.analyze_call import CallAnalyzer, clean_transcript
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pull settings from the .env file that sits in the project root.
load_dotenv(root / ".env")

# The models are created once at import time so they load on server startup
# rather than on every request.
logger.info("Loading Whisper Model...")
DEFAULT_MODEL = "outputs/checkpoints/merged_model"
model_path = str(root / DEFAULT_MODEL)
# Use the local merged checkpoint when it exists on disk; otherwise fall back
# to the "openai/whisper-large-v3" identifier.
model_path = model_path if Path(model_path).exists() else "openai/whisper-large-v3"
transcriber = WhisperTranscriber(model_path=model_path, device=None)

logger.info("Initializing CallAnalyzer with OpenAI...")
try:
    analyzer = CallAnalyzer()
except Exception as e:
    # Best effort: the app still starts without the analyzer, and
    # process_call skips the analysis step while it is None.
    analyzer = None
    logger.error("Failed to init CallAnalyzer: %s", e)
def _format_units(units):
    """Render the unit-number field as a single display string."""
    if units is None:
        return ""
    if isinstance(units, (list, tuple)):
        # Items are passed through str() so non-string unit ids cannot
        # break the join.
        return ", ".join(str(unit) for unit in units)
    return str(units)


def _bullet_list(items):
    """Render a collection as one ``- item`` line per entry ("" if empty/None)."""
    if not items:
        return ""
    if isinstance(items, str):
        # A bare string is treated as one entry instead of being iterated
        # character by character.
        items = [items]
    return "\n".join(f"- {item}" for item in items)


def process_call(audio_file, enable_analysis):
    """Transcribe an uploaded recording and optionally extract call insights.

    Args:
        audio_file: Value of the audio input component; falsy when nothing
            was uploaded. It is passed unchanged to
            ``transcriber.transcribe``.
        enable_analysis: When truthy and the module-level ``analyzer`` was
            initialised, the transcript is also sent to
            ``analyzer.analyze``.

    Returns:
        A 13-tuple in the order expected by the ``outputs`` list wired up in
        ``build_ui``: raw transcript, cleaned transcript, agent name,
        customer name, unit number(s), project name, department, call type,
        customer satisfaction, urgency flag, pain points, action items and
        next steps. When no audio is supplied or transcription fails, the
        first two entries carry the message and the remaining eleven are
        ``None``.
    """
    if not audio_file:
        return "No audio uploaded.", "No audio uploaded.", *[None] * 11

    # Run transcription (VAD + Whisper; OpenAI handles speaker separation)
    try:
        transcript = transcriber.transcribe(audio_file)
    except Exception as e:
        # logger.exception keeps the traceback in the server log.
        logger.exception("Transcription error: %s", e)
        err = f"Transcription error: {e}"
        return err, err, *[None] * 11

    # Defaults shown in the UI when analysis is disabled or fails.
    parsed = {
        "cleaned_transcript": transcript,
        "agent_name": "",
        "customer_name": "",
        "unit_number": [],
        "project_name": "",
        "department_mentioned": "",
        "call_type": "",
        "customer_satisfaction": 0,
        "is_urgent": False,
        "pain_points": [],
        "action_items_promised": [],
        "next_steps": [],
    }

    # Fallback: the cleaned tab mirrors the raw transcript.
    openai_transcript = transcript

    if enable_analysis and analyzer:
        try:
            parsed.update(analyzer.analyze(transcript).model_dump())
        except Exception as e:
            logger.exception("OpenAI Analysis error: %s", e)
            # Surface the failure in the UI through the agent-name field.
            parsed["agent_name"] = f"Error: {e}"
        else:
            # The dump may carry None/empty for the cleaned transcript;
            # keep the raw text in that case.
            openai_transcript = parsed.get("cleaned_transcript") or transcript

    # The dump can overwrite the list defaults with None, so every list field
    # goes through a None-safe formatter instead of being iterated directly.
    return (
        transcript,
        openai_transcript,
        parsed.get("agent_name"),
        parsed.get("customer_name"),
        _format_units(parsed.get("unit_number")),
        parsed.get("project_name"),
        parsed.get("department_mentioned"),
        parsed.get("call_type"),
        parsed.get("customer_satisfaction"),
        parsed.get("is_urgent"),
        _bullet_list(parsed.get("pain_points")),
        _bullet_list(parsed.get("action_items_promised")),
        _bullet_list(parsed.get("next_steps")),
    )
def build_ui():
    """Assemble the Gradio Blocks interface for the call analyzer.

    Returns:
        A ``(demo, theme, js_func)`` tuple: the Blocks app, the Monochrome
        theme object and a JavaScript snippet. The theme and script are not
        applied here; they are handed back so the caller can pass them on.
    """
    theme = gr.themes.Monochrome(primary_hue="slate", neutral_hue="slate")

    # JavaScript defining refresh(): if the URL's `__theme` query parameter
    # is not 'dark', it sets it and navigates to the updated URL.
    js_func = (
        "\n"
        "function refresh() {\n"
        "const url = new URL(window.location);\n"
        "if (url.searchParams.get('__theme') !== 'dark') {\n"
        "url.searchParams.set('__theme', 'dark');\n"
        "window.location.href = url.href;\n"
        "}\n"
        "}\n"
    )

    with gr.Blocks(title="Misr Italia Properties - Call Analyzer") as demo:
        gr.Markdown("# 🏢 Misr Italia Properties - Call Center AI")
        gr.Markdown("Upload an audio recording of a customer call to automatically transcribe, perform intelligent speaker separation, and extract business intelligence via OpenAI GPT-4o-mini.")

        with gr.Row():
            # Left column: upload, controls and the two transcript tabs.
            with gr.Column(scale=1):
                recording = gr.Audio(type="filepath", label="Upload Call Recording (WAV/MP3)")
                with gr.Row():
                    analysis_toggle = gr.Checkbox(label="Enable OpenAI Analysis", value=True)
                    run_button = gr.Button("Analyze Call", variant="primary")
                with gr.Tabs():
                    with gr.TabItem("🎙️ Raw Whisper"):
                        raw_box = gr.Textbox(
                            label="Raw Whisper Transcript",
                            lines=18,
                            placeholder="Raw output from Whisper will appear here…",
                        )
                    with gr.TabItem("✨ OpenAI Cleaned"):
                        cleaned_box = gr.Textbox(
                            label="OpenAI Cleaned Transcript (with speaker turns)",
                            lines=18,
                            placeholder="OpenAI-separated and corrected transcript will appear here…",
                        )

            # Right column: structured fields extracted from the call.
            with gr.Column(scale=1):
                gr.Markdown("### 📊 Extracted Call Insights")
                with gr.Row():
                    urgent_box = gr.Checkbox(label="🚨 IS URGENT")
                    satisfaction_box = gr.Number(label="⭐ Customer Satisfaction (1-5)")
                    call_type_box = gr.Textbox(label="📞 Call Type")
                with gr.Row():
                    agent_box = gr.Textbox(label="Agent Name")
                    customer_box = gr.Textbox(label="Customer Name")
                with gr.Row():
                    project_box = gr.Textbox(label="Project Name")
                    unit_box = gr.Textbox(label="Unit Number(s)")
                # NOTE(review): placement of this field relative to the row
                # above is assumed — confirm against the intended layout.
                department_box = gr.Textbox(label="Department Mentioned")
                pain_points_box = gr.Textbox(label="💥 Pain Points", lines=3)
                action_items_box = gr.Textbox(label="✅ Action Items", lines=3)
                next_steps_box = gr.Textbox(label="⏭️ Next Steps", lines=3)

        # Order must match the tuple returned by process_call.
        result_components = [
            raw_box,
            cleaned_box,
            agent_box,
            customer_box,
            unit_box,
            project_box,
            department_box,
            call_type_box,
            satisfaction_box,
            urgent_box,
            pain_points_box,
            action_items_box,
            next_steps_box,
        ]
        run_button.click(
            fn=process_call,
            inputs=[recording, analysis_toggle],
            outputs=result_components,
        )

    return demo, theme, js_func
if __name__ == "__main__":
    # Build the interface, then start the server with the theme and script
    # that build_ui hands back.
    app, ui_theme, ui_script = build_ui()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "theme": ui_theme,
        "js": ui_script,
    }
    app.launch(**launch_options)
|