Spaces:

MIP-Tech
/

Speach-To-Text

Sleeping

App Files Files Community

Speach-To-Text / scripts /app.py

MIP-Tech

Deploy to HF Spaces

0db822c 23 days ago

raw

history blame contribute delete

6.99 kB

	"""
	Gradio UI for Misr Italia Properties Speech-to-Text Pipeline
	"""
	import os
	import sys
	import json
	import logging
	from pathlib import Path

	# Add root to python path
	root = Path(__file__).parent.parent
	sys.path.insert(0, str(root))

	import gradio as gr
	from dotenv import load_dotenv

	from src.inference.transcribe import WhisperTranscriber
	from src.inference.analyze_call import CallAnalyzer, clean_transcript

	# Configure logging at INFO level and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Read environment variables from the .env file located under `root`.
	load_dotenv(root / ".env")

	# Both models are constructed at import time so they are built once when the
	# server starts instead of on every request.
	logger.info("Loading Whisper Model...")
	DEFAULT_MODEL = "outputs/checkpoints/merged_model"
	model_path = str(root / DEFAULT_MODEL)
	# When the local checkpoint path does not exist, fall back to the
	# "openai/whisper-large-v3" identifier.
	model_path = model_path if Path(model_path).exists() else "openai/whisper-large-v3"

	transcriber = WhisperTranscriber(model_path=model_path, device=None)

	logger.info("Initializing CallAnalyzer with OpenAI...")
	try:
	    analyzer = CallAnalyzer()
	except Exception as init_error:
	    # Start-up continues without an analyzer; `analyzer` is left as None and
	    # process_call skips the analysis step in that case.
	    logger.error("Failed to init CallAnalyzer: %s", init_error)
	    analyzer = None

	# The UI binds 13 outputs to process_call: two transcript boxes followed by
	# this many insight widgets. Early-exit paths must supply a value for each.
	_INSIGHT_OUTPUT_COUNT = 11


	def _format_bullets(items):
	    """Render *items* as one "- item" line each; None or empty gives ""."""
	    return "\n".join(f"- {item}" for item in (items or []))


	def _format_units(units):
	    """Return unit numbers as a single display string.

	    Lists and tuples are joined with ", " after converting every element to
	    ``str`` (so non-string unit numbers do not break ``str.join``); ``None``
	    becomes ""; any other value is passed through ``str``.
	    """
	    if units is None:
	        return ""
	    if isinstance(units, (list, tuple)):
	        return ", ".join(str(unit) for unit in units)
	    return str(units)


	def process_call(audio_file, enable_analysis):
	    """Transcribe an uploaded call and optionally extract structured insights.

	    Args:
	        audio_file: Value handed to ``transcriber.transcribe``. A falsy value
	            (nothing uploaded) short-circuits with a message.
	        enable_analysis: When truthy and the module-level ``analyzer`` was
	            created successfully, ``analyzer.analyze`` is run on the
	            transcript.

	    Returns:
	        A 13-tuple in the order the UI outputs are bound: raw transcript,
	        cleaned transcript, agent name, customer name, unit number(s),
	        project name, department mentioned, call type, customer
	        satisfaction, urgency flag, pain points, action items, next steps.
	        When no audio is given or transcription fails, the first two items
	        carry the message and the remaining eleven are ``None``.
	    """
	    if not audio_file:
	        message = "No audio uploaded."
	        return (message, message) + (None,) * _INSIGHT_OUTPUT_COUNT

	    # Run transcription (VAD + Whisper; OpenAI handles speaker separation)
	    try:
	        transcript = transcriber.transcribe(audio_file)
	    except Exception as e:
	        # logger.exception keeps the traceback alongside the message.
	        logger.exception("Transcription error: %s", e)
	        err = f"Transcription error: {e}"
	        return (err, err) + (None,) * _INSIGHT_OUTPUT_COUNT

	    # Defaults shown when analysis is disabled, unavailable, or fails.
	    parsed = {
	        "cleaned_transcript": transcript,
	        "agent_name": "",
	        "customer_name": "",
	        "unit_number": [],
	        "project_name": "",
	        "department_mentioned": "",
	        "call_type": "",
	        "customer_satisfaction": 0,
	        "is_urgent": False,
	        "pain_points": [],
	        "action_items_promised": [],
	        "next_steps": [],
	    }

	    # Fallback: the cleaned transcript equals the raw one unless analysis
	    # succeeds and returns a non-empty replacement.
	    openai_transcript = transcript

	    if enable_analysis and analyzer:
	        try:
	            parsed.update(analyzer.analyze(transcript).model_dump())
	        except Exception as e:
	            logger.exception("OpenAI Analysis error: %s", e)
	            # Surface the failure in the UI through the agent-name field.
	            parsed["agent_name"] = f"Error: {e}"
	        else:
	            # Guard against the analysis overwriting the key with None/"".
	            openai_transcript = parsed.get("cleaned_transcript") or transcript

	    return (
	        transcript,
	        openai_transcript,
	        parsed.get("agent_name"),
	        parsed.get("customer_name"),
	        _format_units(parsed.get("unit_number")),
	        parsed.get("project_name"),
	        parsed.get("department_mentioned"),
	        parsed.get("call_type"),
	        parsed.get("customer_satisfaction"),
	        parsed.get("is_urgent"),
	        _format_bullets(parsed.get("pain_points")),
	        _format_bullets(parsed.get("action_items_promised")),
	        _format_bullets(parsed.get("next_steps")),
	    )


	def build_ui():
	    """Assemble the Gradio Blocks interface for the call analyzer.

	    Returns:
	        A ``(demo, theme, js)`` tuple: the Blocks app, the Monochrome theme
	        object, and a JavaScript snippet that reloads the page with
	        ``__theme=dark`` in the query string when it is not already set.
	        The theme and snippet are returned rather than applied here.
	    """
	    ui_theme = gr.themes.Monochrome(primary_hue="slate", neutral_hue="slate")

	    # Kept verbatim: this text is handed to the browser as-is.
	    dark_mode_js = """
	function refresh() {
	const url = new URL(window.location);
	if (url.searchParams.get('__theme') !== 'dark') {
	url.searchParams.set('__theme', 'dark');
	window.location.href = url.href;
	}
	}
	"""

	    with gr.Blocks(title="Misr Italia Properties - Call Analyzer") as demo:
	        gr.Markdown("# 🏢 Misr Italia Properties - Call Center AI")
	        gr.Markdown(
	            "Upload an audio recording of a customer call to automatically "
	            "transcribe, perform intelligent speaker separation, and extract "
	            "business intelligence via OpenAI GPT-4o-mini."
	        )

	        with gr.Row():
	            # Left column: upload controls and the two transcript views.
	            with gr.Column(scale=1):
	                recording_in = gr.Audio(
	                    type="filepath",
	                    label="Upload Call Recording (WAV/MP3)",
	                )
	                with gr.Row():
	                    analysis_toggle = gr.Checkbox(
	                        label="Enable OpenAI Analysis",
	                        value=True,
	                    )

	                run_button = gr.Button("Analyze Call", variant="primary")

	                with gr.Tabs():
	                    with gr.TabItem("🎙️ Raw Whisper"):
	                        raw_box = gr.Textbox(
	                            label="Raw Whisper Transcript",
	                            lines=18,
	                            placeholder="Raw output from Whisper will appear here…",
	                        )
	                    with gr.TabItem("✨ OpenAI Cleaned"):
	                        cleaned_box = gr.Textbox(
	                            label="OpenAI Cleaned Transcript (with speaker turns)",
	                            lines=18,
	                            placeholder="OpenAI-separated and corrected transcript will appear here…",
	                        )

	            # Right column: structured fields extracted from the call.
	            with gr.Column(scale=1):
	                gr.Markdown("### 📊 Extracted Call Insights")
	                with gr.Row():
	                    urgent_box = gr.Checkbox(label="🚨 IS URGENT")
	                    satisfaction_box = gr.Number(label="⭐ Customer Satisfaction (1-5)")
	                    call_type_box = gr.Textbox(label="📞 Call Type")
	                with gr.Row():
	                    agent_box = gr.Textbox(label="Agent Name")
	                    customer_box = gr.Textbox(label="Customer Name")
	                with gr.Row():
	                    project_box = gr.Textbox(label="Project Name")
	                    units_box = gr.Textbox(label="Unit Number(s)")

	                department_box = gr.Textbox(label="Department Mentioned")

	                pain_points_box = gr.Textbox(label="💥 Pain Points", lines=3)
	                action_items_box = gr.Textbox(label="✅ Action Items", lines=3)
	                next_steps_box = gr.Textbox(label="⏭️ Next Steps", lines=3)

	        # Order here must mirror the tuple returned by process_call.
	        result_widgets = [
	            raw_box,
	            cleaned_box,
	            agent_box,
	            customer_box,
	            units_box,
	            project_box,
	            department_box,
	            call_type_box,
	            satisfaction_box,
	            urgent_box,
	            pain_points_box,
	            action_items_box,
	            next_steps_box,
	        ]
	        run_button.click(
	            fn=process_call,
	            inputs=[recording_in, analysis_toggle],
	            outputs=result_widgets,
	        )

	    return demo, ui_theme, dark_mode_js

	if __name__ == "__main__":
	    # build_ui returns the theme and JS snippet alongside the app so they can
	    # be supplied to launch() here.
	    app, theme_obj, js_func = build_ui()
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "theme": theme_obj,
	        "js": js_func,
	    }
	    app.launch(**launch_options)