# MeetingNotes-Voxtral-Analysis / src/ui/spaces_interface.py
# Uploaded by VincentGOURBIN via huggingface_hub (commit 79a3d1b, verified)
"""
Application Gradio pour l'analyse intelligente de réunions avec Voxtral - Version HF Spaces.
Version adaptée pour Hugging Face Spaces avec :
- Uniquement mode Transformers (MLX et API supprimés)
- Modèles 8-bit uniquement
- Support MCP natif
- Zero GPU decorators
"""
import os
import gradio as gr
from dotenv import load_dotenv
from ..ai.voxtral_spaces_analyzer import VoxtralSpacesAnalyzer
from ..utils.zero_gpu_manager import ZeroGPUManager, gpu_inference
# Import labels locally
from .labels import UILabels
# Charger les variables d'environnement depuis le fichier .env
load_dotenv()
# Global instances for MCP functions
analyzer = None
gpu_manager = None
def initialize_components():
    """Ensure the shared analyzer and GPU manager exist (used by MCP tools)."""
    global analyzer, gpu_manager
    # Already initialized — nothing to do.
    if analyzer is not None:
        return
    analyzer = VoxtralSpacesAnalyzer()
    gpu_manager = ZeroGPUManager()
# MCP Tools - exposed automatically by Gradio
@gpu_inference(duration=300)
def analyze_meeting_audio(
    audio_file: str,
    sections: list = None,
    model_name: str = "Voxtral-Mini-3B-2507"
) -> dict:
    """
    Analyze meeting audio and generate structured summaries using Voxtral AI.

    This function processes audio files to extract insights and generate
    structured meeting summaries with configurable sections.

    Args:
        audio_file: Path to the audio file to analyze (MP3, WAV, M4A, OGG).
        sections: List of analysis section keys to include
            (e.g. "resume_executif", "plan_action"); defaults to a
            three-section summary when None.
        model_name: Voxtral model to use for analysis (Mini-3B or Small-24B).

    Returns:
        Dictionary containing analysis results, processing time, and metadata;
        on failure, a dict with "status": "failed" and an "error" message.
    """
    # Imported at function top (not inside try) so time.time() is always
    # bound in the except handler below.
    import time

    initialize_components()

    # Guard falsy input explicitly: os.path.exists(None) raises TypeError.
    if not audio_file or not os.path.exists(audio_file):
        return {"error": "Audio file not found", "status": "failed"}

    # Started before the try block so the except handler can always compute
    # an elapsed time (replaces the old "'start_time' in locals()" hack).
    start_time = time.time()
    try:
        # Default to a compact three-section summary.
        if sections is None:
            sections = ["resume_executif", "discussions_principales", "plan_action"]

        # Switch model only when the requested one is not already loaded.
        if analyzer.current_model_key != model_name:
            analyzer.switch_model(model_name)

        # Analyze audio (MCP entry point: no Gradio progress bar available).
        results = analyzer.analyze_audio_chunks(
            wav_path=audio_file,
            language="auto",
            selected_sections=sections
        )

        return {
            "status": "completed",
            "analysis": results.get("transcription", "No analysis available"),
            "processing_time_seconds": time.time() - start_time,
            "model_used": model_name,
            "sections_analyzed": sections
        }
    except Exception as e:
        return {
            "status": "failed",
            "error": str(e),
            "processing_time_seconds": time.time() - start_time
        }
    finally:
        # Always release GPU resources, success or failure.
        if gpu_manager:
            gpu_manager.cleanup_gpu()
def get_available_sections() -> dict:
    """Return the catalogue of analysis sections available for meeting summaries."""
    from ..ai.prompts_config import VoxtralPrompts

    catalogue = VoxtralPrompts.AVAILABLE_SECTIONS
    return {
        "status": "success",
        "sections": catalogue,
        "total_sections": len(catalogue),
    }
def get_meeting_templates() -> dict:
    """Return pre-configured meeting analysis templates keyed by template id."""
    action_sections = [
        "resume_executif", "discussions_principales", "plan_action",
        "decisions_prises", "prochaines_etapes",
    ]
    info_sections = [
        "resume_executif", "sujets_principaux", "points_importants",
        "questions_discussions", "elements_suivi",
    ]
    templates = {
        "action_meeting": {
            "name": "Action-Oriented Meeting",
            "description": "For meetings focused on decisions and action items",
            "recommended_sections": action_sections,
        },
        "info_meeting": {
            "name": "Information Meeting",
            "description": "For presentations and informational sessions",
            "recommended_sections": info_sections,
        },
    }
    return {"status": "success", "templates": templates, "total_templates": len(templates)}
# Handlers adaptés pour HF Spaces
def handle_input_mode_change(input_mode):
    """Toggle visibility of the audio/video sections based on the selected input mode."""
    audio_selected = input_mode == UILabels.INPUT_MODE_AUDIO
    # First update targets the audio section, second the video section.
    return gr.update(visible=audio_selected), gr.update(visible=not audio_selected)
def extract_audio_from_video(video_file, language):
    """Forward the uploaded video as audio and switch the UI back to audio mode.

    On HF Spaces no server-side extraction is performed: video processing is
    assumed to happen client-side, or the file is already audio. The file (or
    None when nothing was uploaded) is passed through unchanged.
    """
    return (
        video_file,
        gr.update(visible=True),
        gr.update(visible=False),
        UILabels.INPUT_MODE_AUDIO,
        language,
    )
@gpu_inference(duration=300)
def handle_direct_transcription(
    audio_file, hf_token, language, transcription_mode, model_key,
    selected_sections, start_trim, end_trim, progress=gr.Progress()
):
    """Run the direct analysis pipeline, adapted for HF Spaces.

    Returns a (status, summary) pair of strings for the Gradio outputs.
    """
    initialize_components()

    if audio_file is None:
        return "", "❌ Veuillez d'abord télécharger un fichier audio."

    try:
        # Derive the model checkpoint name from the transcription-mode label.
        model_name = (
            "Voxtral-Mini-3B-2507" if "Mini" in transcription_mode
            else "Voxtral-Small-24B-2507"
        )

        # Load the requested model only when it differs from the current one.
        if analyzer.current_model_key != model_name:
            analyzer.switch_model(model_name)

        def progress_callback(progress_ratio, message):
            # Relay analyzer progress to the Gradio progress bar.
            progress(progress_ratio, desc=message)

        # Run the analysis (chunk duration is chosen automatically per model).
        results = analyzer.analyze_audio_chunks(
            wav_path=audio_file,
            language="auto",
            selected_sections=selected_sections,
            start_trim=start_trim,
            end_trim=end_trim,
            progress_callback=progress_callback
        )
        return "", results.get("transcription", "Aucune analyse disponible")
    except Exception as e:
        return "", f"❌ Erreur lors de l'analyse: {str(e)}"
    finally:
        # Always release GPU resources.
        if gpu_manager:
            gpu_manager.cleanup_gpu()
def create_spaces_interface():
    """
    Main entry point for the HF Spaces interface.

    Builds and returns the Gradio Blocks demo. Same layout as the original
    project but simplified:
    - Transformers backend only (no MLX/API modes)
    - Pre-quantized models only
    - Native MCP support

    Returns:
        gr.Blocks: the fully wired Gradio application.
    """
    # Create the shared analyzer/GPU-manager singletons up front.
    initialize_components()

    # Read the Hugging Face token from the environment variables.
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
    if hf_token is None:
        print("⚠️ Warning: HF_TOKEN environment variable not found")

    # Custom Glass theme configuration (identical to the original project).
    custom_glass_theme = gr.themes.Glass(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.gray,
        text_size=gr.themes.sizes.text_md,
        spacing_size=gr.themes.sizes.spacing_md,
        radius_size=gr.themes.sizes.radius_md
    )

    # Custom CSS for the application.
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 30px;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 15px;
        color: white;
        box-shadow: 0 8px 32px rgba(31, 38, 135, 0.37);
    }
    .processing-section {
        background: rgba(255, 255, 255, 0.1);
        border-radius: 10px;
        padding: 20px;
        margin: 15px 0;
        border: 1px solid rgba(255, 255, 255, 0.2);
        backdrop-filter: blur(10px);
    }
    .results-section {
        margin-top: 25px;
    }
    """

    with gr.Blocks(
        title="MeetingNotes - AI Analysis with Voxtral",
        theme=custom_glass_theme,
        css=custom_css
    ) as demo:
        # Styled main header (identical to the original).
        with gr.Column(elem_classes="main-header"):
            gr.Markdown(
                f"""
                # {UILabels.MAIN_TITLE}
                {UILabels.MAIN_SUBTITLE}
                {UILabels.MAIN_DESCRIPTION}
                """,
                elem_classes="header-content"
            )

        # Processing mode section (SIMPLIFIED — Transformers only).
        with gr.Column(elem_classes="processing-section"):
            gr.Markdown("## 🔧 Model Configuration")
            # Model selection (pre-quantized models).
            local_model_choice = gr.Radio(
                choices=[UILabels.MODEL_MINI, UILabels.MODEL_SMALL],
                value=UILabels.MODEL_MINI,
                label="Voxtral Model Selection"
            )
            # Information about the models.
            gr.Markdown("""
            **📋 About this HF Spaces version:**
            - Uses standard Mistral Voxtral models optimized for Zero GPU
            - **Mini Model**: [Voxtral-Mini-3B-2507](https://huggingface.co/mistralai/Voxtral-Mini-3B-2507) - Faster processing, lower memory usage
            - **Small Model**: [Voxtral-Small-24B-2507](https://huggingface.co/mistralai/Voxtral-Small-24B-2507) - Higher quality analysis, more detailed summaries
            - Chunk duration automatically optimized: 15min for Mini, 10min for Small
            **🔗 Complete version available:**
            For local processing (MLX/Transformers), API modes, and **speaker diarization**, check the full version on [GitHub](https://github.com/VincentGourbin/meetingnotes)
            """)

        # Input mode selection (identical to the original).
        with gr.Column(elem_classes="processing-section"):
            gr.Markdown(UILabels.INPUT_MODE_TITLE)
            input_mode = gr.Radio(
                choices=[UILabels.INPUT_MODE_AUDIO, UILabels.INPUT_MODE_VIDEO],
                value=UILabels.INPUT_MODE_AUDIO,
                label=UILabels.INPUT_MODE_LABEL
            )

        # Audio section (default mode) — identical to the original.
        with gr.Column(elem_classes="processing-section") as audio_section:
            gr.Markdown(UILabels.AUDIO_MODE_TITLE)
            audio_input = gr.Audio(
                label=UILabels.AUDIO_INPUT_LABEL,
                type="filepath",
                show_label=True,
                interactive=True
            )

        # Video section (hidden by default) — identical to the original.
        with gr.Column(elem_classes="processing-section", visible=False) as video_section:
            gr.Markdown(UILabels.VIDEO_MODE_TITLE)
            video_input = gr.File(
                label=UILabels.VIDEO_INPUT_LABEL,
                file_types=["video"]
            )
            btn_extract_audio = gr.Button(
                UILabels.EXTRACT_AUDIO_BUTTON,
                variant="secondary",
                size="lg"
            )

        # Trim options section (identical to the original).
        with gr.Column(elem_classes="processing-section"):
            with gr.Accordion(UILabels.TRIM_OPTIONS_TITLE, open=False):
                with gr.Row():
                    start_trim_input = gr.Number(
                        label=UILabels.START_TRIM_LABEL,
                        value=0,
                        minimum=0,
                        maximum=3600
                    )
                    end_trim_input = gr.Number(
                        label=UILabels.END_TRIM_LABEL,
                        value=0,
                        minimum=0,
                        maximum=3600
                    )

        # Main analysis section (identical to the original).
        with gr.Column(elem_classes="processing-section"):
            gr.Markdown(UILabels.MAIN_ANALYSIS_TITLE)
            gr.Markdown(UILabels.MAIN_ANALYSIS_DESCRIPTION)
            gr.Markdown("*Chunk duration is automatically optimized: 15min for Mini, 10min for Small (Zero GPU optimization)*")

            # Summary-section configuration.
            gr.Markdown(UILabels.SUMMARY_SECTIONS_TITLE)
            gr.Markdown(UILabels.SUMMARY_SECTIONS_DESCRIPTION)

            # Quick preset buttons.
            with gr.Row():
                btn_preset_action = gr.Button(UILabels.PRESET_ACTION_BUTTON, variant="secondary", size="sm")
                btn_preset_info = gr.Button(UILabels.PRESET_INFO_BUTTON, variant="secondary", size="sm")
                btn_preset_complet = gr.Button(UILabels.PRESET_COMPLETE_BUTTON, variant="secondary", size="sm")
            with gr.Row():
                with gr.Column():
                    gr.Markdown(UILabels.ACTION_SECTIONS_TITLE)
                    section_resume_executif = gr.Checkbox(label=UILabels.SECTION_EXECUTIVE_SUMMARY, value=True)
                    section_discussions = gr.Checkbox(label=UILabels.SECTION_MAIN_DISCUSSIONS, value=True)
                    section_plan_action = gr.Checkbox(label=UILabels.SECTION_ACTION_PLAN, value=True)
                    section_decisions = gr.Checkbox(label=UILabels.SECTION_DECISIONS, value=True)
                    section_prochaines_etapes = gr.Checkbox(label=UILabels.SECTION_NEXT_STEPS, value=True)
                with gr.Column():
                    gr.Markdown(UILabels.INFO_SECTIONS_TITLE)
                    section_sujets_principaux = gr.Checkbox(label=UILabels.SECTION_MAIN_TOPICS, value=False)
                    section_points_importants = gr.Checkbox(label=UILabels.SECTION_KEY_POINTS, value=False)
                    section_questions = gr.Checkbox(label=UILabels.SECTION_QUESTIONS, value=False)
                    section_elements_suivi = gr.Checkbox(label=UILabels.SECTION_FOLLOW_UP, value=False)
            btn_direct_transcribe = gr.Button(
                UILabels.ANALYZE_BUTTON,
                variant="primary",
                size="lg"
            )

        # Results section (identical to the original).
        with gr.Column(elem_classes="results-section"):
            gr.Markdown(UILabels.RESULTS_TITLE)
            final_summary_output = gr.Markdown(
                value=UILabels.RESULTS_PLACEHOLDER,
                label=UILabels.RESULTS_LABEL,
                height=500
            )

        # Event handlers (adapted for HF Spaces).

        # Input-mode switching.
        input_mode.change(
            fn=handle_input_mode_change,
            inputs=[input_mode],
            outputs=[audio_section, video_section]
        )

        # Audio extraction from video.
        # NOTE(review): gr.State instances are created inline here; the State
        # in `outputs` is not referenced anywhere else, so the returned
        # `language` value appears to be discarded — confirm this is intended.
        btn_extract_audio.click(
            fn=extract_audio_from_video,
            inputs=[video_input, gr.State("french")],
            outputs=[audio_input, audio_section, video_section, input_mode, gr.State("french")]
        )

        # Section preset helpers (identical to the original). Each returns one
        # boolean per checkbox, in the same order as the preset-click outputs.
        def preset_action():
            # Action-oriented sections on, informational sections off.
            return (True, True, True, True, True, False, False, False, False)

        def preset_info():
            # Executive summary plus informational sections only.
            return (True, False, False, False, False, True, True, True, True)

        def preset_complet():
            # Every section enabled.
            return (True, True, True, True, True, True, True, True, True)

        # Direct-analysis handler (adapted for Transformers only).
        def handle_analysis_direct(
            audio_file, hf_token, language, local_model, start_trim, end_trim,
            s_resume, s_discussions, s_plan_action, s_decisions, s_prochaines_etapes,
            s_sujets_principaux, s_points_importants, s_questions, s_elements_suivi
        ):
            # Transformers mode only (pre-quantized 8-bit).
            transcription_mode = f"Transformers ({local_model} 8-bit)"
            model_key = local_model
            # Build the list of selected section keys from the checkboxes.
            sections_checkboxes = [
                (s_resume, "resume_executif"),
                (s_discussions, "discussions_principales"),
                (s_plan_action, "plan_action"),
                (s_decisions, "decisions_prises"),
                (s_prochaines_etapes, "prochaines_etapes"),
                (s_sujets_principaux, "sujets_principaux"),
                (s_points_importants, "points_importants"),
                (s_questions, "questions_discussions"),
                (s_elements_suivi, "elements_suivi")
            ]
            selected_sections = [section_key for is_selected, section_key in sections_checkboxes if is_selected]
            # Delegate to the direct-analysis function (chunk duration automatic).
            _, summary = handle_direct_transcription(
                audio_file, hf_token, language, transcription_mode,
                model_key, selected_sections, start_trim, end_trim
            )
            return summary

        # Preset events (identical to the original).
        btn_preset_action.click(
            fn=preset_action,
            outputs=[
                section_resume_executif, section_discussions, section_plan_action,
                section_decisions, section_prochaines_etapes, section_sujets_principaux,
                section_points_importants, section_questions, section_elements_suivi
            ]
        )
        btn_preset_info.click(
            fn=preset_info,
            outputs=[
                section_resume_executif, section_discussions, section_plan_action,
                section_decisions, section_prochaines_etapes, section_sujets_principaux,
                section_points_importants, section_questions, section_elements_suivi
            ]
        )
        btn_preset_complet.click(
            fn=preset_complet,
            outputs=[
                section_resume_executif, section_discussions, section_plan_action,
                section_decisions, section_prochaines_etapes, section_sujets_principaux,
                section_points_importants, section_questions, section_elements_suivi
            ]
        )

        # Main analysis event (adapted for HF Spaces).
        btn_direct_transcribe.click(
            fn=handle_analysis_direct,
            inputs=[
                audio_input,
                gr.State(value=hf_token),
                gr.State("french"),
                local_model_choice,
                start_trim_input,
                end_trim_input,
                section_resume_executif,
                section_discussions,
                section_plan_action,
                section_decisions,
                section_prochaines_etapes,
                section_sujets_principaux,
                section_points_importants,
                section_questions,
                section_elements_suivi
            ],
            outputs=[final_summary_output]
        )

        # Footer (identical to the original).
        with gr.Row():
            gr.Markdown(
                """
                ---
                **MeetingNotes** | Powered by [Voxtral](https://mistral.ai/) |
                🚀 Intelligent meeting analysis | 💾 HF Spaces with Zero GPU
                """,
                elem_classes="footer-info"
            )

    # Return the demo (theme and CSS already configured in gr.Blocks for Gradio 6).
    return demo