Spaces:
Running
on
Zero
Running
on
Zero
| """ | |
| Application Gradio pour l'analyse intelligente de réunions avec Voxtral - Version HF Spaces. | |
| Version adaptée pour Hugging Face Spaces avec : | |
| - Uniquement mode Transformers (MLX et API supprimés) | |
| - Modèles 8-bit uniquement | |
| - Support MCP natif | |
| - Zero GPU decorators | |
| """ | |
| import os | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from ..ai.voxtral_spaces_analyzer import VoxtralSpacesAnalyzer | |
| from ..utils.zero_gpu_manager import ZeroGPUManager, gpu_inference | |
| # Import labels locally | |
| from .labels import UILabels | |
| # Charger les variables d'environnement depuis le fichier .env | |
| load_dotenv() | |
| # Global instances for MCP functions | |
| analyzer = None | |
| gpu_manager = None | |
| def initialize_components(): | |
| """Initialize global components for MCP functions.""" | |
| global analyzer, gpu_manager | |
| if analyzer is None: | |
| analyzer = VoxtralSpacesAnalyzer() | |
| gpu_manager = ZeroGPUManager() | |
| # MCP Tools - exposed automatically by Gradio | |
| def analyze_meeting_audio( | |
| audio_file: str, | |
| sections: list = None, | |
| model_name: str = "Voxtral-Mini-3B-2507" | |
| ) -> dict: | |
| """ | |
| Analyze meeting audio and generate structured summaries using Voxtral AI. | |
| This function processes audio files to extract insights and generate | |
| structured meeting summaries with configurable sections. | |
| Args: | |
| audio_file: Path to the audio file to analyze (MP3, WAV, M4A, OGG) | |
| sections: List of analysis sections to include (executive_summary, action_plan, etc.) | |
| model_name: Voxtral model to use for analysis (Mini-3B or Small-24B) | |
| Returns: | |
| Dictionary containing analysis results, processing time, and metadata | |
| """ | |
| initialize_components() | |
| if not os.path.exists(audio_file): | |
| return {"error": "Audio file not found", "status": "failed"} | |
| try: | |
| import time | |
| start_time = time.time() | |
| # Set default sections if none provided | |
| if sections is None: | |
| sections = ["resume_executif", "discussions_principales", "plan_action"] | |
| # Switch model if different | |
| if analyzer.current_model_key != model_name: | |
| analyzer.switch_model(model_name) | |
| # Analyze audio (MCP function without progress bar) | |
| results = analyzer.analyze_audio_chunks( | |
| wav_path=audio_file, | |
| language="auto", | |
| selected_sections=sections | |
| ) | |
| processing_time = time.time() - start_time | |
| return { | |
| "status": "completed", | |
| "analysis": results.get("transcription", "No analysis available"), | |
| "processing_time_seconds": processing_time, | |
| "model_used": model_name, | |
| "sections_analyzed": sections | |
| } | |
| except Exception as e: | |
| return { | |
| "status": "failed", | |
| "error": str(e), | |
| "processing_time_seconds": time.time() - start_time if 'start_time' in locals() else 0 | |
| } | |
| finally: | |
| if gpu_manager: | |
| gpu_manager.cleanup_gpu() | |
| def get_available_sections() -> dict: | |
| """Get available analysis sections for meeting summaries.""" | |
| from ..ai.prompts_config import VoxtralPrompts | |
| return { | |
| "status": "success", | |
| "sections": VoxtralPrompts.AVAILABLE_SECTIONS, | |
| "total_sections": len(VoxtralPrompts.AVAILABLE_SECTIONS) | |
| } | |
| def get_meeting_templates() -> dict: | |
| """Get pre-configured meeting analysis templates.""" | |
| templates = { | |
| "action_meeting": { | |
| "name": "Action-Oriented Meeting", | |
| "description": "For meetings focused on decisions and action items", | |
| "recommended_sections": ["resume_executif", "discussions_principales", "plan_action", "decisions_prises", "prochaines_etapes"] | |
| }, | |
| "info_meeting": { | |
| "name": "Information Meeting", | |
| "description": "For presentations and informational sessions", | |
| "recommended_sections": ["resume_executif", "sujets_principaux", "points_importants", "questions_discussions", "elements_suivi"] | |
| } | |
| } | |
| return {"status": "success", "templates": templates, "total_templates": len(templates)} | |
| # Handlers adaptés pour HF Spaces | |
| def handle_input_mode_change(input_mode): | |
| """Gestion du changement de mode d'entrée.""" | |
| if input_mode == UILabels.INPUT_MODE_AUDIO: | |
| return gr.update(visible=True), gr.update(visible=False) | |
| else: | |
| return gr.update(visible=False), gr.update(visible=True) | |
| def extract_audio_from_video(video_file, language): | |
| """Extraction audio depuis vidéo (placeholder pour HF Spaces).""" | |
| if video_file is None: | |
| return None, gr.update(visible=True), gr.update(visible=False), UILabels.INPUT_MODE_AUDIO, language | |
| # Pour HF Spaces, on assume que le processing vidéo sera fait côté client | |
| # ou qu'on accepte déjà des fichiers audio | |
| return video_file, gr.update(visible=True), gr.update(visible=False), UILabels.INPUT_MODE_AUDIO, language | |
| def handle_direct_transcription( | |
| audio_file, hf_token, language, transcription_mode, model_key, | |
| selected_sections, start_trim, end_trim, progress=gr.Progress() | |
| ): | |
| """Gestion de l'analyse directe adaptée pour HF Spaces.""" | |
| initialize_components() | |
| if audio_file is None: | |
| return "", "❌ Veuillez d'abord télécharger un fichier audio." | |
| try: | |
| # Extraire le nom du modèle depuis transcription_mode | |
| if "Mini" in transcription_mode: | |
| model_name = "Voxtral-Mini-3B-2507" | |
| else: | |
| model_name = "Voxtral-Small-24B-2507" | |
| # Configurer l'analyseur | |
| if analyzer.current_model_key != model_name: | |
| analyzer.switch_model(model_name) | |
| # Setup progress callback | |
| def progress_callback(progress_ratio, message): | |
| progress(progress_ratio, desc=message) | |
| # Lancer l'analyse (chunk duration automatique selon le modèle) | |
| results = analyzer.analyze_audio_chunks( | |
| wav_path=audio_file, | |
| language="auto", | |
| selected_sections=selected_sections, | |
| start_trim=start_trim, | |
| end_trim=end_trim, | |
| progress_callback=progress_callback | |
| ) | |
| return "", results.get("transcription", "Aucune analyse disponible") | |
| except Exception as e: | |
| error_msg = f"❌ Erreur lors de l'analyse: {str(e)}" | |
| return "", error_msg | |
| finally: | |
| if gpu_manager: | |
| gpu_manager.cleanup_gpu() | |
| def create_spaces_interface(): | |
| """ | |
| Point d'entrée principal pour l'interface HF Spaces. | |
| Interface identique au projet original mais simplifiée : | |
| - Seul mode Transformers (pas MLX/API) | |
| - Modèles pré-quantisés uniquement | |
| - Support MCP natif | |
| """ | |
| # Initialize components | |
| initialize_components() | |
| # Récupérer le token Hugging Face depuis les variables d'environnement | |
| hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") | |
| if hf_token is None: | |
| print("⚠️ Warning: HF_TOKEN environment variable not found") | |
| # Configuration du thème Glass personnalisé (identique à l'original) | |
| custom_glass_theme = gr.themes.Glass( | |
| primary_hue=gr.themes.colors.blue, | |
| secondary_hue=gr.themes.colors.gray, | |
| text_size=gr.themes.sizes.text_md, | |
| spacing_size=gr.themes.sizes.spacing_md, | |
| radius_size=gr.themes.sizes.radius_md | |
| ) | |
| # CSS personnalisé pour l'application | |
| custom_css = """ | |
| .gradio-container { | |
| max-width: 1200px !important; | |
| margin: 0 auto !important; | |
| } | |
| .main-header { | |
| text-align: center; | |
| margin-bottom: 30px; | |
| padding: 20px; | |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| border-radius: 15px; | |
| color: white; | |
| box-shadow: 0 8px 32px rgba(31, 38, 135, 0.37); | |
| } | |
| .processing-section { | |
| background: rgba(255, 255, 255, 0.1); | |
| border-radius: 10px; | |
| padding: 20px; | |
| margin: 15px 0; | |
| border: 1px solid rgba(255, 255, 255, 0.2); | |
| backdrop-filter: blur(10px); | |
| } | |
| .results-section { | |
| margin-top: 25px; | |
| } | |
| """ | |
| with gr.Blocks( | |
| title="MeetingNotes - AI Analysis with Voxtral", | |
| theme=custom_glass_theme, | |
| css=custom_css | |
| ) as demo: | |
| # Main header with style (identique à l'original) | |
| with gr.Column(elem_classes="main-header"): | |
| gr.Markdown( | |
| f""" | |
| # {UILabels.MAIN_TITLE} | |
| {UILabels.MAIN_SUBTITLE} | |
| {UILabels.MAIN_DESCRIPTION} | |
| """, | |
| elem_classes="header-content" | |
| ) | |
| # Processing mode section (SIMPLIFIÉ - seulement Transformers) | |
| with gr.Column(elem_classes="processing-section"): | |
| gr.Markdown("## 🔧 Model Configuration") | |
| # Model selection (modèles pré-quantisés) | |
| local_model_choice = gr.Radio( | |
| choices=[UILabels.MODEL_MINI, UILabels.MODEL_SMALL], | |
| value=UILabels.MODEL_MINI, | |
| label="Voxtral Model Selection" | |
| ) | |
| # Information about the models | |
| gr.Markdown(""" | |
| **📋 About this HF Spaces version:** | |
| - Uses standard Mistral Voxtral models optimized for Zero GPU | |
| - **Mini Model**: [Voxtral-Mini-3B-2507](https://huggingface.co/mistralai/Voxtral-Mini-3B-2507) - Faster processing, lower memory usage | |
| - **Small Model**: [Voxtral-Small-24B-2507](https://huggingface.co/mistralai/Voxtral-Small-24B-2507) - Higher quality analysis, more detailed summaries | |
| - Chunk duration automatically optimized: 15min for Mini, 10min for Small | |
| **🔗 Complete version available:** | |
| For local processing (MLX/Transformers), API modes, and **speaker diarization**, check the full version on [GitHub](https://github.com/VincentGourbin/meetingnotes) | |
| """) | |
| # Input mode selection (identique à l'original) | |
| with gr.Column(elem_classes="processing-section"): | |
| gr.Markdown(UILabels.INPUT_MODE_TITLE) | |
| input_mode = gr.Radio( | |
| choices=[UILabels.INPUT_MODE_AUDIO, UILabels.INPUT_MODE_VIDEO], | |
| value=UILabels.INPUT_MODE_AUDIO, | |
| label=UILabels.INPUT_MODE_LABEL | |
| ) | |
| # Section Audio (mode par défaut) - identique à l'original | |
| with gr.Column(elem_classes="processing-section") as audio_section: | |
| gr.Markdown(UILabels.AUDIO_MODE_TITLE) | |
| audio_input = gr.Audio( | |
| label=UILabels.AUDIO_INPUT_LABEL, | |
| type="filepath", | |
| show_label=True, | |
| interactive=True | |
| ) | |
| # Section Vidéo (cachée par défaut) - identique à l'original | |
| with gr.Column(elem_classes="processing-section", visible=False) as video_section: | |
| gr.Markdown(UILabels.VIDEO_MODE_TITLE) | |
| video_input = gr.File( | |
| label=UILabels.VIDEO_INPUT_LABEL, | |
| file_types=["video"] | |
| ) | |
| btn_extract_audio = gr.Button( | |
| UILabels.EXTRACT_AUDIO_BUTTON, | |
| variant="secondary", | |
| size="lg" | |
| ) | |
| # Section options de trim (identique à l'original) | |
| with gr.Column(elem_classes="processing-section"): | |
| with gr.Accordion(UILabels.TRIM_OPTIONS_TITLE, open=False): | |
| with gr.Row(): | |
| start_trim_input = gr.Number( | |
| label=UILabels.START_TRIM_LABEL, | |
| value=0, | |
| minimum=0, | |
| maximum=3600 | |
| ) | |
| end_trim_input = gr.Number( | |
| label=UILabels.END_TRIM_LABEL, | |
| value=0, | |
| minimum=0, | |
| maximum=3600 | |
| ) | |
| # Section d'analyse principale (identique à l'original) | |
| with gr.Column(elem_classes="processing-section"): | |
| gr.Markdown(UILabels.MAIN_ANALYSIS_TITLE) | |
| gr.Markdown(UILabels.MAIN_ANALYSIS_DESCRIPTION) | |
| gr.Markdown("*Chunk duration is automatically optimized: 15min for Mini, 10min for Small (Zero GPU optimization)*") | |
| # Configuration des sections de résumé | |
| gr.Markdown(UILabels.SUMMARY_SECTIONS_TITLE) | |
| gr.Markdown(UILabels.SUMMARY_SECTIONS_DESCRIPTION) | |
| # Boutons de présélection rapide | |
| with gr.Row(): | |
| btn_preset_action = gr.Button(UILabels.PRESET_ACTION_BUTTON, variant="secondary", size="sm") | |
| btn_preset_info = gr.Button(UILabels.PRESET_INFO_BUTTON, variant="secondary", size="sm") | |
| btn_preset_complet = gr.Button(UILabels.PRESET_COMPLETE_BUTTON, variant="secondary", size="sm") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown(UILabels.ACTION_SECTIONS_TITLE) | |
| section_resume_executif = gr.Checkbox(label=UILabels.SECTION_EXECUTIVE_SUMMARY, value=True) | |
| section_discussions = gr.Checkbox(label=UILabels.SECTION_MAIN_DISCUSSIONS, value=True) | |
| section_plan_action = gr.Checkbox(label=UILabels.SECTION_ACTION_PLAN, value=True) | |
| section_decisions = gr.Checkbox(label=UILabels.SECTION_DECISIONS, value=True) | |
| section_prochaines_etapes = gr.Checkbox(label=UILabels.SECTION_NEXT_STEPS, value=True) | |
| with gr.Column(): | |
| gr.Markdown(UILabels.INFO_SECTIONS_TITLE) | |
| section_sujets_principaux = gr.Checkbox(label=UILabels.SECTION_MAIN_TOPICS, value=False) | |
| section_points_importants = gr.Checkbox(label=UILabels.SECTION_KEY_POINTS, value=False) | |
| section_questions = gr.Checkbox(label=UILabels.SECTION_QUESTIONS, value=False) | |
| section_elements_suivi = gr.Checkbox(label=UILabels.SECTION_FOLLOW_UP, value=False) | |
| btn_direct_transcribe = gr.Button( | |
| UILabels.ANALYZE_BUTTON, | |
| variant="primary", | |
| size="lg" | |
| ) | |
| # Section résultats (identique à l'original) | |
| with gr.Column(elem_classes="results-section"): | |
| gr.Markdown(UILabels.RESULTS_TITLE) | |
| final_summary_output = gr.Markdown( | |
| value=UILabels.RESULTS_PLACEHOLDER, | |
| label=UILabels.RESULTS_LABEL, | |
| height=500 | |
| ) | |
| # Event handlers (adaptés pour HF Spaces) | |
| # Gestion du changement de mode d'entrée | |
| input_mode.change( | |
| fn=handle_input_mode_change, | |
| inputs=[input_mode], | |
| outputs=[audio_section, video_section] | |
| ) | |
| # Extraction audio depuis vidéo | |
| btn_extract_audio.click( | |
| fn=extract_audio_from_video, | |
| inputs=[video_input, gr.State("french")], | |
| outputs=[audio_input, audio_section, video_section, input_mode, gr.State("french")] | |
| ) | |
| # Fonctions de présélection des sections (identiques à l'original) | |
| def preset_action(): | |
| return (True, True, True, True, True, False, False, False, False) | |
| def preset_info(): | |
| return (True, False, False, False, False, True, True, True, True) | |
| def preset_complet(): | |
| return (True, True, True, True, True, True, True, True, True) | |
| # Gestion de l'analyse directe (adaptée pour Transformers uniquement) | |
| def handle_analysis_direct( | |
| audio_file, hf_token, language, local_model, start_trim, end_trim, | |
| s_resume, s_discussions, s_plan_action, s_decisions, s_prochaines_etapes, | |
| s_sujets_principaux, s_points_importants, s_questions, s_elements_suivi | |
| ): | |
| # Mode Transformers uniquement (pré-quantisé 8-bit) | |
| transcription_mode = f"Transformers ({local_model} 8-bit)" | |
| model_key = local_model | |
| # Construire la liste des sections sélectionnées | |
| sections_checkboxes = [ | |
| (s_resume, "resume_executif"), | |
| (s_discussions, "discussions_principales"), | |
| (s_plan_action, "plan_action"), | |
| (s_decisions, "decisions_prises"), | |
| (s_prochaines_etapes, "prochaines_etapes"), | |
| (s_sujets_principaux, "sujets_principaux"), | |
| (s_points_importants, "points_importants"), | |
| (s_questions, "questions_discussions"), | |
| (s_elements_suivi, "elements_suivi") | |
| ] | |
| selected_sections = [section_key for is_selected, section_key in sections_checkboxes if is_selected] | |
| # Appeler la fonction d'analyse directe (chunk duration automatique) | |
| _, summary = handle_direct_transcription( | |
| audio_file, hf_token, language, transcription_mode, | |
| model_key, selected_sections, start_trim, end_trim | |
| ) | |
| return summary | |
| # Événements de présélection (identiques à l'original) | |
| btn_preset_action.click( | |
| fn=preset_action, | |
| outputs=[ | |
| section_resume_executif, section_discussions, section_plan_action, | |
| section_decisions, section_prochaines_etapes, section_sujets_principaux, | |
| section_points_importants, section_questions, section_elements_suivi | |
| ] | |
| ) | |
| btn_preset_info.click( | |
| fn=preset_info, | |
| outputs=[ | |
| section_resume_executif, section_discussions, section_plan_action, | |
| section_decisions, section_prochaines_etapes, section_sujets_principaux, | |
| section_points_importants, section_questions, section_elements_suivi | |
| ] | |
| ) | |
| btn_preset_complet.click( | |
| fn=preset_complet, | |
| outputs=[ | |
| section_resume_executif, section_discussions, section_plan_action, | |
| section_decisions, section_prochaines_etapes, section_sujets_principaux, | |
| section_points_importants, section_questions, section_elements_suivi | |
| ] | |
| ) | |
| # Analyse principale (adaptée pour HF Spaces) | |
| btn_direct_transcribe.click( | |
| fn=handle_analysis_direct, | |
| inputs=[ | |
| audio_input, | |
| gr.State(value=hf_token), | |
| gr.State("french"), | |
| local_model_choice, | |
| start_trim_input, | |
| end_trim_input, | |
| section_resume_executif, | |
| section_discussions, | |
| section_plan_action, | |
| section_decisions, | |
| section_prochaines_etapes, | |
| section_sujets_principaux, | |
| section_points_importants, | |
| section_questions, | |
| section_elements_suivi | |
| ], | |
| outputs=[final_summary_output] | |
| ) | |
| # Footer (identique à l'original) | |
| with gr.Row(): | |
| gr.Markdown( | |
| """ | |
| --- | |
| **MeetingNotes** | Powered by [Voxtral](https://mistral.ai/) | | |
| 🚀 Intelligent meeting analysis | 💾 HF Spaces with Zero GPU | |
| """, | |
| elem_classes="footer-info" | |
| ) | |
| # Retourner demo (thème et CSS déjà configurés dans gr.Blocks pour Gradio 6) | |
| return demo |