import os
import json
import re
import datetime
from typing import Dict, List, Optional

from transformers import pipeline
from faster_whisper import WhisperModel

# --------- Models (lazy init) ---------
_SUMMARIZER = None
_QA = None
_WHISPER = None

# Character budget for one summarizer input. This is a character-based
# heuristic, not a token count; `truncation=True` is the hard safety net.
_SUMMARY_CHUNK_CHARS = 2200

# Splits after sentence-ending punctuation followed by whitespace.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")

# Keyword markers used by the regex fallback of extract_actions_decisions().
_ACTION_MARKER = re.compile(r"\b(?:action|à faire|todo|faire):\s*", re.IGNORECASE)
_DECISION_MARKER = re.compile(r"\b(?:décision|decision):\s*", re.IGNORECASE)


def get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def get_extractor():
    """Return the shared Flan-T5 text2text pipeline, creating it on first use.

    The pipeline is used for JSON-style action/decision extraction.
    """
    global _QA
    if _QA is None:
        _QA = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            max_new_tokens=256,
        )
    return _QA


def get_whisper(device: str = "auto"):
    """Return the shared Whisper model, creating it on first use.

    Args:
        device: Device passed to ``WhisperModel`` when the model is first
            created. Once a model is cached, this argument is ignored and the
            cached instance is returned as-is (first call wins), so a caller
            can pre-load the model on a chosen device before
            ``transcribe_audio`` runs.
    """
    global _WHISPER
    if _WHISPER is None:
        _WHISPER = WhisperModel(
            "Systran/faster-whisper-small",
            device=device,
            compute_type="int8",
        )
    return _WHISPER


# --------- Core functions ---------
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file and return its text as a single string.

    Args:
        audio_path: Path handed to ``WhisperModel.transcribe``.

    Returns:
        The stripped text of every non-empty segment, joined by single spaces.
    """
    model = get_whisper()
    segments, _info = model.transcribe(audio_path, beam_size=1)
    # Drop empty segments so the joined transcript has no doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    return " ".join(piece for piece in pieces if piece)


def _summarize_piece(summarizer, piece: str, **generate_kwargs) -> str:
    """Run one deterministic summarization call and return the summary text."""
    # truncation=True makes the pipeline clip an over-long input instead of
    # failing inside the model.
    result = summarizer(piece, do_sample=False, truncation=True, **generate_kwargs)
    return result[0]["summary_text"]


def summarize(text: str) -> str:
    """Summarize a transcript, chunking it when it is long.

    The text is split into chunks of at most ``_SUMMARY_CHUNK_CHARS``
    characters. Each chunk is summarized, the partial summaries are merged,
    and the merge is summarized once more with ``max_length=200`` and
    ``min_length=60``.

    Args:
        text: Transcript to summarize.

    Returns:
        The final summary, or ``""`` when ``text`` is empty or whitespace-only
        (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""

    chunks = _chunk(text, _SUMMARY_CHUNK_CHARS)
    if not chunks:
        return ""

    summarizer = get_summarizer()

    # A single chunk needs only one pass; summarizing its summary again would
    # double the inference cost and discard detail.
    if len(chunks) == 1:
        return _summarize_piece(summarizer, chunks[0], max_length=200, min_length=60)

    merged = " ".join(_summarize_piece(summarizer, chunk) for chunk in chunks)

    # Long transcripts can yield merged partials that are themselves too long
    # for one model call, so keep reducing until they fit the chunk budget.
    while len(merged) > _SUMMARY_CHUNK_CHARS:
        reduced = " ".join(
            _summarize_piece(summarizer, chunk)
            for chunk in _chunk(merged, _SUMMARY_CHUNK_CHARS)
        )
        if len(reduced) >= len(merged):
            # No progress; stop and let truncation bound the final call.
            break
        merged = reduced

    return _summarize_piece(summarizer, merged, max_length=200, min_length=60)


def _clean_items(value) -> List[str]:
    """Normalize a decoded JSON value into a list of non-empty stripped strings."""
    if isinstance(value, str):
        # A bare string must not be iterated character by character.
        value = [value]
    if not isinstance(value, list):
        return []
    return [item.strip() for item in value if isinstance(item, str) and item.strip()]


def _parse_extraction(raw: str) -> Optional[Dict[str, List[str]]]:
    """Decode the extractor output as a JSON object, or return None.

    Three candidates are tried in order: the raw output, the outermost
    ``{...}`` span inside it, and the output wrapped in braces.
    """
    candidates = [raw]
    start, end = raw.find("{"), raw.rfind("}")
    if 0 <= start < end:
        candidates.append(raw[start:end + 1])
    # NOTE(review): presumably the model can return the object body without
    # its surrounding braces; this candidate covers that case - confirm.
    candidates.append("{" + raw.strip() + "}")

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict):
            return {
                "actions": _clean_items(data.get("actions")),
                "decisions": _clean_items(data.get("decisions")),
            }
    return None


def _keyword_fallback(text: str) -> Dict[str, List[str]]:
    """Collect actions/decisions from lines carrying a ``keyword:`` marker."""
    actions: List[str] = []
    decisions: List[str] = []
    for line in text.splitlines():
        for marker, bucket in ((_ACTION_MARKER, actions), (_DECISION_MARKER, decisions)):
            match = marker.search(line)
            if match is None:
                continue
            # Keep what follows the matched keyword, not what follows the
            # first colon on the line (which may belong to a timestamp or a
            # speaker label).
            item = line[match.end():].strip()
            if item:
                bucket.append(item)
    return {"actions": actions, "decisions": decisions}


def extract_actions_decisions(text: str) -> Dict[str, List[str]]:
    """Extract action items and decisions from a meeting transcript.

    The first 7000 characters of ``text`` are sent to the text2text extractor
    with a prompt asking for a JSON object. If the output cannot be decoded
    as a JSON object, a keyword scan over the lines of ``text`` is used
    instead.

    Args:
        text: Meeting transcript.

    Returns:
        A dict with the keys ``"actions"`` and ``"decisions"``, each mapping
        to a list of non-empty, stripped strings.
    """
    prompt = (
        "Tu es un assistant de prise de notes de réunion. \n"
        "À partir du transcript ci-dessous, extrais : "
        "1) une liste concise de \"Points d'action\" "
        "(qui fait quoi, verbe à l'infinitif, deadline si mentionnée) "
        "2) une liste \"Décisions prises\" (phrases courtes) "
        "Retourne du JSON strict de la forme : "
        '{"actions": ["...","..."], "decisions": ["...","..."]} '
        f"Transcript: {text[:7000]} "
    )
    gen = get_extractor()
    out = gen(prompt)[0]["generated_text"]

    parsed = _parse_extraction(out)
    if parsed is not None:
        return parsed
    return _keyword_fallback(text)


def make_minutes_md(title: str, summary: str, actions: List[str], decisions: List[str]) -> str:
    """Render meeting minutes as a Markdown document.

    Args:
        title: Meeting title used in the top-level heading.
        summary: Summary text; an empty or whitespace-only value is rendered
            as a dash placeholder.
        actions: Action items, rendered as unchecked task-list entries.
        decisions: Decisions, rendered as bullet points.

    Returns:
        The Markdown text, with lines joined by newlines. Empty ``actions`` or
        ``decisions`` are rendered as a single dash placeholder entry.
    """
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    # `or` chain also covers a whitespace-only summary, not just an empty one.
    summary_text = (summary or "").strip() or "—"
    lines = [
        f"# {title} — Compte-rendu",
        f"_Généré le {now}_",
        "",
        "## Résumé",
        summary_text,
        "",
        "## Points d'action",
        *[f"- [ ] {a}" for a in (actions or ["—"])],
        "",
        "## Décisions prises",
        *[f"- {d}" for d in (decisions or ["—"])],
        "",
    ]
    return "\n".join(lines)


def _chunk(text: str, max_chars: int) -> List[str]:
    """Split text into chunks of at most ``max_chars`` characters.

    Sentences (split after ``.``, ``!`` or ``?`` followed by whitespace) are
    packed greedily and joined by single spaces. A sentence longer than
    ``max_chars`` is cut into ``max_chars``-sized slices so that no chunk
    exceeds the budget.

    Args:
        text: Text to split.
        max_chars: Maximum length of each returned chunk; must be positive.

    Returns:
        The list of chunks; empty when ``text`` is empty or whitespace-only.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be positive")

    parts: List[str] = []
    buf: List[str] = []
    size = 0  # length of " ".join(buf) plus one trailing separator
    for sent in _SENTENCE_BOUNDARY.split(text.strip()):
        if not sent:
            continue
        # Yields [sent] unchanged for sentences that already fit the budget.
        for start in range(0, len(sent), max_chars):
            piece = sent[start:start + max_chars]
            if buf and size + len(piece) > max_chars:
                parts.append(" ".join(buf))
                buf, size = [], 0
            buf.append(piece)
            size += len(piece) + 1
    if buf:
        parts.append(" ".join(buf))
    return parts