""" Meeting note parsers for extracting structured data from markdown files. """ from pathlib import Path from typing import List, Optional from datetime import datetime from pydantic import BaseModel, Field import re class ActionItem(BaseModel): """Represents an action item from a meeting.""" task: str assignee: Optional[str] = None deadline: Optional[str] = None completed: bool = False class MeetingNote(BaseModel): """Represents a parsed meeting note.""" project_name: str title: str date: Optional[datetime] = None participants: List[str] = Field(default_factory=list) discussion: Optional[str] = None decisions: List[str] = Field(default_factory=list) action_items: List[ActionItem] = Field(default_factory=list) blockers: List[str] = Field(default_factory=list) file_path: str class MeetingParser: """Parser for markdown meeting notes.""" @staticmethod def parse_date(date_str: str) -> Optional[datetime]: """Parse date from various formats.""" date_formats = [ "%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%B %d, %Y", "%b %d, %Y", "%Y/%m/%d" ] for fmt in date_formats: try: return datetime.strptime(date_str.strip(), fmt) except ValueError: continue return None @staticmethod def parse_action_item(line: str) -> Optional[ActionItem]: """Parse an action item line.""" # Match patterns like: # - [ ] Task # - [x] Task # - [ ] Alice: Task by Jan 20 # - [x] Bob: Task (by 2025-01-20) completed = False if "[x]" in line.lower() or "[✓]" in line or "[✔]" in line: completed = True # Remove checkbox markers line = re.sub(r'\[[ xX✓✔]\]', '', line).strip() line = line.lstrip('- ').strip() if not line: return None # Try to extract assignee assignee = None assignee_match = re.match(r'^([A-Za-z\s]+):\s*(.+)$', line) if assignee_match: assignee = assignee_match.group(1).strip() line = assignee_match.group(2).strip() # Try to extract deadline deadline = None deadline_patterns = [ r'by\s+([A-Za-z]+\s+\d{1,2}(?:,\s+\d{4})?)', r'by\s+(\d{4}-\d{2}-\d{2})', r'\(by\s+([^)]+)\)', ] for pattern in deadline_patterns: deadline_match = re.search(pattern, line, re.IGNORECASE) if deadline_match: deadline = deadline_match.group(1).strip() line = re.sub(pattern, '', line, flags=re.IGNORECASE).strip() break return ActionItem( task=line, assignee=assignee, deadline=deadline, completed=completed ) @staticmethod def parse(file_path: Path, project_name: str) -> Optional[MeetingNote]: """Parse a markdown meeting note file.""" if not file_path.exists(): return None content = file_path.read_text(encoding='utf-8') lines = content.split('\n') # Initialize fields title = file_path.stem.replace('-', ' ').replace('_', ' ').title() date = None participants = [] discussion = [] decisions = [] action_items = [] blockers = [] current_section = None for line in lines: line_stripped = line.strip() # Skip empty lines if not line_stripped: continue # Check for title if line_stripped.startswith('# '): title = line_stripped[2:].strip() # Try to extract from "Meeting: X" format if title.lower().startswith('meeting:'): title = title[8:].strip() continue # Check for metadata if line_stripped.lower().startswith('date:'): date_str = line_stripped[5:].strip() date = MeetingParser.parse_date(date_str) continue if line_stripped.lower().startswith('participants:'): participants_str = line_stripped[13:].strip() participants = [p.strip() for p in participants_str.split(',')] continue # Check for sections if line_stripped.startswith('## '): section_name = line_stripped[3:].strip().lower() if 'discussion' in section_name or 'notes' in section_name: current_section = 'discussion' elif 'decision' in section_name: current_section = 'decisions' elif 'action' in section_name or 'todo' in section_name or 'task' in section_name: current_section = 'action_items' elif 'blocker' in section_name or 'issue' in section_name: current_section = 'blockers' else: current_section = 'discussion' continue # Add content to current section if current_section == 'discussion': discussion.append(line_stripped) elif current_section == 'decisions': if line_stripped.startswith('-') or line_stripped.startswith('*'): decisions.append(line_stripped.lstrip('-*').strip()) elif current_section == 'action_items': if '[' in line_stripped: action_item = MeetingParser.parse_action_item(line_stripped) if action_item: action_items.append(action_item) elif current_section == 'blockers': if line_stripped.startswith('-') or line_stripped.startswith('*'): blockers.append(line_stripped.lstrip('-*').strip()) return MeetingNote( project_name=project_name, title=title, date=date, participants=participants, discussion='\n'.join(discussion) if discussion else None, decisions=decisions, action_items=action_items, blockers=blockers, file_path=str(file_path) ) def load_meetings_from_directory(data_dir: Path) -> List[MeetingNote]: """Load all meeting notes from a directory structure.""" meetings = [] if not data_dir.exists(): return meetings # Expected structure: data_dir/project_name/meetings/*.md for project_dir in data_dir.iterdir(): if not project_dir.is_dir(): continue project_name = project_dir.name meetings_dir = project_dir / "meetings" if not meetings_dir.exists(): continue for meeting_file in meetings_dir.glob("*.md"): meeting = MeetingParser.parse(meeting_file, project_name) if meeting: meetings.append(meeting) return meetings