|
|
""" |
|
|
Meeting note parsers for extracting structured data from markdown files. |
|
|
""" |
|
|
from pathlib import Path |
|
|
from typing import List, Optional |
|
|
from datetime import datetime |
|
|
from pydantic import BaseModel, Field |
|
|
import re |
|
|
|
|
|
|
|
|
class ActionItem(BaseModel):
    """A single to-do entry captured from a meeting note.

    Attributes:
        task: Text describing the work to be done (required).
        assignee: Name of the person responsible, or ``None`` when the note
            does not name anyone.
        deadline: Due date kept as a plain string (it is not converted to a
            ``datetime``), or ``None`` when no deadline is given.
        completed: ``True`` when the item is marked as done; defaults to
            ``False``.
    """

    task: str
    assignee: Optional[str] = None
    deadline: Optional[str] = None
    completed: bool = False
|
|
|
|
|
|
|
|
class MeetingNote(BaseModel):
    """Structured representation of one parsed meeting note.

    Attributes:
        project_name: Name of the project the note belongs to (required).
        title: Title of the meeting (required).
        date: Meeting date, or ``None`` when it is missing or unparseable.
        participants: Names of the attendees; empty list by default.
        discussion: Free-form discussion text, or ``None`` when absent.
        decisions: Decisions recorded during the meeting; empty by default.
        action_items: Follow-up tasks recorded in the note; empty by default.
        blockers: Blocking issues raised in the meeting; empty by default.
        file_path: Path of the source file, stored as a string (required).
    """

    project_name: str
    title: str
    date: Optional[datetime] = None
    # List defaults go through default_factory so instances never share a list.
    participants: List[str] = Field(default_factory=list)
    discussion: Optional[str] = None
    decisions: List[str] = Field(default_factory=list)
    action_items: List[ActionItem] = Field(default_factory=list)
    blockers: List[str] = Field(default_factory=list)
    file_path: str
|
|
|
|
|
|
|
|
class MeetingParser:
    """Parser for markdown meeting notes.

    The class is a stateless namespace: every method is a ``staticmethod``.
    Text handling lives in private helpers that return plain dictionaries;
    the public methods wrap those dictionaries in the pydantic models.
    """

    # Tried in order. Day-first "%d/%m/%Y" precedes month-first "%m/%d/%Y",
    # so an ambiguous value such as "03/04/2024" is read as 3 April.
    _DATE_FORMATS = (
        "%Y-%m-%d",
        "%d/%m/%Y",
        "%m/%d/%Y",
        "%B %d, %Y",
        "%b %d, %Y",
        "%Y/%m/%d",
    )

    # Any markdown checkbox, ticked or not.
    _CHECKBOX_RE = re.compile(r'\[[ xX✓✔]\]')
    # Only the ticked variants.
    _DONE_CHECKBOX_RE = re.compile(r'\[[xX✓✔]\]')
    # A single leading "*" or "+" list marker ("-" is handled with lstrip so
    # that markdown emphasis such as "**bold**" is not eaten).
    _STAR_BULLET_RE = re.compile(r'^[*+](?:\s+|$)')
    # Heuristic: a leading run of letters/spaces followed by ":" is the
    # assignee, the remainder is the task.
    _ASSIGNEE_RE = re.compile(r'^([A-Za-z\s]+):\s*(.+)$')
    # Deadline phrases, most specific first. The parenthesised form must win
    # over the bare forms, otherwise "(by March 5)" leaves "()" in the task.
    # The bare forms need \b so words ending in "by" ("lobby", "standby") are
    # not mistaken for the preposition.
    _DEADLINE_RES = (
        re.compile(r'\(by\s+([^)]+)\)', re.IGNORECASE),
        re.compile(r'\bby\s+([A-Za-z]+\s+\d{1,2}(?:,\s+\d{4})?)', re.IGNORECASE),
        re.compile(r'\bby\s+(\d{4}-\d{2}-\d{2})', re.IGNORECASE),
    )

    # "## heading" keywords -> section, checked in this order; headings that
    # match nothing are treated as discussion.
    _SECTION_KEYWORDS = (
        ('discussion', ('discussion', 'notes')),
        ('decisions', ('decision',)),
        ('action_items', ('action', 'todo', 'task')),
        ('blockers', ('blocker', 'issue')),
    )

    @staticmethod
    def parse_date(date_str: str) -> Optional[datetime]:
        """Parse a date written in one of the supported formats.

        Args:
            date_str: Date text; surrounding whitespace is ignored.

        Returns:
            The parsed ``datetime``, or ``None`` when no format matches.
        """
        candidate = date_str.strip()
        for fmt in MeetingParser._DATE_FORMATS:
            try:
                return datetime.strptime(candidate, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def _extract_action_fields(line: str) -> Optional[dict]:
        """Split one action-item line into ``ActionItem`` keyword arguments.

        Returns a dict with the keys ``task``, ``assignee``, ``deadline`` and
        ``completed``, or ``None`` when nothing is left after removing the
        checkbox and list marker.
        """
        completed = MeetingParser._DONE_CHECKBOX_RE.search(line) is not None

        text = MeetingParser._CHECKBOX_RE.sub('', line).strip()
        text = MeetingParser._STAR_BULLET_RE.sub('', text)
        text = text.lstrip('- ').strip()
        if not text:
            return None

        assignee = None
        assignee_match = MeetingParser._ASSIGNEE_RE.match(text)
        if assignee_match:
            assignee = assignee_match.group(1).strip()
            text = assignee_match.group(2).strip()

        deadline = None
        for pattern in MeetingParser._DEADLINE_RES:
            deadline_match = pattern.search(text)
            if deadline_match:
                deadline = deadline_match.group(1).strip()
                # Cut out only the matched phrase, then collapse the
                # whitespace the cut leaves behind.
                remainder = (
                    text[:deadline_match.start()]
                    + ' '
                    + text[deadline_match.end():]
                )
                text = ' '.join(remainder.split())
                break

        return {
            'task': text,
            'assignee': assignee,
            'deadline': deadline,
            'completed': completed,
        }

    @staticmethod
    def parse_action_item(line: str) -> "Optional[ActionItem]":
        """Parse an action item line.

        Recognises an optional list marker (``-``, ``*``, ``+``), a checkbox
        (``[ ]``, ``[x]``, ``[✓]``, ``[✔]``), an ``Assignee:`` prefix and a
        ``by <date>`` / ``(by <anything>)`` deadline phrase.

        Args:
            line: One line taken from an action-item section.

        Returns:
            The parsed ``ActionItem``, or ``None`` when the line holds no
            text besides the checkbox and list marker.
        """
        fields = MeetingParser._extract_action_fields(line)
        if fields is None:
            return None
        return ActionItem(**fields)

    @staticmethod
    def _classify_section(heading: str) -> str:
        """Map the text of a ``##`` heading to an internal section name."""
        name = heading.strip().lower()
        for section, keywords in MeetingParser._SECTION_KEYWORDS:
            if any(keyword in name for keyword in keywords):
                return section
        return 'discussion'

    @staticmethod
    def _bullet_text(line: str) -> Optional[str]:
        """Return the text of a ``-``/``*`` bullet line.

        ``None`` is returned for non-bullet lines and for bullets with no
        text, which also filters out horizontal rules such as ``---``.
        """
        if not line.startswith(('-', '*')):
            return None
        return line.lstrip('-*').strip() or None

    @staticmethod
    def _parse_content(content: str, default_title: str) -> dict:
        """Parse the markdown text of a meeting note.

        Args:
            content: Full text of the note.
            default_title: Title to use when the note has no ``# `` heading.

        Returns:
            A dict with the keys ``title``, ``date``, ``participants``,
            ``discussion``, ``decisions``, ``action_items`` (a list of
            ``ActionItem`` keyword dicts) and ``blockers``.
        """
        title = default_title
        date = None
        participants = []
        discussion = []
        decisions = []
        action_items = []
        blockers = []
        section = None

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Every "# " heading replaces the title; an optional "Meeting:"
            # prefix is dropped.
            if line.startswith('# '):
                title = line[2:].strip()
                if title.lower().startswith('meeting:'):
                    title = title[8:].strip()
                continue

            lowered = line.lower()
            if lowered.startswith('date:'):
                date = MeetingParser.parse_date(line[5:])
                continue

            if lowered.startswith('participants:'):
                # Skip empty entries produced by "Participants:" with no
                # value or by a trailing comma.
                participants = [
                    name.strip()
                    for name in line[13:].split(',')
                    if name.strip()
                ]
                continue

            if line.startswith('## '):
                section = MeetingParser._classify_section(line[3:])
                continue

            # Lines that appear before the first "## " heading are ignored.
            if section == 'discussion':
                discussion.append(line)
            elif section == 'decisions':
                text = MeetingParser._bullet_text(line)
                if text is not None:
                    decisions.append(text)
            elif section == 'action_items':
                # Only lines containing a bracket are treated as items.
                if '[' in line:
                    fields = MeetingParser._extract_action_fields(line)
                    if fields is not None:
                        action_items.append(fields)
            elif section == 'blockers':
                text = MeetingParser._bullet_text(line)
                if text is not None:
                    blockers.append(text)

        return {
            'title': title,
            'date': date,
            'participants': participants,
            'discussion': '\n'.join(discussion) if discussion else None,
            'decisions': decisions,
            'action_items': action_items,
            'blockers': blockers,
        }

    @staticmethod
    def parse(file_path: Path, project_name: str) -> "Optional[MeetingNote]":
        """Parse a markdown meeting note file.

        Args:
            file_path: Path of the markdown file, read as UTF-8.
            project_name: Project name stored on the resulting note.

        Returns:
            The parsed ``MeetingNote``, or ``None`` when ``file_path`` does
            not point to an existing regular file (missing path or
            directory).
        """
        if not file_path.is_file():
            return None

        content = file_path.read_text(encoding='utf-8')
        # Fallback title derived from the file name, e.g. "sprint_sync-1"
        # becomes "Sprint Sync 1".
        default_title = (
            file_path.stem.replace('-', ' ').replace('_', ' ').title()
        )

        fields = MeetingParser._parse_content(content, default_title)
        action_items = [
            ActionItem(**item_fields)
            for item_fields in fields.pop('action_items')
        ]

        return MeetingNote(
            project_name=project_name,
            action_items=action_items,
            file_path=str(file_path),
            **fields
        )
|
|
|
|
|
|
|
|
def load_meetings_from_directory(data_dir: Path) -> "List[MeetingNote]":
    """Load all meeting notes from a directory structure.

    The expected layout is ``<data_dir>/<project>/meetings/*.md``; the name
    of each project directory is passed to the parser as the project name.

    Args:
        data_dir: Root directory that contains one sub-directory per project.

    Returns:
        The parsed notes, ordered by project directory and then by file
        path so repeated runs yield the same order. The list is empty when
        ``data_dir`` is missing or is not a directory.
    """
    meetings = []

    # is_dir() rather than exists(): iterating a regular file would raise.
    if not data_dir.is_dir():
        return meetings

    for project_dir in sorted(data_dir.iterdir()):
        if not project_dir.is_dir():
            continue

        meetings_dir = project_dir / "meetings"
        if not meetings_dir.is_dir():
            continue

        project_name = project_dir.name
        for meeting_file in sorted(meetings_dir.glob("*.md")):
            meeting = MeetingParser.parse(meeting_file, project_name)
            if meeting is not None:
                meetings.append(meeting)

    return meetings
|
|
|