Spaces:
Sleeping
Sleeping
File size: 7,126 Bytes
d6f13c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
"""
Meeting note parsers for extracting structured data from markdown files.
"""
from pathlib import Path
from typing import List, Optional
from datetime import datetime
from pydantic import BaseModel, Field
import re
class ActionItem(BaseModel):
    """A single task recorded in a meeting's action-item list.

    Only ``task`` is required; every other field falls back to its default
    when it is not supplied.
    """

    # Description of the work to be done.
    task: str

    # Name of the person responsible, when one is given.
    assignee: Optional[str] = None

    # Due date stored as a plain string; it is not converted to a date object.
    deadline: Optional[str] = None

    # Whether the task has been marked as done.
    completed: bool = False
class MeetingNote(BaseModel):
    """Structured contents of one meeting-note file.

    ``project_name``, ``title`` and ``file_path`` are required. Every list
    field defaults to a fresh empty list and the remaining optional fields
    default to ``None``.
    """

    # Name of the project the note belongs to.
    project_name: str

    # Title of the meeting.
    title: str

    # Meeting date, or None when no date is available.
    date: Optional[datetime] = None

    # Names of the people who attended.
    participants: List[str] = Field(default_factory=list)

    # Discussion text as a single string, or None when there is none.
    discussion: Optional[str] = None

    # Decisions recorded for the meeting, one entry per decision.
    decisions: List[str] = Field(default_factory=list)

    # Tasks recorded for the meeting.
    action_items: List[ActionItem] = Field(default_factory=list)

    # Blockers recorded for the meeting, one entry per blocker.
    blockers: List[str] = Field(default_factory=list)

    # Location of the source file, stored as a string.
    file_path: str
class MeetingParser:
    """Parser for markdown meeting notes.

    The class holds no state; every method is a static helper. ``parse`` is
    the entry point and uses ``parse_date`` and ``parse_action_item`` for the
    individual pieces.
    """

    # Tried in order by parse_date; the first format that fits wins, so an
    # ambiguous numeric date such as 03/04/2025 is read as day/month/year.
    _DATE_FORMATS = (
        "%Y-%m-%d",
        "%d/%m/%Y",
        "%m/%d/%Y",
        "%B %d, %Y",
        "%b %d, %Y",
        "%Y/%m/%d",
    )

    # Optional list bullet ("-", "*", "+", "1.", "1)") followed by an optional
    # checkbox. Anchored at the start of the line so that brackets inside the
    # task text (e.g. "arr[x]") are never mistaken for a ticked checkbox.
    _ITEM_PREFIX_RE = re.compile(
        r'^\s*(?:(?:[-*+]|\d+[.)])(?=\s|\[)\s*)?(?:\[(?P<mark>[ xX✓✔])\]\s*)?'
    )

    # "Name: task". The lookahead keeps a URL scheme ("https://...") from
    # being read as an assignee.
    _ASSIGNEE_RE = re.compile(r'^([A-Za-z\s]+):(?!//)\s*(.+)$')

    # Deadline phrases, most specific first. The parenthesised form must be
    # tried before the bare "by ..." forms; otherwise "(by 2025-01-20)" is
    # only partly consumed and an empty "()" is left behind in the task.
    _DEADLINE_RES = (
        re.compile(r'\(\s*by\s+([^)]+)\)', re.IGNORECASE),
        re.compile(
            r'\bby\s+([A-Za-z]+\s+\d{1,2}(?:st|nd|rd|th)?(?:,\s*\d{4})?)(?!\d)',
            re.IGNORECASE,
        ),
        re.compile(r'\bby\s+(\d{4}-\d{2}-\d{2})(?!\d)', re.IGNORECASE),
    )

    # A markdown list item: bullet or number, whitespace, then the content.
    _BULLET_RE = re.compile(r'^(?:[-*+]|\d+[.)])\s+(.*)$')

    @staticmethod
    def parse_date(date_str: str) -> Optional[datetime]:
        """Parse a date string using the supported formats.

        Args:
            date_str: Date text; surrounding whitespace is ignored.

        Returns:
            A naive ``datetime`` for the first format in ``_DATE_FORMATS``
            that matches, or ``None`` when none of them does.
        """
        candidate = date_str.strip()
        for fmt in MeetingParser._DATE_FORMATS:
            try:
                return datetime.strptime(candidate, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def parse_action_item(line: str) -> Optional[ActionItem]:
        """Parse one action-item line.

        Recognised shapes include::

            - [ ] Task
            - [x] Task
            - [ ] Alice: Task by Jan 20
            - [x] Bob: Task (by 2025-01-20)

        Args:
            line: A single line of text, with or without a list bullet and
                checkbox.

        Returns:
            The parsed ``ActionItem``, or ``None`` when nothing is left after
            the bullet and checkbox are removed.
        """
        # Every part of the prefix pattern is optional, so it always matches.
        prefix = MeetingParser._ITEM_PREFIX_RE.match(line)
        # An absent checkbox or "[ ]" means open; x, X, ✓ and ✔ mean done.
        completed = prefix.group('mark') not in (None, ' ')
        text = line[prefix.end():].strip()
        if not text:
            return None

        # Leading "Name:" is taken as the assignee. Only ASCII letters and
        # whitespace are accepted in the name.
        assignee = None
        assignee_match = MeetingParser._ASSIGNEE_RE.match(text)
        if assignee_match:
            assignee = assignee_match.group(1).strip()
            text = assignee_match.group(2).strip()

        deadline = None
        for pattern in MeetingParser._DEADLINE_RES:
            found = pattern.search(text)
            if found:
                deadline = found.group(1).strip()
                # Cut out only the matched phrase and tidy the spacing left
                # around the gap.
                remainder = text[:found.start()] + ' ' + text[found.end():]
                text = ' '.join(remainder.split())
                break

        return ActionItem(
            task=text,
            assignee=assignee,
            deadline=deadline,
            completed=completed,
        )

    @staticmethod
    def _section_for_heading(heading: str) -> str:
        """Map the text of a ``## `` heading to an internal section key."""
        name = heading.strip().lower()
        if 'discussion' in name or 'notes' in name:
            return 'discussion'
        if 'decision' in name:
            return 'decisions'
        if 'action' in name or 'todo' in name or 'task' in name:
            return 'action_items'
        if 'blocker' in name or 'issue' in name:
            return 'blockers'
        # Unrecognised headings are collected as discussion text.
        return 'discussion'

    @staticmethod
    def _list_item_text(text: str) -> Optional[str]:
        """Return the content of a list-item line, or None if there is none."""
        bullet = MeetingParser._BULLET_RE.match(text)
        if bullet:
            item = bullet.group(1).strip()
        elif text.startswith(('-', '*')):
            # Lenient form without a space after the marker ("-item").
            item = text.lstrip('-*').strip()
        else:
            return None
        # Horizontal rules ("---") and empty bullets carry no content.
        return item or None

    @staticmethod
    def parse(file_path: Path, project_name: str) -> Optional[MeetingNote]:
        """Parse a markdown meeting note file.

        The title defaults to a title-cased form of the file stem and is
        replaced by any ``# `` heading (a leading "Meeting:" is dropped).
        ``Date:`` and ``Participants:`` lines are read as metadata. Content
        under ``## `` headings is routed to discussion, decisions, action
        items or blockers according to the heading text; content that appears
        before the first ``## `` heading is ignored.

        Args:
            file_path: Location of the markdown file.
            project_name: Project name stored on the returned note.

        Returns:
            The parsed ``MeetingNote``, or ``None`` when ``file_path`` is not
            an existing regular file.
        """
        # is_file() also rejects directories, which read_text() cannot open.
        if not file_path.is_file():
            return None

        # utf-8-sig reads plain UTF-8 too and drops a leading BOM, which
        # would otherwise hide a first-line "# Title" heading.
        content = file_path.read_text(encoding='utf-8-sig')

        title = file_path.stem.replace('-', ' ').replace('_', ' ').title()
        date = None
        participants: List[str] = []
        discussion: List[str] = []
        decisions: List[str] = []
        action_items: List[ActionItem] = []
        blockers: List[str] = []
        current_section = None

        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not line:
                continue

            # Title heading
            if line.startswith('# '):
                title = line[2:].strip()
                if title.lower().startswith('meeting:'):
                    title = title[len('meeting:'):].strip()
                continue

            # Metadata lines
            lowered = line.lower()
            if lowered.startswith('date:'):
                parsed = MeetingParser.parse_date(line[len('date:'):])
                # Keep an earlier valid date if this line cannot be parsed.
                if parsed is not None:
                    date = parsed
                continue
            if lowered.startswith('participants:'):
                names = line[len('participants:'):].split(',')
                # Skip blanks from an empty value or a trailing comma.
                participants = [n.strip() for n in names if n.strip()]
                continue

            # Section heading
            if line.startswith('## '):
                current_section = MeetingParser._section_for_heading(line[3:])
                continue

            # Section content
            if current_section == 'discussion':
                discussion.append(line)
            elif current_section == 'decisions':
                item = MeetingParser._list_item_text(line)
                if item is not None:
                    decisions.append(item)
            elif current_section == 'action_items':
                # Only lines containing a bracket are treated as items.
                if '[' in line:
                    action_item = MeetingParser.parse_action_item(line)
                    if action_item is not None:
                        action_items.append(action_item)
            elif current_section == 'blockers':
                item = MeetingParser._list_item_text(line)
                if item is not None:
                    blockers.append(item)

        return MeetingNote(
            project_name=project_name,
            title=title,
            date=date,
            participants=participants,
            discussion='\n'.join(discussion) if discussion else None,
            decisions=decisions,
            action_items=action_items,
            blockers=blockers,
            file_path=str(file_path),
        )
def load_meetings_from_directory(data_dir: Path) -> List[MeetingNote]:
    """Load all meeting notes from a directory structure.

    Expected layout: ``data_dir/<project_name>/meetings/*.md``. Each
    sub-directory name of ``data_dir`` is used as the project name for the
    notes found beneath it.

    Args:
        data_dir: Root directory holding one sub-directory per project.

    Returns:
        The parsed notes, ordered by project directory and then by file
        path. The list is empty when ``data_dir`` is not an existing
        directory.
    """
    meetings: List[MeetingNote] = []

    # is_dir() also covers a path that exists but is a regular file, on
    # which iterdir() would raise NotADirectoryError.
    if not data_dir.is_dir():
        return meetings

    # Sorted so the result does not depend on filesystem listing order.
    for project_dir in sorted(data_dir.iterdir()):
        if not project_dir.is_dir():
            continue

        meetings_dir = project_dir / "meetings"
        if not meetings_dir.is_dir():
            continue

        for meeting_file in sorted(meetings_dir.glob("*.md")):
            meeting = MeetingParser.parse(meeting_file, project_dir.name)
            if meeting is not None:
                meetings.append(meeting)

    return meetings
|