File size: 7,126 Bytes
d6f13c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""
Meeting note parsers for extracting structured data from markdown files.
"""
from pathlib import Path
from typing import List, Optional
from datetime import datetime
from pydantic import BaseModel, Field
import re


class ActionItem(BaseModel):
    """Represents an action item from a meeting.

    Only ``task`` is required. The other fields default to "not specified"
    (``None``) or "not done" (``False``).
    """
    # Description of the work to be done.
    task: str
    # Person responsible for the task, if one was named.
    assignee: Optional[str] = None
    # Due date kept as free-form text (a string, not a datetime).
    deadline: Optional[str] = None
    # Whether the item has been marked as done.
    completed: bool = False


class MeetingNote(BaseModel):
    """Represents a parsed meeting note.

    ``project_name``, ``title`` and ``file_path`` are required. Every other
    field is optional and defaults to ``None`` or to a fresh empty list.
    """
    # Name of the project the meeting belongs to.
    project_name: str
    # Title of the meeting.
    title: str
    # Date of the meeting, when one is known.
    date: Optional[datetime] = None
    # Names of the people who attended.
    participants: List[str] = Field(default_factory=list)
    # Free-form discussion text, or None when there is none.
    discussion: Optional[str] = None
    # Decisions taken during the meeting, one entry per decision.
    decisions: List[str] = Field(default_factory=list)
    # Follow-up tasks recorded for the meeting.
    action_items: List[ActionItem] = Field(default_factory=list)
    # Blockers or issues recorded for the meeting.
    blockers: List[str] = Field(default_factory=list)
    # Location of the source file, stored as a string.
    file_path: str

class MeetingParser:
    """Parser for markdown meeting notes.

    The class is a stateless namespace: every method is a ``staticmethod``
    and the class attributes below are read-only lookup tables.
    """

    # Formats tried in order by ``parse_date``. Order matters: the first
    # format that fits wins, so an ambiguous "03/04/2025" is read day-first.
    _DATE_FORMATS = (
        "%Y-%m-%d",
        "%d/%m/%Y",
        "%m/%d/%Y",
        "%B %d, %Y",
        "%b %d, %Y",
        "%Y/%m/%d",
    )

    # Checkbox markers such as "[ ]", "[x]", "[X]", "[✓]" and "[✔]".
    _CHECKBOX_RE = re.compile(r'\[[ xX✓✔]\]')

    # "Name: rest of the task" - the name may contain only letters and spaces.
    _ASSIGNEE_RE = re.compile(r'^([A-Za-z\s]+):\s*(.+)$')

    # Deadline patterns, tried in order. The parenthesised form comes first
    # so that "(by 2025-01-20)" is removed together with its parentheses
    # instead of leaving "()" behind in the task text. The bare forms require
    # "by" to start a word so that e.g. "standby Jan 5" is not a deadline.
    _DEADLINE_PATTERNS = (
        re.compile(r'\(by\s+([^)]+)\)', re.IGNORECASE),
        re.compile(r'\bby\s+([A-Za-z]+\s+\d{1,2}(?:,\s+\d{4})?)', re.IGNORECASE),
        re.compile(r'\bby\s+(\d{4}-\d{2}-\d{2})', re.IGNORECASE),
    )

    # (keywords, section) pairs checked in order against a "## " heading.
    _SECTION_KEYWORDS = (
        (('discussion', 'notes'), 'discussion'),
        (('decision',), 'decisions'),
        (('action', 'todo', 'task'), 'action_items'),
        (('blocker', 'issue'), 'blockers'),
    )

    @staticmethod
    def parse_date(date_str: str) -> Optional[datetime]:
        """Parse a date written in one of the supported formats.

        Args:
            date_str: Text such as "2025-01-20", "20/01/2025",
                "January 20, 2025", "Jan 20, 2025" or "2025/01/20".
                Surrounding whitespace is ignored.

        Returns:
            The parsed ``datetime``, or ``None`` when no format matches.
        """
        text = date_str.strip()
        for fmt in MeetingParser._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def parse_action_item(line: str) -> Optional[ActionItem]:
        """Parse a single action-item line.

        Recognised shapes include::

            - [ ] Task
            - [x] Task
            * [ ] Alice: Task by Jan 20
            - [x] Bob: Task (by 2025-01-20)

        Args:
            line: One line of markdown containing a checkbox.

        Returns:
            An ``ActionItem``, or ``None`` when nothing is left once the
            bullet and checkbox markers have been removed.
        """
        completed = "[x]" in line.lower() or "[✓]" in line or "[✔]" in line

        # Remove checkbox markers, then the leading list bullet.
        text = MeetingParser._CHECKBOX_RE.sub('', line).strip()
        text = text.lstrip('- ').strip()
        # "*" bullets are accepted elsewhere in the parser, so accept them
        # here too - but only a lone bullet, so "**bold**" text survives.
        if text == '*' or text.startswith('* '):
            text = text[1:].strip()

        if not text:
            return None

        # Try to extract a leading "Assignee:" prefix.
        assignee = None
        assignee_match = MeetingParser._ASSIGNEE_RE.match(text)
        if assignee_match:
            assignee = assignee_match.group(1).strip()
            text = assignee_match.group(2).strip()

        # Try to extract a deadline; the first pattern that matches wins.
        deadline = None
        for pattern in MeetingParser._DEADLINE_PATTERNS:
            deadline_match = pattern.search(text)
            if deadline_match:
                deadline = deadline_match.group(1).strip()
                # Cut out exactly the matched span and rejoin the two halves
                # with a single space, so that no other text is dropped and
                # no double space is left behind.
                before = text[:deadline_match.start()].rstrip()
                after = text[deadline_match.end():].lstrip()
                text = (before + ' ' + after).strip()
                break

        return ActionItem(
            task=text,
            assignee=assignee,
            deadline=deadline,
            completed=completed
        )

    @staticmethod
    def _classify_section(heading: str) -> str:
        """Map the text of a "## " heading to an internal section name."""
        name = heading.strip().lower()
        for keywords, section in MeetingParser._SECTION_KEYWORDS:
            if any(keyword in name for keyword in keywords):
                return section
        # Unrecognised headings are collected as discussion text.
        return 'discussion'

    @staticmethod
    def _bullet_text(line: str) -> Optional[str]:
        """Return the text of a "-" or "*" bullet line, or None otherwise."""
        if not line.startswith(('-', '*')):
            return None
        return line.lstrip('-*').strip()

    @staticmethod
    def parse(file_path: Path, project_name: str) -> Optional[MeetingNote]:
        """Parse a markdown meeting note file.

        Recognised structure:

        * ``# Title`` (an optional ``Meeting:`` prefix is dropped); without
          one the title is derived from the file name.
        * ``Date: ...`` and ``Participants: a, b, c`` metadata lines.
        * ``## `` headings that select the current section (discussion,
          decisions, action items or blockers).

        Content that appears before the first ``## `` heading and is not a
        title or metadata line is ignored.

        Args:
            file_path: Path of the markdown file.
            project_name: Project name stored on the resulting note.

        Returns:
            The parsed ``MeetingNote``, or ``None`` when ``file_path`` is
            not an existing regular file.
        """
        if not file_path.is_file():
            return None

        # "utf-8-sig" reads plain UTF-8 as well and drops a leading BOM,
        # which would otherwise hide a "# Title" on the first line.
        content = file_path.read_text(encoding='utf-8-sig')

        # Initialize fields; the title falls back to the file name.
        title = file_path.stem.replace('-', ' ').replace('_', ' ').title()
        date = None
        participants = []
        discussion = []
        decisions = []
        action_items = []
        blockers = []
        bullet_sections = {'decisions': decisions, 'blockers': blockers}

        current_section = None

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            lowered = line.lower()

            # Title
            if line.startswith('# '):
                title = line[2:].strip()
                if title.lower().startswith('meeting:'):
                    title = title[len('meeting:'):].strip()
                continue

            # Metadata
            if lowered.startswith('date:'):
                parsed_date = MeetingParser.parse_date(line[len('date:'):])
                # Keep an earlier valid date when a later "Date:" line
                # cannot be parsed.
                if parsed_date is not None:
                    date = parsed_date
                continue

            if lowered.startswith('participants:'):
                names = line[len('participants:'):].split(',')
                # Skip blanks produced by an empty value or a stray comma.
                participants = [name.strip() for name in names if name.strip()]
                continue

            # Section headings
            if line.startswith('## '):
                current_section = MeetingParser._classify_section(line[3:])
                continue

            # Section content
            if current_section == 'discussion':
                discussion.append(line)
            elif current_section == 'action_items':
                # Only lines carrying a checkbox count as action items.
                if '[' in line:
                    action_item = MeetingParser.parse_action_item(line)
                    if action_item is not None:
                        action_items.append(action_item)
            elif current_section in bullet_sections:
                bullet = MeetingParser._bullet_text(line)
                # Non-bullet lines and empty bullets are skipped.
                if bullet:
                    bullet_sections[current_section].append(bullet)

        return MeetingNote(
            project_name=project_name,
            title=title,
            date=date,
            participants=participants,
            discussion='\n'.join(discussion) if discussion else None,
            decisions=decisions,
            action_items=action_items,
            blockers=blockers,
            file_path=str(file_path)
        )


def load_meetings_from_directory(data_dir: Path) -> List[MeetingNote]:
    """Load all meeting notes from a directory structure.

    Expected layout: ``data_dir/<project_name>/meetings/*.md``. The name of
    each project directory is used as the project name of its notes.

    Args:
        data_dir: Root directory that contains one sub-directory per project.

    Returns:
        The parsed notes, ordered by project directory and then by file
        path. The list is empty when ``data_dir`` is not a directory.
        Files the parser returns ``None`` for are left out.
    """
    meetings: List[MeetingNote] = []

    # is_dir() also rejects a path that exists but is a regular file,
    # which would make iterdir() raise.
    if not data_dir.is_dir():
        return meetings

    # Sorted traversal keeps the result order independent of the order in
    # which the filesystem happens to enumerate entries.
    for project_dir in sorted(data_dir.iterdir()):
        if not project_dir.is_dir():
            continue

        meetings_dir = project_dir / "meetings"
        if not meetings_dir.is_dir():
            continue

        for meeting_file in sorted(meetings_dir.glob("*.md")):
            meeting = MeetingParser.parse(meeting_file, project_dir.name)
            if meeting is not None:
                meetings.append(meeting)

    return meetings