File size: 3,293 Bytes
3cf54df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
import re
import datefinder

class AdvancedActionItemExtractor:
    """Extract "<assignee> <does something> by <deadline>" items from free text.

    Extraction is purely regex based. Each rule in ``self.patterns`` is a dict:

    * ``pattern``  -- regex source; compiled with ``re.IGNORECASE``.
    * ``groups``   -- field name for each capture group, in order.
    * ``assignee`` -- optional fixed assignee for rules that do not capture one.
    * ``weak``     -- optional flag; a weak rule loses to any overlapping
      non-weak match found in the same sentence.
    """

    def __init__(self):
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the ordered list of extraction rules.

        Because matching is case-insensitive, the ``[A-Z][a-z]+`` name group
        accepts any alphabetic word of two or more letters, not only
        capitalised ones. The "<Name> to <action>" rule is therefore marked
        ``weak``: on its own it also fires on ordinary phrases such as
        "talk to Sara by Friday".
        """
        return [
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b):\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+to\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline'], 'weak': True},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+should\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'\bI\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['action', 'deadline'], 'assignee': 'Current Speaker'},
        ]

    def extract(self, text):
        """Return a list of action-item dicts found in ``text``.

        Args:
            text: Transcript text. Anything falsy or not a ``str`` yields ``[]``.

        Returns:
            A list of ``{'assignee', 'action', 'deadline'}`` dicts, ordered by
            sentence and then by position within the sentence.
        """
        if not text or not isinstance(text, str):
            return []

        # Compile once per call instead of once per sentence; built from
        # self.patterns each time so edits to that list are honoured.
        compiled = [
            (re.compile(config['pattern'], re.IGNORECASE), config)
            for config in self.patterns
        ]

        cleaned_text = re.sub(r'\s+', ' ', text)
        action_items = []
        for sentence in re.split(r'[.!?]+', cleaned_text):
            sentence = sentence.strip()
            if not sentence:
                # Splitting leaves empty fragments after trailing punctuation.
                continue
            action_items.extend(self._extract_from_sentence(sentence, compiled))
        return action_items

    def _extract_from_sentence(self, sentence, compiled):
        """Run every rule on one sentence and drop overlapping duplicates.

        Several rules can match overlapping parts of the same sentence (for
        example "Mike will talk to Sara by Friday" matches both the "will"
        rule and, from "talk", the "to" rule). Only one item is kept per
        overlapping region: non-weak rules win over weak ones, then the match
        that starts earliest, then the rule listed first.
        """
        candidates = []
        for order, (regex, config) in enumerate(compiled):
            for match in regex.finditer(sentence):
                item = self._parse_match(match, config, sentence)
                if item:
                    candidates.append(
                        (bool(config.get('weak')), match.start(), order, match.end(), item)
                    )

        # Sort by preference; (weak, start, order) is unique per candidate,
        # so the item dicts themselves are never compared.
        candidates.sort(key=lambda candidate: candidate[:3])

        accepted = []
        for _weak, start, _order, end, item in candidates:
            overlaps = any(
                start < kept_end and kept_start < end
                for kept_start, kept_end, _ in accepted
            )
            if not overlaps:
                accepted.append((start, end, item))

        # Report surviving items in reading order.
        accepted.sort(key=lambda kept: kept[0])
        return [item for _, _, item in accepted]

    def _parse_match(self, match, pattern_config, sentence):
        """Map the capture groups of ``match`` onto an action-item dict.

        Args:
            match: Regex match produced by the rule's pattern.
            pattern_config: The rule dict that produced ``match``.
            sentence: The sentence that was searched (currently unused; kept
                for interface compatibility).

        Returns:
            A dict with ``assignee``, ``action`` and ``deadline`` keys; fields
            the rule does not supply stay ``None``.
        """
        groups = match.groups()
        action_item = {'assignee': None, 'action': None, 'deadline': None}
        # A rule-level fixed assignee applies when the rule captures none.
        if 'assignee' in pattern_config:
            action_item['assignee'] = pattern_config['assignee']
        for group_name, value in zip(pattern_config['groups'], groups):
            if group_name == 'assignee':
                action_item['assignee'] = value.title()
            elif group_name == 'action':
                action_item['action'] = value.strip()
            elif group_name == 'deadline':
                action_item['deadline'] = value.strip()
        return action_item

    def format_output(self, action_items):
        """Render action items as a numbered, human-readable text block.

        Args:
            action_items: List of dicts as returned by :meth:`extract`.

        Returns:
            A string starting with ``"ACTION ITEMS:"`` followed by one line
            per item, or a "No action items found." line for empty input.
            A missing deadline is shown as ``TBD``.
        """
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."
        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] if item['deadline'] else 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)

def extract_action_items(text):
    """Extract action items from ``text`` and return them as formatted text.

    Builds a fresh ``AdvancedActionItemExtractor``, runs it over ``text`` and
    returns the string produced by its ``format_output`` method.
    """
    item_extractor = AdvancedActionItemExtractor()
    return item_extractor.format_output(item_extractor.extract(text))

# Gradio UI definition: one multi-line text input is passed to
# extract_action_items and its returned string is shown in a text output.
demo = gr.Interface(
    fn=extract_action_items,
    inputs=gr.Textbox(lines=5, placeholder="Paste meeting transcript here...\nExample: Mike will set up Flask project by Oct 5."),
    outputs=gr.Textbox(lines=10, label="Extracted Action Items"),
    title="Action Item & Deadline Extractor",
    description="Extract action items, assignees, and deadlines from meeting transcripts"
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()