Saswat84 committed on
Commit
ab1e1c2
·
verified ·
1 Parent(s): 614823e

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +66 -0
  2. extractor.pkl +3 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ import datefinder
4
+ from datetime import datetime
5
+
6
class AdvancedActionItemExtractor:
    """Regex-based extractor of action items from free text.

    Each action item is a dict with the keys ``'assignee'``, ``'action'`` and
    ``'deadline'``. Items are found by running a fixed list of regular
    expressions over every sentence of the input.
    """

    def __init__(self):
        # List of pattern configs; see _build_patterns for the dict layout.
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the pattern configs used by :meth:`extract`.

        Every config has a ``'pattern'`` (regex source) and ``'groups'`` (the
        field name of each capture group, in order). A config may also carry
        a fixed ``'assignee'`` used when the regex captures none.
        """
        return [
            # "<Name> will <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name>: <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b):\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name> to <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+to\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name> should <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+should\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "I will <action> by <deadline>" -- no captured assignee.
            {'pattern': r'\bI\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['action', 'deadline'], 'assignee': 'Current Speaker'},
        ]

    @staticmethod
    def _has_capitalized_assignee(match, pattern_config):
        """Tell whether *match* names its assignee with a leading capital.

        Patterns are applied case-insensitively, so the ``[A-Z]`` in the name
        group also accepts ordinary lowercase words. A match counts as
        confident when the captured assignee starts with an uppercase letter
        in the text, or when the pattern has no assignee group at all (its
        assignee is fixed by the config).
        """
        group_names = pattern_config['groups']
        if 'assignee' not in group_names:
            return True
        captured = match.group(group_names.index('assignee') + 1)
        return bool(captured) and captured[0].isupper()

    def extract(self, text):
        """Return the action items found in *text*.

        Args:
            text: The transcript to scan. Anything that is not a non-empty
                ``str`` yields an empty list.

        Returns:
            A list of dicts with ``'assignee'``, ``'action'`` and
            ``'deadline'`` keys, sentence by sentence and, within a sentence,
            in pattern order.
        """
        if not text or not isinstance(text, str):
            return []

        # Collapse all whitespace runs so the patterns see single spaces.
        cleaned_text = re.sub(r'\s+', ' ', text)
        action_items = []

        for sentence in re.split(r'[.!?]+', cleaned_text):
            sentence = sentence.strip()
            if not sentence:
                # Splitting leaves empty fragments (e.g. after the final
                # full stop); no pattern can match them.
                continue

            found = [
                (match, pattern_config)
                for pattern_config in self.patterns
                for match in re.finditer(pattern_config['pattern'], sentence, re.IGNORECASE)
            ]
            confident_spans = [
                match.span()
                for match, pattern_config in found
                if self._has_capitalized_assignee(match, pattern_config)
            ]

            for match, pattern_config in found:
                # Because of IGNORECASE, a lowercase word inside an already
                # recognised item can pose as an assignee ("Mike will talk
                # to John by Friday" -> "Talk: John"). Drop such a match when
                # it overlaps one whose assignee is properly capitalized.
                if not self._has_capitalized_assignee(match, pattern_config) and any(
                    match.start() < end and start < match.end()
                    for start, end in confident_spans
                ):
                    continue
                action_item = self._parse_match(match, pattern_config, sentence)
                if action_item:
                    action_items.append(action_item)

        return action_items

    def _parse_match(self, match, pattern_config, sentence):
        """Turn one regex match into an action-item dict.

        Args:
            match: The ``re.Match`` produced by ``pattern_config['pattern']``.
            pattern_config: The config dict the match came from.
            sentence: The sentence that was searched (currently unused).

        Returns:
            A dict with ``'assignee'``, ``'action'`` and ``'deadline'`` keys.
        """
        action_item = {'assignee': None, 'action': None, 'deadline': None}
        # A fixed assignee from the config is the default; a captured
        # assignee group, when present, overrides it below.
        if 'assignee' in pattern_config:
            action_item['assignee'] = pattern_config['assignee']

        for group_name, value in zip(pattern_config['groups'], match.groups()):
            if group_name == 'assignee':
                action_item['assignee'] = value.title()
            elif group_name == 'action':
                action_item['action'] = value.strip()
            elif group_name == 'deadline':
                action_item['deadline'] = value.strip()
        return action_item

    def format_output(self, action_items):
        """Render *action_items* as a numbered, human-readable list.

        Args:
            action_items: Dicts as returned by :meth:`extract`.

        Returns:
            A string starting with ``"ACTION ITEMS:"`` followed by one
            numbered line per item, or a "no items" line when the list is
            empty. A falsy deadline is shown as ``TBD``.
        """
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."

        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] if item['deadline'] else 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)
51
+
52
def extract_action_items(text):
    """Extract action items from *text* and return them as display text.

    Builds a fresh ``AdvancedActionItemExtractor``, runs it over the input
    and returns the formatted listing produced by its ``format_output``.
    """
    engine = AdvancedActionItemExtractor()
    return engine.format_output(engine.extract(text))
56
+
57
# Input widget: multi-line box the transcript is pasted into.
_transcript_input = gr.Textbox(
    lines=5,
    placeholder="Paste meeting transcript here...",
    label="Meeting Transcript",
)

# Output widget: shows the formatted action-item listing.
_items_output = gr.Textbox(lines=10, label="Extracted Action Items")

# One sample transcript offered by the UI.
_sample_transcripts = [
    ["Mike will set up Flask project by Oct 5. Emily should create React prototype by Oct 7."],
]

# Gradio app wiring the transcript box to extract_action_items.
iface = gr.Interface(
    fn=extract_action_items,
    inputs=_transcript_input,
    outputs=_items_output,
    title="Action Item & Deadline Extractor",
    examples=_sample_transcripts,
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()
extractor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68e65aec26e4920f47d42a63cab7478ccc0f4b9271b093c41407175e7a4d81d
3
+ size 575
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ regex>=2022.3.15
2
+ datefinder>=0.7.1