Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- app.py +66 -0
- extractor.pkl +3 -0
- requirements.txt +2 -0
app.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import re
|
| 3 |
+
import datefinder
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
class AdvancedActionItemExtractor:
    """Regex-based extractor of action items from meeting text.

    Each action item is a dict with the keys ``'assignee'``, ``'action'`` and
    ``'deadline'``.  Items are recognised by a fixed, ordered list of
    patterns of the form "<Name> will/to/should <action> by <deadline>",
    "<Name>: <action> by <deadline>" and "I will <action> by <deadline>".

    NOTE: patterns are applied with ``re.IGNORECASE``, so the ``[A-Z][a-z]+``
    name sub-pattern also matches lowercase/uppercase words; the captured
    assignee is normalised with ``str.title()``.
    """

    def __init__(self):
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the ordered pattern configurations used by ``extract``.

        Each configuration is a dict with:
          * ``'pattern'``  - the regular expression source,
          * ``'groups'``   - the field name for each capture group, in order,
          * ``'assignee'`` - (optional) fixed assignee used when the pattern
            itself captures none.
        """
        return [
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b):\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+to\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+should\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'\bI\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['action', 'deadline'], 'assignee': 'Current Speaker'},
        ]

    def extract(self, text):
        """Return the list of action-item dicts found in *text*.

        Returns an empty list when *text* is empty or not a ``str``.

        The text is split into sentences on ``.``, ``!``, ``?`` and on line
        breaks; every pattern is then tried against every sentence, so items
        are ordered by sentence first and by pattern order second.

        Line breaks are treated as sentence boundaries because notes are often
        written one item per line without closing punctuation.  Collapsing
        them into spaces before splitting would let the lazy deadline group
        run to the end of the merged text and swallow the following lines.
        The trade-off is that a sentence hard-wrapped across two lines is no
        longer matched as a whole.
        """
        if not text or not isinstance(text, str):
            return []

        action_items = []
        for raw_sentence in re.split(r'[.!?\r\n]+', text):
            # Normalise runs of whitespace inside the sentence only, after
            # the line-break boundaries have been honoured.
            sentence = re.sub(r'\s+', ' ', raw_sentence).strip()
            if not sentence:
                continue
            for pattern_config in self.patterns:
                for match in re.finditer(pattern_config['pattern'], sentence, re.IGNORECASE):
                    action_item = self._parse_match(match, pattern_config, sentence)
                    if action_item:
                        action_items.append(action_item)
        return action_items

    def _parse_match(self, match, pattern_config, sentence):
        """Convert one regex *match* into an action-item dict.

        *pattern_config* supplies the field name of each capture group and,
        optionally, a fixed ``'assignee'``.  *sentence* is accepted for
        interface compatibility and is not used.
        """
        action_item = {
            'assignee': pattern_config.get('assignee'),
            'action': None,
            'deadline': None,
        }
        # zip() pairs each declared field name with its capture group and
        # stops at the shorter of the two sequences.
        for group_name, value in zip(pattern_config['groups'], match.groups()):
            if group_name == 'assignee':
                action_item['assignee'] = value.title()
            elif group_name in ('action', 'deadline'):
                action_item[group_name] = value.strip()
        return action_item

    def format_output(self, action_items):
        """Render *action_items* as a numbered, human-readable text block.

        A missing or empty deadline is shown as ``TBD``.
        """
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."
        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] if item['deadline'] else 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)
|
| 51 |
+
|
| 52 |
+
def extract_action_items(text):
    """Extract action items from *text* and return them as formatted text.

    Builds a fresh ``AdvancedActionItemExtractor``, runs it over *text* and
    returns the result of its ``format_output`` method.
    """
    item_extractor = AdvancedActionItemExtractor()
    return item_extractor.format_output(item_extractor.extract(text))
|
| 56 |
+
|
| 57 |
+
# Gradio UI: a single text box in, a single text box out, wired to
# extract_action_items.
iface = gr.Interface(
    fn=extract_action_items,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Paste meeting transcript here...",
        label="Meeting Transcript",
    ),
    outputs=gr.Textbox(lines=10, label="Extracted Action Items"),
    title="Action Item & Deadline Extractor",
    examples=[
        ["Mike will set up Flask project by Oct 5. Emily should create React prototype by Oct 7."],
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
extractor.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f68e65aec26e4920f47d42a63cab7478ccc0f4b9271b093c41407175e7a4d81d
|
| 3 |
+
size 575
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
regex>=2022.3.15
|
| 2 |
+
datefinder>=0.7.1
|