File size: 819 Bytes
2a64ad4
 
 
 
 
 
ed31594
 
 
 
 
2a64ad4
 
 
 
ed31594
 
 
2a64ad4
ed31594
2a64ad4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import re

def normalize_text(text):
    """Normalize text for comparison: lowercase, strip punctuation.

    The text is lowercased, every character that is neither a regex word
    character (letters, digits, underscore) nor whitespace is removed, and
    leading/trailing whitespace is trimmed. Interior whitespace is left as-is.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string; empty if nothing but punctuation/whitespace
        was present.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    # Trim *after* removing punctuation: deleting a trailing "!" or a leading
    # "(" can expose whitespace that an earlier strip() would have missed,
    # which would make otherwise-equal strings compare unequal.
    return without_punct.strip()

def simple_sent_split(text):
    """Split text into sentences with a regex.

    A boundary is a run of '.', '!' or '?' that is either followed by
    whitespace or sits at the end of the text. The terminators themselves are
    dropped, each piece is whitespace-trimmed, and empty pieces are discarded.
    """
    raw_pieces = re.split(r'[.!?]+\s+|[.!?]+$', text)
    trimmed = map(str.strip, raw_pieces)
    # filter(None, ...) drops the empty strings left by a trailing terminator.
    return list(filter(None, trimmed))

# Hard-modal keywords, matched as whole words so that words which merely
# contain one ("thought", "behave", "shallow", "mustard", "willow") do not
# flag a sentence. Inflections of "need" and "n't" contractions ("shouldn't",
# "mustn't", "haven't") are still accepted.
_HARD_MODAL_RE = re.compile(
    r"\b(?:must|shall|will|have|need(?:s|ed|ing)?|required|ought|cannot|should)"
    r"(?:n['\u2019]t)?\b",
    re.IGNORECASE,
)


def extract_hard_commitments(text, nlp=None):
    """Extract commitments using expanded modal keyword detection.

    The text is split into sentences with ``simple_sent_split`` and every
    sentence containing a hard-modal keyword (must, shall, will, have, need,
    required, ought, cannot, should) as a whole word is kept. Matching is
    case-insensitive.

    Args:
        text: The text to scan.
        nlp: Not used by this implementation; kept in the signature so
            callers that pass it continue to work.

    Returns:
        A set of the matching sentences (whitespace-trimmed, terminators
        removed by the splitter). Empty if no sentence matches.
    """
    return {
        sent.strip()
        for sent in simple_sent_split(text)
        if _HARD_MODAL_RE.search(sent)
    }