burnmydays's picture
Remove spacy dependency completely - use regex sentence splitting
ed31594
import re
def normalize_text(text):
    """Return a comparison-friendly form of *text*.

    The text is lowercased and stripped of surrounding whitespace, then every
    character that is neither a word character nor whitespace is removed.
    Underscores count as word characters and are therefore kept. Because the
    whitespace strip happens before punctuation is removed, whitespace that
    sat next to leading/trailing punctuation can remain at the edges.
    """
    lowered = text.lower()
    trimmed = lowered.strip()
    # Drop anything that is not a word character or whitespace.
    return re.sub(r'[^\w\s]', '', trimmed)
def simple_sent_split(text):
    """Split *text* into sentences with a regular expression.

    A boundary is a run of '.', '!' or '?' that is either followed by
    whitespace or sits at the end of the text. The terminating punctuation is
    discarded. Each piece is stripped, and empty pieces are dropped.
    """
    pieces = re.split(r'[.!?]+\s+|[.!?]+$', text)
    result = []
    for piece in pieces:
        trimmed = piece.strip()
        if trimmed:
            result.append(trimmed)
    return result
def extract_hard_commitments(text, nlp=None):
    """Extract sentences that contain a hard modal keyword.

    The text is split with ``simple_sent_split`` and a sentence is kept when
    it contains one of the modal keywords as a whole word (case-insensitive).
    Matching on word boundaries avoids the false positives of a plain
    substring test, e.g. 'ought' inside 'thought' or 'will' inside 'willow'.
    Negative contractions such as "shouldn't" or "mustn't" still count.

    Args:
        text: Input text to scan.
        nlp: Unused by this implementation; the parameter is kept in the
            signature so callers that pass it keep working.

    Returns:
        A set of the matching sentences, as returned by the splitter
        (already stripped of surrounding whitespace).
    """
    hard_modals = (
        'must', 'shall', 'will', 'have', 'need', 'required', 'ought',
        'cannot', 'should',
        # Inflections of 'need' that the old substring test also accepted.
        'needs', 'needed',
    )
    # Whole-word match, optionally followed by an n't contraction written
    # with either a straight or a typographic apostrophe.
    modal_re = re.compile(
        r"\b(?:" + "|".join(hard_modals) + r")(?:n['\u2019]t)?\b"
    )
    return {
        sent.strip()
        for sent in simple_sent_split(text)
        if modal_re.search(sent.lower())
    }