# burnmydays's picture
# Initial commit: Commitment Conservation Framework
# 2a64ad4
from spacy import load
import re
def load_spacy_model(model_name='en_core_web_sm'):
    """Load the spaCy pipeline called ``model_name`` and return it.

    Thin wrapper around ``spacy.load``; every call loads the pipeline anew.
    """
    return load(model_name)
def normalize_text(text):
    """Return ``text`` lowercased, trimmed, and stripped of punctuation.

    Surrounding whitespace is trimmed *before* punctuation is removed, so
    whitespace that sat next to leading/trailing punctuation is kept.
    """
    trimmed = text.lower().strip()
    # Remove every character that is neither a word character nor whitespace.
    return re.sub(r'[^\w\s]', '', trimmed)
# Modal keywords whose presence marks a sentence as a commitment.
_HARD_MODALS = ('must', 'shall', 'will', 'have', 'need', 'required', 'ought', 'cannot', 'should')
_SOFT_MODALS = ('might', 'could', 'may', 'perhaps', 'maybe', 'tend')

# Whole-word, case-insensitive match. Plain substring tests wrongly fire on
# words that merely contain a keyword ("willow", "behave", "mayor", "attend").
# The optional suffixes keep the inflected / negated forms that a substring
# test used to catch: "needs", "tended", "shouldn't".
_MODAL_PATTERN = re.compile(
    r"\b(?:" + "|".join(_HARD_MODALS + _SOFT_MODALS) + r")(?:s|ed)?(?:n['\u2019]t)?\b",
    re.IGNORECASE,
)


def extract_hard_commitments(text, nlp=None):
    """Extract commitment sentences using modal keyword detection.

    The text is split into sentences by the spaCy pipeline; a sentence is
    kept when it contains at least one modal keyword as a whole word.

    Note: despite the function name, sentences containing only a *soft*
    modal (e.g. "might", "may") are returned as well.  Detection is purely
    lexical, so any use of a keyword such as "have" or "will" qualifies.

    Args:
        text: The raw text to analyse.
        nlp: A callable returning an object with a ``sents`` iterable whose
            items expose ``.text`` (a loaded spaCy pipeline).  When ``None``
            the default model is loaded via ``load_spacy_model()``.

    Returns:
        A set with the whitespace-stripped text of every matching sentence.
    """
    if nlp is None:
        nlp = load_spacy_model()

    doc = nlp(text)
    return {
        sent.text.strip()
        for sent in doc.sents
        if _MODAL_PATTERN.search(sent.text)
    }
def extract_from_texts(texts, model_name='en_core_web_sm'):
    """Run commitment extraction over several texts with one shared model.

    The spaCy model named ``model_name`` is loaded once and reused for every
    text.  Returns a dict mapping each input text to the set produced by
    ``extract_hard_commitments``; repeated texts share a single entry.
    """
    pipeline = load_spacy_model(model_name)
    return {
        source: extract_hard_commitments(source, pipeline)
        for source in texts
    }
def extract_hard(text: str, nlp=None) -> set:
    """Alias for ``extract_hard_commitments``; forwards both arguments unchanged."""
    found = extract_hard_commitments(text, nlp)
    return found