Spaces:

solfedge
/

Clause_Lense

Sleeping

App Files Files Community

Clause_Lense / spacy_matcher.py

solfedge

Upload 4 files

1f8cd6e verified 7 months ago

raw

history blame contribute delete

2.59 kB


	import spacy
	from spacy.matcher import Matcher


	# Load the "en_core_web_sm" spaCy pipeline once, at import time, so every
	# call to find_clauses() reuses the same pipeline object. The model package
	# has to be installed separately from spaCy itself for this load to succeed.
	nlp = spacy.load("en_core_web_sm")
	# Build the rule-based matcher on the pipeline's own vocabulary: match IDs
	# returned by the matcher are later resolved back to label strings through
	# nlp.vocab.strings, so both must share the same vocab.
	matcher = Matcher(nlp.vocab)


	# spaCy Matcher token patterns, grouped by clause label.
	#
	# Each label maps to a list of alternative patterns; each pattern is a list
	# of per-token attribute dicts. "LOWER" compares against the lower-cased
	# token text, so every pattern below is case-insensitive.
	#
	# Hyphenated terms: the default English tokenizer splits a hyphen that sits
	# between letters into its own token ("non-disclosure" -> "non", "-",
	# "disclosure"), so a single-token pattern for the hyphenated form never
	# fires on its own. Each such term therefore also has an explicit
	# three-token form. The single-token forms are kept so the term is still
	# recognised if the pipeline is configured with a tokenizer that does not
	# split on hyphens.
	clause_patterns = {
	    "CONFIDENTIALITY": [
	        [{"LOWER": "confidentiality"}],
	        [{"LOWER": "non-disclosure"}],
	        [{"LOWER": "non"}, {"ORTH": "-"}, {"LOWER": "disclosure"}],
	        [{"LOWER": "nda"}],
	        [{"LOWER": "proprietary"}, {"LOWER": "information"}],
	    ],
	    "TERMINATION": [
	        [{"LOWER": "termination"}],
	        [{"LOWER": "end"}, {"LOWER": "of"}, {"LOWER": "agreement"}],
	        [{"LOWER": "terminate"}, {"LOWER": "this"}, {"LOWER": "agreement"}],
	    ],
	    "NON_COMPETE": [
	        [{"LOWER": "non-compete"}],
	        [{"LOWER": "non"}, {"ORTH": "-"}, {"LOWER": "compete"}],
	        [{"LOWER": "non"}, {"LOWER": "compete"}],
	        [{"LOWER": "competition"}, {"LOWER": "restriction"}],
	    ],
	    "GOVERNING_LAW": [
	        [{"LOWER": "governing"}, {"LOWER": "law"}],
	        [{"LOWER": "jurisdiction"}],
	        [{"LOWER": "choice"}, {"LOWER": "of"}, {"LOWER": "law"}],
	    ],
	    "SEVERABILITY": [
	        [{"LOWER": "severability"}],
	        # {"OP": "?"} with no other attribute is a wildcard for zero or one
	        # arbitrary token, e.g. "invalidity provision" or
	        # "invalidity of provision".
	        # NOTE(review): longer gaps such as "invalidity of any provision"
	        # are not bridged by this pattern - confirm that is intended.
	        [{"LOWER": "invalidity"}, {"OP": "?"}, {"LOWER": "provision"}],
	        [{"LOWER": "severable"}],
	    ],
	    "LIABILITY": [
	        [{"LOWER": "liability"}],
	        [{"LOWER": "limitation"}, {"LOWER": "of"}, {"LOWER": "liability"}],
	        [{"LOWER": "indemnification"}],
	        [{"LOWER": "cap"}, {"LOWER": "on"}, {"LOWER": "damages"}],
	    ],
	    "FORCE_MAJEURE": [
	        [{"LOWER": "force"}, {"LOWER": "majeure"}],
	        [{"LOWER": "acts"}, {"LOWER": "of"}, {"LOWER": "god"}],
	        [{"LOWER": "unforeseen"}, {"LOWER": "events"}],
	        [{"LOWER": "pandemic"}],
	    ],
	    "PAYMENT_TERMS": [
	        [{"LOWER": "payment"}, {"LOWER": "terms"}],
	        # IS_DIGIT only accepts tokens made of digits ("30"), not spelled-out
	        # numbers ("thirty").
	        [
	            {"LOWER": "due"},
	            {"LOWER": "within"},
	            {"IS_DIGIT": True},
	            {"LOWER": {"IN": ["days", "weeks"]}},
	        ],
	        [{"LOWER": "invoice"}, {"LOWER": "shall"}, {"LOWER": "be"}],
	        [{"LOWER": "net"}, {"IS_DIGIT": True}],
	    ],
	}

	# Register every clause label with the matcher. Matcher.add takes the whole
	# list of alternative patterns for a key, so one call per label is enough.
	for clause_label, token_patterns in clause_patterns.items():
	    matcher.add(clause_label, token_patterns)

	print("Clause matcher loaded with extended patterns.")

	def find_clauses(text: str, window_size: int = 30) -> list:
	    """Find clause keywords in *text* and return each hit with its context.

	    The text is run through the module-level ``nlp`` pipeline and the
	    module-level ``matcher``; every match produces one result entry.

	    Args:
	        text: Raw text to scan.
	        window_size: Number of tokens of context to include on each side
	            of a match. Values below zero are treated as zero, so the
	            context always contains at least the matched tokens.

	    Returns:
	        A list of ``(label, context, start, end)`` tuples in the order the
	        matcher reports them. ``label`` is the clause label the matching
	        pattern was registered under, ``context`` is the text of the
	        matched tokens plus the surrounding window (clipped to the
	        document bounds), and ``start``/``end`` are the token indices of
	        the match itself (``end`` exclusive). Overlapping patterns each
	        yield their own entry, so one passage can appear more than once.
	    """
	    # A negative window would move the context bounds inside the match
	    # (or past each other), yielding a truncated or empty context.
	    window = max(0, window_size)

	    doc = nlp(text)
	    token_count = len(doc)

	    results = []
	    for match_id, start, end in matcher(doc):
	        # match_id is the hash of the label string; map it back to text.
	        label = nlp.vocab.strings[match_id]
	        # Clip the context window to the document boundaries.
	        ctx_start = max(0, start - window)
	        ctx_end = min(token_count, end + window)
	        results.append((label, doc[ctx_start:ctx_end].text, start, end))
	    return results