Spaces:

Maral
/

epcc-contract-intelligence

Sleeping

epcc-contract-intelligence / src /config.py

maralzar

Initial commit: EPCC clause classifier Streamlit demo for HF Spaces

5212b8e 12 days ago

2.77 kB

	"""Central configuration: paths, seeds, model names, severity mapping."""
	from __future__ import annotations

	from pathlib import Path

	# Project root: the parent of the directory that contains this file.
	ROOT = Path(__file__).resolve().parents[1]
	DATA_DIR = ROOT / "data"
	MODELS_DIR = ROOT / "models"
	OUTPUTS_DIR = ROOT / "outputs"
	NOTEBOOKS_DIR = ROOT / "notebooks"

	# Import-time side effect: make sure the data, models and outputs
	# directories exist (including missing parents). NOTEBOOKS_DIR is not
	# created here.
	for _p in (DATA_DIR, MODELS_DIR, OUTPUTS_DIR):
	    _p.mkdir(exist_ok=True, parents=True)

	# Identifier of the CUAD clause-classification dataset (presumably a
	# Hugging Face Hub dataset id -- confirm against the loader that uses it).
	CUAD_HF_ID: str = "dvgodoy/CUAD_v1_Contract_Understanding_clause_classification"
	# Location of the Parquet file for CUAD inside DATA_DIR.
	CUAD_PARQUET: Path = DATA_DIR / "cuad.parquet"

	# Sentence-embedding model name and the vector width that goes with it.
	# NOTE(review): EMBED_DIM presumably has to match the output size of
	# EMBED_MODEL_NAME -- update both together.
	EMBED_MODEL_NAME: str = "BAAI/bge-small-en-v1.5"
	EMBED_DIM: int = 384

	# Seed and split size. NOTE(review): TEST_SIZE looks like the held-out
	# fraction of the data -- confirm against the code that performs the split.
	SEED: int = 42
	TEST_SIZE: float = 0.20

	# Severity tiers for CUAD's 41 clause labels, as seen by an EPC contractor.
	#
	#   critical -- missing one of these (a false negative) means direct
	#               commercial exposure: uncapped liability, missing
	#               LD/insurance terms, termination economics.
	#   moderate -- clauses that affect rights and downstream operations.
	#   low      -- bookkeeping fields that are mainly useful for indexing.
	#
	# Keys are tier names; values are the label strings assigned to that tier.
	SEVERITY_TIERS: dict[str, list[str]] = {
	    "critical": [
	        "Uncapped Liability",
	        "Cap On Liability",
	        "Liquidated Damages",
	        "Termination For Convenience",
	        "Insurance",
	        "Warranty Duration",
	        "Covenant Not To Sue",
	        "Source Code Escrow",
	    ],
	    "moderate": [
	        "Change Of Control",
	        "Anti-Assignment",
	        "Audit Rights",
	        "Governing Law",
	        "Non-Compete",
	        "Exclusivity",
	        "No-Solicit Of Customers",
	        "No-Solicit Of Employees",
	        "Non-Disparagement",
	        "Rofr/Rofo/Rofn",
	        "Revenue/Profit Sharing",
	        "Price Restrictions",
	        "Minimum Commitment",
	        "Volume Restriction",
	        "Ip Ownership Assignment",
	        "Joint Ip Ownership",
	        "License Grant",
	        "Non-Transferable License",
	        "Affiliate License-Licensor",
	        "Affiliate License-Licensee",
	        "Unlimited/All-You-Can-Eat-License",
	        "Irrevocable Or Perpetual License",
	        "Post-Termination Services",
	        "Third Party Beneficiary",
	        "Competitive Restriction Exception",
	        "Notice Period To Terminate Renewal",
	        "Renewal Term",
	    ],
	    "low": [
	        "Document Name",
	        "Parties",
	        "Agreement Date",
	        "Effective Date",
	        "Expiration Date",
	        "Most Favored Nation",
	    ],
	}


	def label_to_severity(
	    tiers: dict[str, list[str]] | None = None,
	) -> dict[str, str]:
	    """Invert a tier table into a flat ``label -> severity tier`` mapping.

	    Args:
	        tiers: Mapping of tier name to the labels in that tier. When
	            omitted (or ``None``), the module-level ``SEVERITY_TIERS`` is
	            used, which is the original behaviour of this function.

	    Returns:
	        A new dict with one entry per label. If a label is listed under
	        more than one tier, the tier that appears last in ``tiers`` wins.
	    """
	    table = SEVERITY_TIERS if tiers is None else tiers
	    return {label: tier for tier, labels in table.items() for label in labels}


	# Abstention cut-off: when the classifier's confidence falls below this
	# value it abstains and the item is routed to human review. The default was
	# picked empirically from the calibration sweep and can be overridden.
	DEFAULT_ABSTAIN_THRESHOLD: float = 0.45