# adi-123's picture
# Upload 21 files
# 8c35759 verified
"""Schema policy for LLM-driven graph extraction."""
from __future__ import annotations
from typing import List
class SchemaPolicy:
    """Defines allowed node labels and relationship types for LLM graph extraction.

    The LLMGraphTransformer benefits from explicit schema constraints. This schema
    is intentionally broad to support diverse project report questions (stakeholders,
    contracts, permitting, schedule, finance, risks, etc.).

    Both lists are plain class attributes; use the accessor classmethods to
    obtain independent copies that can be modified without touching the
    class-level lists.
    """

    # Node labels permitted in the extracted graph, grouped by theme.
    ALLOWED_NODES: List[str] = [
        # Document structure
        "Project", "Report", "Document", "Section", "Chunk", "Source", "Evidence",
        # Organizations
        "Organization", "Company", "Owner", "ParentCompany", "Client", "Customer",
        "Partner", "JV", "Consortium", "Contractor", "Subcontractor", "Vendor", "Supplier",
        "Consultant", "EngineeringFirm", "EPC", "EPCM", "Operator",
        "GovernmentAgency", "Regulator", "Stakeholder",
        # People
        "Person", "Role", "Team", "Department",
        # Geography
        "Location", "Address", "City", "State", "Province", "Region", "Country", "County",
        "Zone", "Port", "Site", "Plant",
        # Finance
        "Budget", "Cost", "Capex", "Opex", "Estimate", "Investment", "Funding",
        "Currency", "TIV", "Revenue", "Tariff", "Price",
        # Timeline
        "Timeline", "Schedule", "Milestone", "Phase", "Stage", "Date", "Quarter", "Year",
        "Duration", "StartDate", "EndDate",
        # Technical
        "Industry", "Sector", "Market", "Demand", "Product", "Output", "Capacity",
        "Feedstock", "Fuel", "Technology", "Process", "Equipment", "Unit", "System", "Utility",
        "Specification", "Standard",
        # Contracts
        "Contract", "Agreement", "Tender", "Bid", "RFQ", "Procurement", "Permit",
        "WorkPackage", "Deliverable", "Requirement", "KPI", "Metric",
        # Status
        "Status", "StatusReason", "Decision", "Change", "Assumption", "Dependency",
        "Risk", "Issue", "Challenge", "Constraint", "Delay", "Cancellation",
        # ESG
        "EnvironmentalAspect", "Emissions", "Wastewater", "Water", "Waste", "Safety",
        "Regulation", "Compliance",
    ]

    # Relationship types permitted in the extracted graph, grouped by theme.
    ALLOWED_RELATIONSHIPS: List[str] = [
        # Document structure
        "HAS_REPORT", "HAS_DOCUMENT", "HAS_SECTION", "HAS_CHUNK", "HAS_EVIDENCE",
        "EVIDENCED_BY", "SUPPORTED_BY", "MENTIONS", "ABOUT",
        # Lifecycle
        "HAS_STATUS", "HAS_STATUS_REASON", "HAS_PHASE", "HAS_STAGE",
        "HAS_TIMELINE", "HAS_SCHEDULE", "HAS_MILESTONE",
        "STARTS_AT", "ENDS_AT", "UPDATED_ON", "RELEASED_ON", "COMPLETES_AT",
        # Organizations
        "OWNED_BY", "PARENT_OF", "HAS_PARENT", "MANAGED_BY", "OPERATED_BY",
        "LED_BY", "RESPONSIBLE_FOR", "WORKS_FOR", "HAS_ROLE",
        "PARTNERED_WITH", "CONTRACTED_BY", "DESIGNED_BY", "ENGINEERED_BY",
        "CONSTRUCTED_BY", "PROCURED_BY", "SUPPLIED_BY", "REGULATED_BY",
        # Geography
        "LOCATED_IN", "HAS_ADDRESS", "IN_CITY", "IN_STATE", "IN_COUNTRY", "IN_REGION", "IN_ZONE",
        # Finance
        "HAS_BUDGET", "HAS_COST", "HAS_CAPEX", "HAS_OPEX", "HAS_TIV", "IN_CURRENCY",
        "FUNDED_BY", "ALLOCATED_TO",
        # Technical
        "IN_INDUSTRY", "IN_SECTOR", "IN_MARKET",
        "PRODUCES", "USES_FEEDSTOCK", "USES_FUEL", "USES_TECHNOLOGY", "USES_PROCESS",
        "REQUIRES_EQUIPMENT", "HAS_UNIT", "HAS_SYSTEM", "HAS_UTILITY", "HAS_CAPACITY",
        "MEETS_STANDARD",
        # Governance
        "REQUIRES_PERMIT", "HAS_REQUIREMENT", "HAS_DELIVERABLE",
        "HAS_ENVIRONMENTAL_ASPECT", "HAS_SAFETY_REQUIREMENT",
        # Risks
        "HAS_RISK", "HAS_ISSUE", "HAS_CHALLENGE", "HAS_CONSTRAINT",
        "CAUSED_BY", "RESULTED_IN", "AFFECTED_BY", "DELAYED_BY", "CANCELLED_DUE_TO",
    ]

    @classmethod
    def get_allowed_nodes(cls) -> List[str]:
        """Return a new list containing the allowed node labels.

        The result is a shallow copy of ``ALLOWED_NODES``, so mutating it
        does not alter the class-level list.
        """
        return cls.ALLOWED_NODES.copy()

    @classmethod
    def get_allowed_relationships(cls) -> List[str]:
        """Return a new list containing the allowed relationship types.

        The result is a shallow copy of ``ALLOWED_RELATIONSHIPS``, so mutating
        it does not alter the class-level list.
        """
        return cls.ALLOWED_RELATIONSHIPS.copy()