File size: 3,997 Bytes
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""Schema policy for LLM-driven graph extraction."""

from __future__ import annotations

from typing import List


class SchemaPolicy:
    """Vocabulary of node labels and relationship types for LLM graph extraction.

    An LLMGraphTransformer works better when it is handed an explicit schema.
    The vocabulary below is deliberately wide so that many kinds of project
    report questions can be answered: stakeholders, contracts, permitting,
    schedule, finance, risks, and so on.

    Both vocabularies are plain class-level lists. The accessor classmethods
    hand out copies, so code that edits a returned list leaves the class-level
    list untouched.
    """

    ALLOWED_NODES: List[str] = [
        # -- documents and provenance --
        "Project",
        "Report",
        "Document",
        "Section",
        "Chunk",
        "Source",
        "Evidence",
        # -- organizations and their commercial roles --
        "Organization",
        "Company",
        "Owner",
        "ParentCompany",
        "Client",
        "Customer",
        "Partner",
        "JV",
        "Consortium",
        "Contractor",
        "Subcontractor",
        "Vendor",
        "Supplier",
        "Consultant",
        "EngineeringFirm",
        "EPC",
        "EPCM",
        "Operator",
        "GovernmentAgency",
        "Regulator",
        "Stakeholder",
        # -- people and teams --
        "Person",
        "Role",
        "Team",
        "Department",
        # -- places --
        "Location",
        "Address",
        "City",
        "State",
        "Province",
        "Region",
        "Country",
        "County",
        "Zone",
        "Port",
        "Site",
        "Plant",
        # -- money --
        "Budget",
        "Cost",
        "Capex",
        "Opex",
        "Estimate",
        "Investment",
        "Funding",
        "Currency",
        "TIV",
        "Revenue",
        "Tariff",
        "Price",
        # -- scheduling --
        "Timeline",
        "Schedule",
        "Milestone",
        "Phase",
        "Stage",
        "Date",
        "Quarter",
        "Year",
        "Duration",
        "StartDate",
        "EndDate",
        # -- technical scope --
        "Industry",
        "Sector",
        "Market",
        "Demand",
        "Product",
        "Output",
        "Capacity",
        "Feedstock",
        "Fuel",
        "Technology",
        "Process",
        "Equipment",
        "Unit",
        "System",
        "Utility",
        "Specification",
        "Standard",
        # -- contracts and procurement --
        "Contract",
        "Agreement",
        "Tender",
        "Bid",
        "RFQ",
        "Procurement",
        "Permit",
        "WorkPackage",
        "Deliverable",
        "Requirement",
        "KPI",
        "Metric",
        # -- status and risk --
        "Status",
        "StatusReason",
        "Decision",
        "Change",
        "Assumption",
        "Dependency",
        "Risk",
        "Issue",
        "Challenge",
        "Constraint",
        "Delay",
        "Cancellation",
        # -- environment, safety, and compliance --
        "EnvironmentalAspect",
        "Emissions",
        "Wastewater",
        "Water",
        "Waste",
        "Safety",
        "Regulation",
        "Compliance",
    ]

    ALLOWED_RELATIONSHIPS: List[str] = [
        # -- documents and provenance --
        "HAS_REPORT",
        "HAS_DOCUMENT",
        "HAS_SECTION",
        "HAS_CHUNK",
        "HAS_EVIDENCE",
        "EVIDENCED_BY",
        "SUPPORTED_BY",
        "MENTIONS",
        "ABOUT",
        # -- lifecycle and scheduling --
        "HAS_STATUS",
        "HAS_STATUS_REASON",
        "HAS_PHASE",
        "HAS_STAGE",
        "HAS_TIMELINE",
        "HAS_SCHEDULE",
        "HAS_MILESTONE",
        "STARTS_AT",
        "ENDS_AT",
        "UPDATED_ON",
        "RELEASED_ON",
        "COMPLETES_AT",
        # -- organizations and responsibilities --
        "OWNED_BY",
        "PARENT_OF",
        "HAS_PARENT",
        "MANAGED_BY",
        "OPERATED_BY",
        "LED_BY",
        "RESPONSIBLE_FOR",
        "WORKS_FOR",
        "HAS_ROLE",
        "PARTNERED_WITH",
        "CONTRACTED_BY",
        "DESIGNED_BY",
        "ENGINEERED_BY",
        "CONSTRUCTED_BY",
        "PROCURED_BY",
        "SUPPLIED_BY",
        "REGULATED_BY",
        # -- places --
        "LOCATED_IN",
        "HAS_ADDRESS",
        "IN_CITY",
        "IN_STATE",
        "IN_COUNTRY",
        "IN_REGION",
        "IN_ZONE",
        # -- money --
        "HAS_BUDGET",
        "HAS_COST",
        "HAS_CAPEX",
        "HAS_OPEX",
        "HAS_TIV",
        "IN_CURRENCY",
        "FUNDED_BY",
        "ALLOCATED_TO",
        # -- technical scope --
        "IN_INDUSTRY",
        "IN_SECTOR",
        "IN_MARKET",
        "PRODUCES",
        "USES_FEEDSTOCK",
        "USES_FUEL",
        "USES_TECHNOLOGY",
        "USES_PROCESS",
        "REQUIRES_EQUIPMENT",
        "HAS_UNIT",
        "HAS_SYSTEM",
        "HAS_UTILITY",
        "HAS_CAPACITY",
        "MEETS_STANDARD",
        # -- governance --
        "REQUIRES_PERMIT",
        "HAS_REQUIREMENT",
        "HAS_DELIVERABLE",
        "HAS_ENVIRONMENTAL_ASPECT",
        "HAS_SAFETY_REQUIREMENT",
        # -- risks and causality --
        "HAS_RISK",
        "HAS_ISSUE",
        "HAS_CHALLENGE",
        "HAS_CONSTRAINT",
        "CAUSED_BY",
        "RESULTED_IN",
        "AFFECTED_BY",
        "DELAYED_BY",
        "CANCELLED_DUE_TO",
    ]

    @classmethod
    def get_allowed_nodes(cls) -> List[str]:
        """Return a new list holding every entry of ``ALLOWED_NODES`` in order.

        The result is a shallow copy, so appending to or removing from it does
        not change the class-level list.
        """
        return list(cls.ALLOWED_NODES)

    @classmethod
    def get_allowed_relationships(cls) -> List[str]:
        """Return a new list holding every entry of ``ALLOWED_RELATIONSHIPS`` in order.

        The result is a shallow copy, so appending to or removing from it does
        not change the class-level list.
        """
        return list(cls.ALLOWED_RELATIONSHIPS)