Spaces:
Running
Running
| """Schema policy for LLM-driven graph extraction.""" | |
| from __future__ import annotations | |
| from typing import List | |
class SchemaPolicy:
    """Define allowed node labels and relationship types for LLM graph extraction.

    The LLMGraphTransformer benefits from explicit schema constraints. This schema
    is intentionally broad to support diverse project report questions (stakeholders,
    contracts, permitting, schedule, finance, risks, etc.).

    Both lists are class-level constants. Use the accessor class methods to obtain
    independent copies instead of mutating the shared lists in place.
    """

    # Node labels the extractor may emit, grouped by topic.
    ALLOWED_NODES: List[str] = [
        # Document structure
        "Project", "Report", "Document", "Section", "Chunk", "Source", "Evidence",
        # Organizations
        "Organization", "Company", "Owner", "ParentCompany", "Client", "Customer",
        "Partner", "JV", "Consortium", "Contractor", "Subcontractor", "Vendor", "Supplier",
        "Consultant", "EngineeringFirm", "EPC", "EPCM", "Operator",
        "GovernmentAgency", "Regulator", "Stakeholder",
        # People
        "Person", "Role", "Team", "Department",
        # Geography
        "Location", "Address", "City", "State", "Province", "Region", "Country", "County",
        "Zone", "Port", "Site", "Plant",
        # Finance
        "Budget", "Cost", "Capex", "Opex", "Estimate", "Investment", "Funding",
        "Currency", "TIV", "Revenue", "Tariff", "Price",
        # Timeline
        "Timeline", "Schedule", "Milestone", "Phase", "Stage", "Date", "Quarter", "Year",
        "Duration", "StartDate", "EndDate",
        # Technical
        "Industry", "Sector", "Market", "Demand", "Product", "Output", "Capacity",
        "Feedstock", "Fuel", "Technology", "Process", "Equipment", "Unit", "System", "Utility",
        "Specification", "Standard",
        # Contracts
        "Contract", "Agreement", "Tender", "Bid", "RFQ", "Procurement", "Permit",
        "WorkPackage", "Deliverable", "Requirement", "KPI", "Metric",
        # Status
        "Status", "StatusReason", "Decision", "Change", "Assumption", "Dependency",
        "Risk", "Issue", "Challenge", "Constraint", "Delay", "Cancellation",
        # ESG
        "EnvironmentalAspect", "Emissions", "Wastewater", "Water", "Waste", "Safety",
        "Regulation", "Compliance",
    ]

    # Relationship types the extractor may emit, grouped by topic.
    ALLOWED_RELATIONSHIPS: List[str] = [
        # Document structure
        "HAS_REPORT", "HAS_DOCUMENT", "HAS_SECTION", "HAS_CHUNK", "HAS_EVIDENCE",
        "EVIDENCED_BY", "SUPPORTED_BY", "MENTIONS", "ABOUT",
        # Lifecycle
        "HAS_STATUS", "HAS_STATUS_REASON", "HAS_PHASE", "HAS_STAGE",
        "HAS_TIMELINE", "HAS_SCHEDULE", "HAS_MILESTONE",
        "STARTS_AT", "ENDS_AT", "UPDATED_ON", "RELEASED_ON", "COMPLETES_AT",
        # Organizations
        "OWNED_BY", "PARENT_OF", "HAS_PARENT", "MANAGED_BY", "OPERATED_BY",
        "LED_BY", "RESPONSIBLE_FOR", "WORKS_FOR", "HAS_ROLE",
        "PARTNERED_WITH", "CONTRACTED_BY", "DESIGNED_BY", "ENGINEERED_BY",
        "CONSTRUCTED_BY", "PROCURED_BY", "SUPPLIED_BY", "REGULATED_BY",
        # Geography
        "LOCATED_IN", "HAS_ADDRESS", "IN_CITY", "IN_STATE", "IN_COUNTRY", "IN_REGION", "IN_ZONE",
        # Finance
        "HAS_BUDGET", "HAS_COST", "HAS_CAPEX", "HAS_OPEX", "HAS_TIV", "IN_CURRENCY",
        "FUNDED_BY", "ALLOCATED_TO",
        # Technical
        "IN_INDUSTRY", "IN_SECTOR", "IN_MARKET",
        "PRODUCES", "USES_FEEDSTOCK", "USES_FUEL", "USES_TECHNOLOGY", "USES_PROCESS",
        "REQUIRES_EQUIPMENT", "HAS_UNIT", "HAS_SYSTEM", "HAS_UTILITY", "HAS_CAPACITY",
        "MEETS_STANDARD",
        # Governance
        "REQUIRES_PERMIT", "HAS_REQUIREMENT", "HAS_DELIVERABLE",
        "HAS_ENVIRONMENTAL_ASPECT", "HAS_SAFETY_REQUIREMENT",
        # Risks
        "HAS_RISK", "HAS_ISSUE", "HAS_CHALLENGE", "HAS_CONSTRAINT",
        "CAUSED_BY", "RESULTED_IN", "AFFECTED_BY", "DELAYED_BY", "CANCELLED_DUE_TO",
    ]

    # The accessors take ``cls``; without ``@classmethod`` a call on the class
    # itself (``SchemaPolicy.get_allowed_nodes()``) raises TypeError. The
    # decorator keeps instance calls working as before.
    @classmethod
    def get_allowed_nodes(cls) -> List[str]:
        """Return a shallow copy of ``ALLOWED_NODES``.

        A copy is returned so callers can modify the result without changing
        the class-level list.
        """
        return cls.ALLOWED_NODES.copy()

    @classmethod
    def get_allowed_relationships(cls) -> List[str]:
        """Return a shallow copy of ``ALLOWED_RELATIONSHIPS``.

        A copy is returned so callers can modify the result without changing
        the class-level list.
        """
        return cls.ALLOWED_RELATIONSHIPS.copy()