Commit
·
608d44d
1
Parent(s):
acd93cd
Squirro Docs added
Browse files
kg_builder/src/api_connections.py
CHANGED
|
@@ -29,8 +29,13 @@ def get_graph_connection(data_source_name):
|
|
| 29 |
url = os.getenv("TRAFFIC_NEO4J_URL")
|
| 30 |
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
| 31 |
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
else:
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
return Neo4jGraph(url=url, username=username, password=password)
|
| 36 |
|
|
@@ -87,6 +92,39 @@ def get_extraction_chain(
|
|
| 87 |
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
| 88 |
"""
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
elif data_source_name == "Traffic Law":
|
| 91 |
# Traffic Law-specific prompt
|
| 92 |
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
|
@@ -121,7 +159,8 @@ def get_extraction_chain(
|
|
| 121 |
"""
|
| 122 |
|
| 123 |
else:
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
logger.info(f"Prompt to extract graph data: {prompt_text}")
|
| 127 |
|
|
|
|
| 29 |
url = os.getenv("TRAFFIC_NEO4J_URL")
|
| 30 |
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
| 31 |
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
| 32 |
+
elif data_source_name == "SquirroDocs":
|
| 33 |
+
url = os.getenv("TRAFFIC_NEO4J_URL")
|
| 34 |
+
username = os.getenv("TRAFFIC_NEO4J_USERNAME")
|
| 35 |
+
password = os.getenv("TRAFFIC_NEO4J_PASSWORD")
|
| 36 |
else:
|
| 37 |
+
errorMsg = f"No such Data Source connection configured: {data_source_name}"
|
| 38 |
+
raise ValueError(errorMsg)
|
| 39 |
|
| 40 |
return Neo4jGraph(url=url, username=username, password=password)
|
| 41 |
|
|
|
|
| 92 |
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
| 93 |
"""
|
| 94 |
|
| 95 |
+
elif data_source_name == "SquirroDocs":
|
| 96 |
+
# Squirro Docs-specific prompt
|
| 97 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
| 98 |
+
## 1. Overview
|
| 99 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about Squirro documentation.
|
| 100 |
+
- **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
|
| 101 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
|
| 102 |
+
|
| 103 |
+
## 2. Labeling Nodes
|
| 104 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
| 105 |
+
- For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
|
| 106 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
| 107 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
|
| 108 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
|
| 109 |
+
|
| 110 |
+
## 3. Handling Numerical Data and Dates
|
| 111 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
| 112 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
| 113 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
|
| 114 |
+
|
| 115 |
+
## 4. Coreference Resolution
|
| 116 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
| 117 |
+
- For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
|
| 118 |
+
|
| 119 |
+
## 5. Relationship Naming Conventions
|
| 120 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
| 121 |
+
- For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
| 122 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
|
| 123 |
+
|
| 124 |
+
## 6. Strict Compliance
|
| 125 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
| 126 |
+
"""
|
| 127 |
+
|
| 128 |
elif data_source_name == "Traffic Law":
|
| 129 |
# Traffic Law-specific prompt
|
| 130 |
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
|
|
|
| 159 |
"""
|
| 160 |
|
| 161 |
else:
|
| 162 |
+
errorMsg = f"No prompt configured for Data Source '{data_source_name}'!"
|
| 163 |
+
raise ValueError(errorMsg)
|
| 164 |
|
| 165 |
logger.info(f"Prompt to extract graph data: {prompt_text}")
|
| 166 |
|