import json
from pyTigerGraph import TigerGraphConnection
from dotenv import load_dotenv
import os
# Load settings via python-dotenv so the TIGERCLOUD_* variables read below
# are available through os.getenv.
load_dotenv()

# Collect the connection settings first, then open the TigerGraph connection.
# os.getenv yields None for any variable that is not set.
connection_settings = {
    "host": os.getenv("TIGERCLOUD_HOST"),
    "graphname": os.getenv("TIGERCLOUD_GRAPHNAME"),
    "apiToken": os.getenv("TIGERCLOUD_TOKEN"),
}
conn = TigerGraphConnection(**connection_settings)

# Print the server's echo response; this doubles as a quick connectivity check.
print(conn.echo())
# Read the chunk and entity records from disk. The `with` blocks close each
# file handle as soon as parsing finishes instead of leaving the open file
# objects to be reclaimed by the garbage collector.
with open("data/chunks.json", "r", encoding="utf-8") as chunks_file:
    chunks = json.load(chunks_file)
with open("data/entities.json", "r", encoding="utf-8") as entities_file:
    entities = json.load(entities_file)
print("Loading vertices...")

# One Company vertex per distinct company name found in the chunk records.
companies = {c["company"] for c in chunks}
for company in companies:
    conn.upsertVertex("Company", company, {})

# Several chunk records can carry the same company/doc_name combination, so
# reduce them to distinct pairs before talking to the server: each Filing
# vertex and FILED edge is then upserted once per pair instead of once per
# chunk. dict.fromkeys removes duplicates while keeping first-seen order.
company_filings = dict.fromkeys((c["company"], c["doc_name"]) for c in chunks)
for company, filing_id in company_filings:
    conn.upsertVertex("Filing", filing_id, {})
    conn.upsertEdge("Company", company, "FILED", "Filing", filing_id)
# Create one Chunk vertex per record and attach it to its Filing.
for c in chunks:
    # pyTigerGraph's upsertVertex takes attributes as {name: value} (or
    # {name: (value, operator)}) and builds the REST "value" envelope itself,
    # so the raw text is passed directly rather than pre-wrapped in
    # {"value": ...}.
    # NOTE(review): assumes Chunk.text is a plain string attribute in the
    # graph schema -- confirm against the schema definition.
    conn.upsertVertex("Chunk", c["chunk_id"], {"text": c["text"]})
    conn.upsertEdge("Filing", c["doc_name"], "CONTAINS", "Chunk", c["chunk_id"])
# Create Entity vertices and link each chunk to the entities it mentions.
# Entity ids that were already upserted are remembered so a repeated mention
# does not trigger another identical request.
seen_entity_ids = set()
for e in entities:
    # Entity ids already linked to this record's chunk via MENTIONS.
    linked_entity_ids = set()
    for ent in e["entities"]:
        # Vertex id is the label plus the first 50 characters of the text.
        ent_id = f"{ent['label']}_{ent['text'][:50]}"
        if ent_id not in seen_entity_ids:
            seen_entity_ids.add(ent_id)
            conn.upsertVertex("Entity", ent_id, {})
        if ent_id in linked_entity_ids:
            continue
        linked_entity_ids.add(ent_id)
        conn.upsertEdge("Chunk", e["chunk_id"], "MENTIONS", "Entity", ent_id)
# Announce completion, then print what getVertexCount returns for the "*"
# wildcard.
print("Done loading graph.")
vertex_total = conn.getVertexCount("*")
print(vertex_total)