File size: 1,469 Bytes
a4ab72e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
from pyTigerGraph import TigerGraphConnection
from dotenv import load_dotenv
import os

# Load variables from a .env file (python-dotenv) so the lookups below can
# find the TIGERCLOUD_* settings.
load_dotenv()

# Build the TigerGraph connection from environment settings. Any variable
# that is not set is passed through as None.
conn = TigerGraphConnection(
    host=os.environ.get("TIGERCLOUD_HOST"),
    graphname=os.environ.get("TIGERCLOUD_GRAPHNAME"),
    apiToken=os.environ.get("TIGERCLOUD_TOKEN"),
)

# Print whatever the server's echo call returns.
echo_reply = conn.echo()
print(echo_reply)

# Parse both input files inside context managers so each handle is closed as
# soon as its JSON has been read, rather than being left open until garbage
# collection.
#
# The rest of the script iterates `chunks` as records with "company",
# "doc_name", "chunk_id" and "text" keys, and `entities` as records with a
# "chunk_id" plus an "entities" list of {"label", "text"} items.
with open("data/chunks.json", "r", encoding="utf-8") as chunks_file:
    chunks = json.load(chunks_file)

with open("data/entities.json", "r", encoding="utf-8") as entities_file:
    entities = json.load(entities_file)

print("Loading vertices...")

# Collect the distinct company names first so that each Company vertex is
# upserted exactly once, with no attributes.
companies = {record["company"] for record in chunks}
for company_name in companies:
    conn.upsertVertex("Company", company_name, {})

# Create each Filing vertex and its Company -FILED-> Filing edge.
#
# The loop runs once per chunk, but the requests depend only on the
# (company, doc_name) pair. Presumably several chunks come from the same
# document, so remember the pairs already sent and skip the repeats instead
# of re-issuing identical upserts.
filed_pairs = set()
for c in chunks:
    filing_id = c["doc_name"]
    company_id = c["company"]

    pair = (company_id, filing_id)
    if pair in filed_pairs:
        continue
    filed_pairs.add(pair)

    conn.upsertVertex("Filing", filing_id, {})
    conn.upsertEdge(
        "Company",
        company_id,
        "FILED",
        "Filing",
        filing_id
    )

# Create one Chunk vertex per chunk record and link it to its Filing with a
# Filing -CONTAINS-> Chunk edge.
for c in chunks:
    # pyTigerGraph expects plain {attribute_name: value} pairs and adds the
    # REST++ {"value": ...} wrapper itself. Pre-wrapping the text here would
    # send a nested dict as the attribute value instead of the chunk text.
    # NOTE(review): assumes Chunk.text is a STRING attribute in the schema
    # -- confirm.
    conn.upsertVertex(
        "Chunk",
        c["chunk_id"],
        {"text": c["text"]}
    )

    conn.upsertEdge(
        "Filing",
        c["doc_name"],
        "CONTAINS",
        "Chunk",
        c["chunk_id"]
    )

# Create Entity vertices and Chunk -MENTIONS-> Entity edges.
#
# The same entity can occur in many chunks, and more than once within a single
# chunk. Track what has already been sent so each vertex and each
# (chunk, entity) edge is upserted once instead of once per occurrence.
seen_entity_ids = set()
seen_mentions = set()
for e in entities:
    for ent in e["entities"]:
        # Entity id is "<label>_<first 50 characters of the entity text>".
        ent_id = f"{ent['label']}_{ent['text'][:50]}"

        if ent_id not in seen_entity_ids:
            seen_entity_ids.add(ent_id)
            conn.upsertVertex("Entity", ent_id, {})

        mention = (e["chunk_id"], ent_id)
        if mention in seen_mentions:
            continue
        seen_mentions.add(mention)

        conn.upsertEdge(
            "Chunk",
            e["chunk_id"],
            "MENTIONS",
            "Entity",
            ent_id
        )

# Announce completion, then print the vertex count returned for the "*"
# selector.
print("Done loading graph.")
vertex_counts = conn.getVertexCount("*")
print(vertex_counts)