GodsDevProject committed on
Commit
c68e070
·
verified ·
1 Parent(s): ff39e36

Update entity_graph.py

Browse files
Files changed (1) hide show
  1. entity_graph.py +13 -137
entity_graph.py CHANGED
@@ -1,141 +1,17 @@
 
1
  from typing import List, Dict
2
  import json
3
- import html
4
- import re
5
 
6
- # -------------------------------------------------
7
- # Simple Named-Entity Extraction (rule-based)
8
- # -------------------------------------------------
9
-
10
# A capital letter followed by at least two letters, digits or hyphens,
# delimited by word boundaries (so the shortest match is three characters).
ENTITY_PATTERN = re.compile(r"\b[A-Z][A-Za-z0-9\-]{2,}\b")


def extract_entities(text: str) -> List[str]:
    """Return the distinct capitalised tokens found in *text*.

    Matching is purely rule-based (see ``ENTITY_PATTERN``); no linguistic
    analysis is performed.

    Args:
        text: Raw text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        Each matching token once, in order of first appearance.
    """
    if not text:
        return []
    # dict.fromkeys de-duplicates while keeping first-seen order. Going
    # through a set made the order depend on string hash randomisation, so
    # the graph built from this list differed from one run to the next.
    return list(dict.fromkeys(ENTITY_PATTERN.findall(text)))
16
-
17
-
18
- # -------------------------------------------------
19
- # Build Entity Graph JSON
20
- # -------------------------------------------------
21
-
22
def build_entity_graph(documents: List[Dict]) -> Dict:
    """Build a node/link dictionary from the entities found in each document.

    Every entity returned by ``extract_entities`` for a document's ``"text"``
    becomes a node, and every pair of entities from the same document
    becomes a link with ``"value": 1``.

    Args:
        documents: Dicts read via ``"text"`` (default ``""``) and
            ``"agency"`` (default ``"Unknown"``).

    Returns:
        ``{"nodes": [{"id", "group"}, ...], "links": [{"source", "target",
        "value"}, ...]}``.
    """
    node_by_id = {}
    edge_list = []

    for document in documents:
        found = extract_entities(document.get("text", ""))
        group = document.get("agency", "Unknown")

        for name in found:
            # The first document that mentions an entity decides its group.
            node_by_id.setdefault(name, {"id": name, "group": group})

        # One link per unordered pair of entities within this document.
        edge_list.extend(
            {"source": left, "target": right, "value": 1}
            for position, left in enumerate(found)
            for right in found[position + 1:]
        )

    return {"nodes": list(node_by_id.values()), "links": edge_list}
49
-
50
-
51
- # -------------------------------------------------
52
- # Render Client-Side D3 Graph (HTML)
53
- # -------------------------------------------------
54
-
55
def render_d3(graph: Dict) -> str:
    """Render *graph* as a standalone HTML page with a D3 force layout.

    Args:
        graph: JSON-serialisable dict with ``"nodes"`` (objects carrying an
            ``"id"``) and ``"links"`` (objects carrying ``"source"`` and
            ``"target"`` ids), as consumed by the script below.

    Returns:
        A complete HTML document as a string. The page loads D3 v7 from
        d3js.org and draws the graph client-side.
    """
    # The data is embedded inside a <script> element, whose content is raw
    # text: HTML entities are NOT decoded there. HTML-escaping the JSON
    # therefore handed the browser literal "&quot;" sequences and the parse
    # failed. JSON is already a valid JavaScript expression, so embed it
    # directly and only neutralise the characters that could terminate the
    # script element or open markup ("</script>", "<!--").
    graph_json = (
        json.dumps(graph)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v7.min.js"></script>
<style>
body {{ font-family: sans-serif; }}
svg {{ width: 100%; height: 600px; }}
.node {{ cursor: pointer; }}
.link {{ stroke: #aaa; stroke-width: 1px; }}
</style>
</head>
<body>
<svg></svg>
<script>
const graph = {graph_json};

const width = 900;
const height = 600;

const svg = d3.select("svg")
  .attr("viewBox", [0, 0, width, height]);

const simulation = d3.forceSimulation(graph.nodes)
  .force("link", d3.forceLink(graph.links).id(d => d.id).distance(80))
  .force("charge", d3.forceManyBody().strength(-200))
  .force("center", d3.forceCenter(width / 2, height / 2));

const link = svg.append("g")
  .selectAll("line")
  .data(graph.links)
  .enter()
  .append("line")
  .attr("class", "link");

const node = svg.append("g")
  .selectAll("circle")
  .data(graph.nodes)
  .enter()
  .append("circle")
  .attr("r", 6)
  .attr("fill", "#4f46e5")
  .call(d3.drag()
    .on("start", dragstarted)
    .on("drag", dragged)
    .on("end", dragended)
  );

node.append("title")
  .text(d => d.id);

simulation.on("tick", () => {{
  link
    .attr("x1", d => d.source.x)
    .attr("y1", d => d.source.y)
    .attr("x2", d => d.target.x)
    .attr("y2", d => d.target.y);

  node
    .attr("cx", d => d.x)
    .attr("cy", d => d.y);
}});

function dragstarted(event, d) {{
  if (!event.active) simulation.alphaTarget(0.3).restart();
  d.fx = d.x;
  d.fy = d.y;
}}

function dragged(event, d) {{
  d.fx = event.x;
  d.fy = event.y;
}}

function dragended(event, d) {{
  if (!event.active) simulation.alphaTarget(0);
  d.fx = null;
  d.fy = null;
}}
</script>
</body>
</html>
"""
 
1
+ import networkx as nx
2
  from typing import List, Dict
3
  import json
 
 
4
 
5
def build_entity_graph(docs: List[Dict]) -> Dict:
    """Link each document's agency to the all-uppercase words in its title.

    Args:
        docs: Dicts read via ``"agency"`` and ``"title"``. A missing or
            ``None`` agency is filed under ``"Unknown"``; a missing, ``None``
            or empty title contributes the agency node only.

    Returns:
        ``{"nodes": [{"id": ...}, ...], "links": [{"source": ..., "target":
        ...}, ...]}``. Nodes appear in first-seen order; each undirected
        agency/word pair appears once, with the earlier-seen endpoint as
        ``"source"``.
    """
    # node -> insertion-ordered set of neighbours (dict keys keep order).
    # This gives the same de-duplication and ordering as an undirected
    # graph container, without needing a graph library for it.
    adjacency = {}

    for d in docs:
        agency = d.get("agency")
        if agency is None:
            agency = "Unknown"
        adjacency.setdefault(agency, {})

        for word in (d.get("title") or "").split():
            if word.isupper():
                # Record the edge in both directions; re-adding an existing
                # edge leaves its position unchanged.
                adjacency[agency][word] = None
                adjacency.setdefault(word, {})[agency] = None

    links = []
    emitted = set()
    for node, neighbours in adjacency.items():
        for other in neighbours:
            # Skip pairs already reported from the other endpoint.
            if other not in emitted:
                links.append({"source": node, "target": other})
        emitted.add(node)

    data = {
        "nodes": [{"id": n} for n in adjacency],
        "links": links,
    }
    return data