vietexob committed on
Commit
d5e6064
·
1 Parent(s): 110ce02

Fixing weird behaviors

Browse files
Files changed (5) hide show
  1. app.py +60 -48
  2. knowledge_graph.html +2 -2
  3. llm_graph.py +28 -12
  4. sample/kv_store_doc_status.json +35 -0
  5. visualize.py +37 -95
app.py CHANGED
@@ -31,11 +31,14 @@ CUSTOM_CSS = """
31
  """
32
 
33
  # Cache directory and file paths
34
- CACHE_DIR = "cache"
 
35
  EXAMPLE_CACHE_FILE = os.path.join(CACHE_DIR, "first_example_cache.pkl")
 
36
 
37
  # Create cache directory if it doesn't exist
38
  os.makedirs(CACHE_DIR, exist_ok=True)
 
39
 
40
  # Initialize the LLMGraph model
41
  model = LLMGraph()
@@ -62,7 +65,7 @@ def handle_text(text=""):
62
 
63
  return " ".join(text.split())
64
 
65
- def extract_kg(text="", model_name=None):
66
  """
67
  Extract knowledge graph from text
68
  """
@@ -73,7 +76,10 @@ def extract_kg(text="", model_name=None):
73
  try:
74
  result = model.extract(text, model_name)
75
 
76
- return rapidjson.loads(result)
 
 
 
77
  except Exception as e:
78
  raise gr.Error(f"❌ Extraction error: {str(e)}")
79
 
@@ -108,7 +114,7 @@ def find_token_indices(doc, substring, text):
108
 
109
  return result
110
 
111
- def create_custom_entity_viz(data, full_text):
112
  """
113
  Create custom entity visualization using spaCy's displacy
114
  """
@@ -130,7 +136,7 @@ def create_custom_entity_viz(data, full_text):
130
  overlapping = any(s.start < end and start < s.end for s in spans)
131
 
132
  if not overlapping:
133
- node_type = node.get("type", "Entity")
134
  span = Span(doc, start, end, label=node_type)
135
  spans.append(span)
136
 
@@ -156,30 +162,33 @@ def create_custom_entity_viz(data, full_text):
156
 
157
  return styled_html
158
 
159
- def create_graph(json_data):
160
  """
161
  Create interactive knowledge graph using pyvis
162
  """
163
 
164
- G = nx.Graph()
 
165
 
166
- # Add nodes with tooltips and error handling for missing keys
167
- for node in json_data['nodes']:
168
- # Get node type with fallback
169
- type = node.get("type", "Entity")
170
 
171
- # Get detailed type with fallback
172
- detailed_type = node.get("detailed_type", type)
173
-
174
- # Use node ID and type info for the tooltip
175
- G.add_node(node['id'], title=f"{type}: {detailed_type}")
176
-
177
- # Add edges with labels
178
- for edge in json_data['edges']:
179
- # Check if the required keys exist
180
- if 'from' in edge and 'to' in edge:
181
- label = edge.get('label', 'related')
182
- G.add_edge(edge['from'], edge['to'], title=label, label=label)
 
 
183
 
184
  # Create network visualization
185
  network = Network(
@@ -193,17 +202,20 @@ def create_graph(json_data):
193
 
194
  # Configure network display
195
  network.from_nx(G)
196
- # network.barnes_hut(
197
- # gravity=-3000,
198
- # central_gravity=0.3,
199
- # spring_length=50,
200
- # spring_strength=0.001,
201
- # damping=0.09,
202
- # overlap=0,
203
- # )
204
 
205
  # Customize node appearance
206
  for node in network.nodes:
 
 
 
207
  node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
208
  node['font'] = {'size': 14, 'color': '#1e293b'}
209
  node['shape'] = 'dot'
@@ -211,6 +223,9 @@ def create_graph(json_data):
211
 
212
  # Customize edge appearance
213
  for edge in network.edges:
 
 
 
214
  edge['width'] = 4
215
  # edge['arrows'] = {'to': {'enabled': False, 'type': 'arrow'}}
216
  edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
@@ -236,20 +251,20 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
236
 
237
  # Check if we're processing the first example for caching
238
  is_first_example = text == EXAMPLES[0][0]
239
-
240
- asyncio.run(model.initialize_rag()) # Ensure RAG is initialized
 
241
 
242
  # Try to load from cache if it's the first example
243
- if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
244
  try:
245
  progress(0.3, desc="Loading from cache...")
246
  with open(EXAMPLE_CACHE_FILE, 'rb') as f:
247
- cache_data = pickle.load(f)
248
 
249
  progress(1.0, desc="Loaded from cache!")
250
- return cache_data["graph_html"], cache_data["entities_viz"], cache_data["json_data"], cache_data["stats"]
251
  except Exception as e:
252
- # print(f"Cache loading error: {str(e)}")
253
  logging.error(f"Cache loading error: {str(e)}")
254
 
255
  # Continue with normal processing if cache fails
@@ -257,28 +272,30 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
257
  json_data = extract_kg(text, model_name)
258
 
259
  progress(0.5, desc="Creating entity visualization...")
260
- entities_viz = create_custom_entity_viz(json_data, text)
 
 
 
261
 
262
  progress(0.8, desc="Building knowledge graph...")
263
- graph_html = create_graph(json_data)
264
 
265
  node_count = len(json_data["nodes"])
266
  edge_count = len(json_data["edges"])
267
  stats = f"📊 Extracted {node_count} entities and {edge_count} relationships"
268
 
269
  # Save to cache if it's the first example
270
- if is_first_example:
271
  try:
272
- cache_data = {
273
  "graph_html": graph_html,
274
  "entities_viz": entities_viz,
275
  "json_data": json_data,
276
  "stats": stats
277
  }
278
  with open(EXAMPLE_CACHE_FILE, 'wb') as f:
279
- pickle.dump(cache_data, f)
280
  except Exception as e:
281
- # print(f"Cache saving error: {str(e)}")
282
  logging.error(f"Cache saving error: {str(e)}")
283
 
284
  progress(1.0, desc="Complete!")
@@ -312,7 +329,6 @@ def generate_first_example():
312
  """
313
 
314
  if not os.path.exists(EXAMPLE_CACHE_FILE):
315
- # print("Generating cache for first example...")
316
  logging.info("Generating cache for first example...")
317
 
318
  try:
@@ -338,15 +354,12 @@ def generate_first_example():
338
 
339
  with open(EXAMPLE_CACHE_FILE, 'wb') as f:
340
  pickle.dump(cached_data, f)
341
- # print("First example cache generated successfully")
342
  logging.info("First example cache generated successfully")
343
 
344
  return cached_data
345
  except Exception as e:
346
- # print(f"Error generating first example cache: {str(e)}")
347
  logging.error(f"Error generating first example cache: {str(e)}")
348
  else:
349
- # print("First example cache already exists")
350
  logging.info("First example cache already exists")
351
 
352
  # Load existing cache
@@ -354,7 +367,6 @@ def generate_first_example():
354
  with open(EXAMPLE_CACHE_FILE, 'rb') as f:
355
  return pickle.load(f)
356
  except Exception as e:
357
- # print(f"Error loading existing cache: {str(e)}")
358
  logging.error(f"Error loading existing cache: {str(e)}")
359
 
360
  return None
 
31
  """
32
 
33
  # Cache directory and file paths
34
+ CACHE_DIR = "./cache"
35
+ WORKING_DIR = "./sample"
36
  EXAMPLE_CACHE_FILE = os.path.join(CACHE_DIR, "first_example_cache.pkl")
37
+ GRAPHML_FILE = WORKING_DIR + "/graph_chunk_entity_relation.graphml"
38
 
39
  # Create cache directory if it doesn't exist
40
  os.makedirs(CACHE_DIR, exist_ok=True)
41
+ os.makedirs(WORKING_DIR, exist_ok=True)
42
 
43
  # Initialize the LLMGraph model
44
  model = LLMGraph()
 
65
 
66
  return " ".join(text.split())
67
 
68
+ def extract_kg(text="", model_name=MODEL_LIST[0]):
69
  """
70
  Extract knowledge graph from text
71
  """
 
76
  try:
77
  result = model.extract(text, model_name)
78
 
79
+ if isinstance(result, dict):
80
+ return result
81
+ else: # convert string to dict
82
+ return rapidjson.loads(result)
83
  except Exception as e:
84
  raise gr.Error(f"❌ Extraction error: {str(e)}")
85
 
 
114
 
115
  return result
116
 
117
+ def create_custom_entity_viz(data, full_text, type_col="type"):
118
  """
119
  Create custom entity visualization using spaCy's displacy
120
  """
 
136
  overlapping = any(s.start < end and start < s.end for s in spans)
137
 
138
  if not overlapping:
139
+ node_type = node.get(type_col, "Entity")
140
  span = Span(doc, start, end, label=node_type)
141
  spans.append(span)
142
 
 
162
 
163
  return styled_html
164
 
165
+ def create_graph(json_data, model_name=MODEL_LIST[0]):
166
  """
167
  Create interactive knowledge graph using pyvis
168
  """
169
 
170
+ if model_name == MODEL_LIST[0]:
171
+ G = nx.Graph()
172
 
173
+ # Add nodes with tooltips and error handling for missing keys
174
+ for node in json_data['nodes']:
175
+ # Get node type with fallback
176
+ type = node.get("type", "Entity")
177
 
178
+ # Get detailed type with fallback
179
+ detailed_type = node.get("detailed_type", type)
180
+
181
+ # Use node ID and type info for the tooltip
182
+ G.add_node(node['id'], title=f"{type}: {detailed_type}")
183
+
184
+ # Add edges with labels
185
+ for edge in json_data['edges']:
186
+ # Check if the required keys exist
187
+ if 'from' in edge and 'to' in edge:
188
+ label = edge.get('label', 'related')
189
+ G.add_edge(edge['from'], edge['to'], title=label, label=label)
190
+ else:
191
+ G = nx.read_graphml(GRAPHML_FILE)
192
 
193
  # Create network visualization
194
  network = Network(
 
202
 
203
  # Configure network display
204
  network.from_nx(G)
205
+ network.barnes_hut(
206
+ gravity=-3000,
207
+ central_gravity=0.3,
208
+ spring_length=50,
209
+ spring_strength=0.001,
210
+ damping=0.09,
211
+ overlap=0,
212
+ )
213
 
214
  # Customize node appearance
215
  for node in network.nodes:
216
+ if "description" in node:
217
+ node["title"] = node["description"]
218
+
219
  node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
220
  node['font'] = {'size': 14, 'color': '#1e293b'}
221
  node['shape'] = 'dot'
 
223
 
224
  # Customize edge appearance
225
  for edge in network.edges:
226
+ if "description" in edge:
227
+ edge["title"] = edge["description"]
228
+
229
  edge['width'] = 4
230
  # edge['arrows'] = {'to': {'enabled': False, 'type': 'arrow'}}
231
  edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
 
251
 
252
  # Check if we're processing the first example for caching
253
  is_first_example = text == EXAMPLES[0][0]
254
+
255
+ # Ensure RAG is initialized
256
+ asyncio.run(model.initialize_rag())
257
 
258
  # Try to load from cache if it's the first example
259
+ if is_first_example and model_name == MODEL_LIST[0] and os.path.exists(EXAMPLE_CACHE_FILE):
260
  try:
261
  progress(0.3, desc="Loading from cache...")
262
  with open(EXAMPLE_CACHE_FILE, 'rb') as f:
263
+ cached_data = pickle.load(f)
264
 
265
  progress(1.0, desc="Loaded from cache!")
266
+ return cached_data["graph_html"], cached_data["entities_viz"], cached_data["json_data"], cached_data["stats"]
267
  except Exception as e:
 
268
  logging.error(f"Cache loading error: {str(e)}")
269
 
270
  # Continue with normal processing if cache fails
 
272
  json_data = extract_kg(text, model_name)
273
 
274
  progress(0.5, desc="Creating entity visualization...")
275
+ if model_name == MODEL_LIST[0]:
276
+ entities_viz = create_custom_entity_viz(json_data, text, type_col="type")
277
+ else:
278
+ entities_viz = create_custom_entity_viz(json_data, text, type_col="entity_type")
279
 
280
  progress(0.8, desc="Building knowledge graph...")
281
+ graph_html = create_graph(json_data, model_name)
282
 
283
  node_count = len(json_data["nodes"])
284
  edge_count = len(json_data["edges"])
285
  stats = f"📊 Extracted {node_count} entities and {edge_count} relationships"
286
 
287
  # Save to cache if it's the first example
288
+ if is_first_example and model_name == MODEL_LIST[0]:
289
  try:
290
+ cached_data = {
291
  "graph_html": graph_html,
292
  "entities_viz": entities_viz,
293
  "json_data": json_data,
294
  "stats": stats
295
  }
296
  with open(EXAMPLE_CACHE_FILE, 'wb') as f:
297
+ pickle.dump(cached_data, f)
298
  except Exception as e:
 
299
  logging.error(f"Cache saving error: {str(e)}")
300
 
301
  progress(1.0, desc="Complete!")
 
329
  """
330
 
331
  if not os.path.exists(EXAMPLE_CACHE_FILE):
 
332
  logging.info("Generating cache for first example...")
333
 
334
  try:
 
354
 
355
  with open(EXAMPLE_CACHE_FILE, 'wb') as f:
356
  pickle.dump(cached_data, f)
 
357
  logging.info("First example cache generated successfully")
358
 
359
  return cached_data
360
  except Exception as e:
 
361
  logging.error(f"Error generating first example cache: {str(e)}")
362
  else:
 
363
  logging.info("First example cache already exists")
364
 
365
  # Load existing cache
 
367
  with open(EXAMPLE_CACHE_FILE, 'rb') as f:
368
  return pickle.load(f)
369
  except Exception as e:
 
370
  logging.error(f"Error loading existing cache: {str(e)}")
371
 
372
  return None
knowledge_graph.html CHANGED
@@ -88,8 +88,8 @@
88
 
89
 
90
  // parsing and collecting nodes and edges from the python
91
- nodes = new vis.DataSet([{"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring.", "entity_id": "Aerosmith", "entity_type": "organization", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Aerosmith", "label": "Aerosmith", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring.", "entity_id": "Steven Tyler", "entity_type": "person", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Steven Tyler", "label": "Steven Tyler", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring.", "entity_id": "Vocal Cord Injury", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Vocal Cord Injury", "label": "Vocal Cord Injury", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury.", "entity_id": "Retirement from Touring", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Retirement from Touring", "label": "Retirement from Touring", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "September 2023 is the time when Steven Tyler suffered a fractured larynx.", "entity_id": "September 2023", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "September 2023", "label": "September 2023", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "September 2023 is the time when Steven Tyler suffered a fractured larynx."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated.", "entity_id": "Fractured Larynx", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Fractured Larynx", "label": "Fractured Larynx", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery.", "entity_id": "Unsuccessful Treatment", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Unsuccessful Treatment", "label": "Unsuccessful Treatment", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery."}]);
92
- edges = new vis.DataSet([{"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577684, "description": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band membership,cause of retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "to": "Steven Tyler", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "cause of retirement,health impact", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "Aerosmith officially announced their retirement from touring after 54 years.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band decision,career milestone", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith officially announced their retirement from touring after 54 years.", "to": "Retirement from Touring", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Steven Tyler suffered a fractured larynx in September 2023.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", 
"keywords": "injury timing,medical event", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler suffered a fractured larynx in September 2023.", "to": "September 2023", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "cause-effect,medical condition", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577690, "description": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "injury detail,medical diagnosis", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577691, "description": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health outcome,medical treatment", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}, {"color": {"color": "#6366f1", "highlight": 
"#4f46e5"}, "created_at": 1756577691, "description": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Vocal Cord Injury", "keywords": "injury relationship,medical causation", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577693, "description": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Fractured Larynx", "keywords": "injury focus,medical intervention", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "to": "Unsuccessful Treatment", "width": 4}]);
93
 
94
  nodeColors = {};
95
  allNodes = nodes.get({ returnType: "Object" });
 
88
 
89
 
90
  // parsing and collecting nodes and edges from the python
91
+ nodes = new vis.DataSet([{"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Aerosmith is a legendary rock band that has announced their retirement from touring after 54 years.", "entity_id": "Aerosmith", "entity_type": "organization", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Aerosmith", "label": "Aerosmith", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith is a legendary rock band that has announced their retirement from touring after 54 years."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury leading to the band\u0027s retirement from touring.", "entity_id": "Steven Tyler", "entity_type": "person", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Steven Tyler", "label": "Steven Tyler", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury leading to the band\u0027s retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring.", "entity_id": "Vocal Cord Injury", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Vocal Cord Injury", "label": "Vocal Cord Injury", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Vocal cord injury refers to the unrecoverable 
injury suffered by Steven Tyler that caused Aerosmith to retire from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Touring refers to the activity of performing live concerts in various locations, which Aerosmith has retired from after 54 years.", "entity_id": "Touring", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Touring", "label": "Touring", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Touring refers to the activity of performing live concerts in various locations, which Aerosmith has retired from after 54 years."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated.", "entity_id": "Fractured Larynx", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Fractured Larynx", "label": "Fractured Larynx", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery.", "entity_id": "Unsuccessful Treatment", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Unsuccessful Treatment", "label": "Unsuccessful Treatment", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756651432, "description": "September 2023 is the time when Steven Tyler suffered a fractured larynx.", "entity_id": "September 2023", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "September 2023", "label": "September 2023", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "September 2023 is the time when Steven Tyler suffered a fractured larynx."}]);
92
+ edges = new vis.DataSet([{"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651440, "description": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band membership,cause of retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "to": "Steven Tyler", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651442, "description": "The vocal cord injury to Steven Tyler led to Aerosmith\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band decision,cause and effect", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury to Steven Tyler led to Aerosmith\u0027s retirement from touring.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651444, "description": "Aerosmith has officially retired from touring after 54 years.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "career activity,retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith has officially retired from touring after 54 years.", "to": "Touring", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651441, "description": "Steven Tyler suffered an unrecoverable vocal cord injury that affected his ability to perform.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "career impact,health issue", "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler suffered an unrecoverable vocal cord injury that affected his ability to perform.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651442, "description": "Steven Tyler\u0027s fractured larynx in September 2023 was the cause of his vocal cord injury.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health event,injury cause", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler\u0027s fractured larynx in September 2023 was the cause of his vocal cord injury.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651444, "description": "Steven Tyler underwent unsuccessful treatment for his fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health outcome,medical treatment", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler underwent unsuccessful treatment for his fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651444, "description": "The fractured larynx injury occurred in September 2023.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "September 2023", "keywords": "injury timing,temporal occurrence", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The fractured larynx injury occurred in September 2023.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756651447, "description": "The unsuccessful treatment was aimed at healing the fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 
12}, "from": "Fractured Larynx", "keywords": "injury management,medical intervention", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The unsuccessful treatment was aimed at healing the fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}]);
93
 
94
  nodeColors = {};
95
  allNodes = nodes.get({ returnType: "Object" });
llm_graph.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
- import asyncio
 
3
  import numpy as np
 
4
 
5
  from textwrap import dedent
6
  from dotenv import load_dotenv
@@ -25,7 +27,8 @@ AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
25
  AZURE_EMBEDDING_DEPLOYMENT = os.environ["AZURE_EMBEDDING_DEPLOYMENT"]
26
  AZURE_EMBEDDING_API_VERSION = os.environ["AZURE_EMBEDDING_API_VERSION"]
27
 
28
- WORKING_DIR = "./cache"
 
29
 
30
  MODEL_LIST = [
31
  "EmergentMethods/Phi-3-mini-128k-instruct-graph",
@@ -52,9 +55,9 @@ class LLMGraph:
52
  func=self._embedding_func,
53
  ),
54
  )
55
-
56
- await self.rag.initialize_storages()
57
- await initialize_pipeline_status()
58
 
59
  # async def test_responses(self):
60
  # """
@@ -151,24 +154,37 @@ class LLMGraph:
151
 
152
  return messages
153
 
154
- def extract(self, text, model_name=MODEL_LIST[0]) -> str:
155
  """
156
- Extract knowledge graph from text
157
  """
158
-
159
- generated_text = "This is a placeholder response."
160
 
161
  if model_name == MODEL_LIST[0]:
162
  # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
163
  messages = self._get_messages(text)
164
- generated_text = self._generate(messages)
 
 
165
  else:
 
 
 
 
166
  # Use LightRAG with Azure OpenAI
 
167
  self.rag.insert(text) # Insert the text into the RAG storage
168
- # TODO: Extract JSON format of the knowledge graph
 
 
 
169
 
170
- return generated_text
 
171
 
 
 
 
 
172
  async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
173
  """
174
  Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
 
1
  import os
2
+ import time
3
+ import shutil
4
  import numpy as np
5
+ import networkx as nx
6
 
7
  from textwrap import dedent
8
  from dotenv import load_dotenv
 
27
  AZURE_EMBEDDING_DEPLOYMENT = os.environ["AZURE_EMBEDDING_DEPLOYMENT"]
28
  AZURE_EMBEDDING_API_VERSION = os.environ["AZURE_EMBEDDING_API_VERSION"]
29
 
30
+ WORKING_DIR = "./sample"
31
+ GRAPHML_FILE = WORKING_DIR + "/graph_chunk_entity_relation.graphml"
32
 
33
  MODEL_LIST = [
34
  "EmergentMethods/Phi-3-mini-128k-instruct-graph",
 
55
  func=self._embedding_func,
56
  ),
57
  )
58
+ # TODO: Check if this works as expected
59
+ await self.rag.initialize_storages()
60
+ await initialize_pipeline_status()
61
 
62
  # async def test_responses(self):
63
  # """
 
154
 
155
  return messages
156
 
157
+ def extract(self, text, model_name=MODEL_LIST[0]):
158
  """
159
+ Extract knowledge graph in structured format from text.
160
  """
 
 
161
 
162
  if model_name == MODEL_LIST[0]:
163
  # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
164
  messages = self._get_messages(text)
165
+
166
+ json_graph = self._generate(messages)
167
+ return json_graph
168
  else:
169
+ if os.path.exists(WORKING_DIR):
170
+ shutil.rmtree(WORKING_DIR)
171
+ os.makedirs(WORKING_DIR, exist_ok=True)
172
+
173
  # Use LightRAG with Azure OpenAI
174
+ # TODO: Clear all the previous inserted texts first
175
  self.rag.insert(text) # Insert the text into the RAG storage
176
+
177
+ # Wait for GRAPHML_FILE to be created
178
+ while not os.path.exists(GRAPHML_FILE):
179
+ time.sleep(0.1) # Sleep for 100ms before checking again
180
 
181
+ # Extract dict format of the knowledge graph
182
+ G = nx.read_graphml(GRAPHML_FILE)
183
 
184
+ # Convert the graph to node-link data format
185
+ dict_graph = nx.node_link_data(G, edges="edges")
186
+ return dict_graph
187
+
188
  async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
189
  """
190
  Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
sample/kv_store_doc_status.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "doc-605403c35618c5288c57c562f8eca566": {
3
+ "status": "processed",
4
+ "chunks_count": 1,
5
+ "chunks_list": [
6
+ "chunk-605403c35618c5288c57c562f8eca566"
7
+ ],
8
+ "content_summary": "The family of Azerbaijan President Ilham Aliyev leads a charmed, glamorous life, thanks in part to financial interests in almost every sector of the economy. His wife, Mehriban, comes from the privileged and powerful Pashayev family that owns banks, ...",
9
+ "content_length": 1074,
10
+ "created_at": "2025-08-31T15:50:59.506391+00:00",
11
+ "updated_at": "2025-08-31T15:52:26.018288+00:00",
12
+ "file_path": "unknown_source",
13
+ "track_id": "insert_20250831_235059_6946ff78",
14
+ "metadata": {
15
+ "processing_start_time": 1756655459,
16
+ "processing_end_time": 1756655546
17
+ }
18
+ },
19
+ "doc-eea199eb7feea197ebb82e9333a2d2f2": {
20
+ "status": "processing",
21
+ "chunks_count": 1,
22
+ "chunks_list": [
23
+ "chunk-eea199eb7feea197ebb82e9333a2d2f2"
24
+ ],
25
+ "content_summary": "Les jardins du Luxembourg, situés au cœur du sixième arrondissement de Paris, offrent un véritable havre de paix aux citadins pressés. Créés au début du dix-septième siècle sur l'initiative de Marie de Médicis, ces jardins à la française s'étendent s...",
26
+ "content_length": 697,
27
+ "created_at": "2025-08-31T15:54:38.060638+00:00",
28
+ "updated_at": "2025-08-31T15:54:38.068349+00:00",
29
+ "file_path": "unknown_source",
30
+ "track_id": "insert_20250831_235438_22d326d7",
31
+ "metadata": {
32
+ "processing_start_time": 1756655678
33
+ }
34
+ }
35
+ }
visualize.py CHANGED
@@ -1,5 +1,4 @@
1
  import networkx as nx
2
- import rapidjson
3
  import warnings
4
  import os
5
 
@@ -13,98 +12,41 @@ file_path = "./cache/graph_chunk_entity_relation.graphml"
13
  assert os.path.exists(file_path), f"File {file_path} does not exist."
14
  G = nx.read_graphml(file_path)
15
 
16
- def create_graph(json_data):
17
- """
18
- Create interactive knowledge graph using pyvis
19
- """
20
-
21
- G = nx.Graph()
22
-
23
- # Add nodes with tooltips and error handling for missing keys
24
- for node in json_data['nodes']:
25
- # Get node type with fallback
26
- type = node.get("type", "Entity")
27
-
28
- # Get detailed type with fallback
29
- detailed_type = node.get("detailed_type", type)
30
-
31
- # Use node ID and type info for the tooltip
32
- G.add_node(node['id'], title=f"{type}: {detailed_type}")
33
-
34
- # Add edges with labels
35
- for edge in json_data['edges']:
36
- # Check if the required keys exist
37
- if 'from' in edge and 'to' in edge:
38
- label = edge.get('label', 'related')
39
- G.add_edge(edge['from'], edge['to'], title=label, label=label)
40
-
41
- # Create network visualization
42
- network = Network(
43
- width="100%",
44
- height="100vh",
45
- notebook=False,
46
- bgcolor="#f8fafc",
47
- font_color="#1e293b"
48
- )
49
-
50
- # Configure network display
51
- network.from_nx(G)
52
-
53
- # Customize node appearance
54
- for node in network.nodes:
55
- node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
56
- node['font'] = {'size': 14, 'color': '#1e293b'}
57
- node['shape'] = 'dot'
58
- node['size'] = 20
59
-
60
- # Customize edge appearance
61
- for edge in network.edges:
62
- edge['width'] = 4
63
- edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
64
- edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
65
-
66
- # Save and display the network
67
- filename_out = "knowledge_graph.html"
68
- network.show(filename_out)
69
- print(f"Knowledge graph saved to {filename_out}")
70
-
71
  # Convert the graph to node-link data format
72
- js_graph = nx.node_link_data(G)
73
- js_data = rapidjson.loads(rapidjson.dumps(js_graph))
74
- # print(js_data)
75
-
76
- create_graph(js_data)
77
-
78
- # # Create a Pyvis network
79
- # network = Network(width="100%",
80
- # height="100vh",
81
- # notebook=True,
82
- # bgcolor="#f8fafc",
83
- # font_color="#1e293b")
84
-
85
- # # Convert NetworkX graph to Pyvis network
86
- # network.from_nx(G)
87
-
88
- # # Add colors and title to nodes
89
- # for node in network.nodes:
90
- # if "description" in node:
91
- # node["title"] = node["description"]
92
-
93
- # node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
94
- # node['font'] = {'size': 14, 'color': '#1e293b'}
95
- # node['shape'] = 'dot'
96
- # node['size'] = 20
97
-
98
- # # Add title to edges
99
- # for edge in network.edges:
100
- # if "description" in edge:
101
- # edge["title"] = edge["description"]
102
-
103
- # edge['width'] = 4
104
- # edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
105
- # edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
106
-
107
- # # Save and display the network
108
- # filename_out = "knowledge_graph.html"
109
- # network.show(filename_out)
110
- # print(f"Knowledge graph saved to {filename_out}")
 
1
  import networkx as nx
 
2
  import warnings
3
  import os
4
 
 
12
  assert os.path.exists(file_path), f"File {file_path} does not exist."
13
  G = nx.read_graphml(file_path)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Convert the graph to node-link data format
16
+ dict_graph = nx.node_link_data(G)
17
+ print("Number of nodes:", len(dict_graph['nodes']))
18
+ print("Number of edges:", len(dict_graph['links']))
19
+
20
+ # Create a Pyvis network
21
+ network = Network(width="100%",
22
+ height="100vh",
23
+ notebook=True,
24
+ bgcolor="#f8fafc",
25
+ font_color="#1e293b")
26
+
27
+ # Convert NetworkX graph to Pyvis network
28
+ network.from_nx(G)
29
+
30
+ # Add colors and title to nodes
31
+ for node in network.nodes:
32
+ if "description" in node:
33
+ node["title"] = node["description"]
34
+
35
+ node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
36
+ node['font'] = {'size': 14, 'color': '#1e293b'}
37
+ node['shape'] = 'dot'
38
+ node['size'] = 20
39
+
40
+ # Add title to edges
41
+ for edge in network.edges:
42
+ if "description" in edge:
43
+ edge["title"] = edge["description"]
44
+
45
+ edge['width'] = 4
46
+ edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
47
+ edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
48
+
49
+ # Save and display the network
50
+ filename_out = "knowledge_graph.html"
51
+ network.show(filename_out)
52
+ print(f"Knowledge graph saved to {filename_out}")