Spaces:
Running
on
Zero
Running
on
Zero
Refactored the code
Browse files
- app.py +5 -7
- data/sample3_en.txt +7 -0
- data/sys_prompt.txt +25 -0
- llm_graph.py +10 -32
app.py
CHANGED
|
@@ -12,7 +12,6 @@ import rapidjson
|
|
| 12 |
import gradio as gr
|
| 13 |
import networkx as nx
|
| 14 |
|
| 15 |
-
# from dotenv import load_dotenv
|
| 16 |
from llm_graph import LLMGraph, MODEL_LIST
|
| 17 |
|
| 18 |
from pyvis.network import Network
|
|
@@ -48,6 +47,10 @@ text_en_file2 = "./data/sample2_en.txt"
|
|
| 48 |
with open(text_en_file2, 'r', encoding='utf-8') as file:
|
| 49 |
text2_en = file.read()
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
text_fr_file = "./data/sample_fr.txt"
|
| 52 |
with open(text_fr_file, 'r', encoding='utf-8') as file:
|
| 53 |
text_fr = file.read()
|
|
@@ -339,6 +342,7 @@ EXAMPLES = [
|
|
| 339 |
[handle_text(text_fr)],
|
| 340 |
[handle_text(text2_en)],
|
| 341 |
[handle_text(text_es)],
|
|
|
|
| 342 |
]
|
| 343 |
|
| 344 |
def generate_first_example():
|
|
@@ -416,12 +420,6 @@ def create_ui():
|
|
| 416 |
with gr.Row():
|
| 417 |
# Left panel - Input controls
|
| 418 |
with gr.Column(scale=1):
|
| 419 |
-
# input_model = gr.Dropdown(
|
| 420 |
-
# MODEL_LIST,
|
| 421 |
-
# label="🤖 Select Model",
|
| 422 |
-
# info="Choose a model to process your text",
|
| 423 |
-
# value=MODEL_LIST[0] if MODEL_LIST else None,
|
| 424 |
-
# )
|
| 425 |
input_model = gr.Radio(
|
| 426 |
MODEL_LIST,
|
| 427 |
label="🤖 Select Model",
|
|
|
|
| 12 |
import gradio as gr
|
| 13 |
import networkx as nx
|
| 14 |
|
|
|
|
| 15 |
from llm_graph import LLMGraph, MODEL_LIST
|
| 16 |
|
| 17 |
from pyvis.network import Network
|
|
|
|
| 47 |
with open(text_en_file2, 'r', encoding='utf-8') as file:
|
| 48 |
text2_en = file.read()
|
| 49 |
|
| 50 |
+
text_en_file3 = "./data/sample3_en.txt"
|
| 51 |
+
with open(text_en_file3, 'r', encoding='utf-8') as file:
|
| 52 |
+
text3_en = file.read()
|
| 53 |
+
|
| 54 |
text_fr_file = "./data/sample_fr.txt"
|
| 55 |
with open(text_fr_file, 'r', encoding='utf-8') as file:
|
| 56 |
text_fr = file.read()
|
|
|
|
| 342 |
[handle_text(text_fr)],
|
| 343 |
[handle_text(text2_en)],
|
| 344 |
[handle_text(text_es)],
|
| 345 |
+
[handle_text(text3_en)]
|
| 346 |
]
|
| 347 |
|
| 348 |
def generate_first_example():
|
|
|
|
| 420 |
with gr.Row():
|
| 421 |
# Left panel - Input controls
|
| 422 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
input_model = gr.Radio(
|
| 424 |
MODEL_LIST,
|
| 425 |
label="🤖 Select Model",
|
data/sample3_en.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The small coffee shop on Maple Street had become an unlikely sanctuary for the neighborhood's most eccentric characters.
|
| 2 |
+
Every Tuesday at precisely 2:47 PM, Mrs. Chen would arrive with her collection of vintage postcards, spreading them across
|
| 3 |
+
table six while muttering corrections to the historical inaccuracies she'd discovered in travel documentaries. The barista,
|
| 4 |
+
a philosophy student named Marcus, had learned to prepare her lavender latte without being asked, and had grown oddly fond
|
| 5 |
+
of her lengthy monologues about the real story behind the Eiffel Tower's construction. Meanwhile, the jazz musician in the
|
| 6 |
+
corner booth scribbled chord progressions on napkins, occasionally humming melodies that seemed to respond to the rhythmic
|
| 7 |
+
hiss of the espresso machine. By closing time, the air hung thick with caffeine, dreams, and the comfortable weight of shared solitude.
|
data/sys_prompt.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
|
| 2 |
+
The User provides text in the format:
|
| 3 |
+
|
| 4 |
+
-------Text begin-------
|
| 5 |
+
<User provided text>
|
| 6 |
+
-------Text end-------
|
| 7 |
+
|
| 8 |
+
The Assistant follows the following steps before replying to the User:
|
| 9 |
+
|
| 10 |
+
1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
|
| 11 |
+
|
| 12 |
+
"nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed_type>}, ...]
|
| 13 |
+
|
| 14 |
+
where "type": <type> is a broad categorization of the entity. "detailed_type": <detailed_type> is a very descriptive categorization of the entity.
|
| 15 |
+
|
| 16 |
+
2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
|
| 17 |
+
|
| 18 |
+
"edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
|
| 19 |
+
|
| 20 |
+
The <entity N> must correspond to the "id" of an entity in the "nodes" list.
|
| 21 |
+
|
| 22 |
+
The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
|
| 23 |
+
The Assistant responds to the User in JSON only, according to the following JSON schema:
|
| 24 |
+
|
| 25 |
+
{"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
|
llm_graph.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import time
|
| 3 |
-
|
| 4 |
import numpy as np
|
| 5 |
import networkx as nx
|
| 6 |
|
|
@@ -35,6 +35,11 @@ MODEL_LIST = [
|
|
| 35 |
"OpenAI/GPT-4.1-mini",
|
| 36 |
]
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
class LLMGraph:
|
| 39 |
"""
|
| 40 |
A class to interact with LLMs for knowledge graph extraction.
|
|
@@ -106,35 +111,8 @@ class LLMGraph:
|
|
| 106 |
Construct the message list for the chat model.
|
| 107 |
"""
|
| 108 |
|
| 109 |
-
context = dedent(
|
| 110 |
-
A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
|
| 111 |
-
|
| 112 |
-
The User provides text in the format:
|
| 113 |
-
|
| 114 |
-
-------Text begin-------
|
| 115 |
-
<User provided text>
|
| 116 |
-
-------Text end-------
|
| 117 |
-
|
| 118 |
-
The Assistant follows the following steps before replying to the User:
|
| 119 |
-
|
| 120 |
-
1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
|
| 121 |
-
|
| 122 |
-
"nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
|
| 123 |
-
|
| 124 |
-
where "type": <type> is a broad categorization of the entity. "detailed type": <detailed_type> is a very descriptive categorization of the entity.
|
| 125 |
|
| 126 |
-
2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
|
| 127 |
-
|
| 128 |
-
"edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
|
| 129 |
-
|
| 130 |
-
The <entity N> must correspond to the "id" of an entity in the "nodes" list.
|
| 131 |
-
|
| 132 |
-
The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
|
| 133 |
-
The Assistant responds to the User in JSON only, according to the following JSON schema:
|
| 134 |
-
|
| 135 |
-
{"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
|
| 136 |
-
""")
|
| 137 |
-
|
| 138 |
user_message = dedent(f"""\n
|
| 139 |
-------Text begin-------
|
| 140 |
{text}
|
|
@@ -168,11 +146,11 @@ class LLMGraph:
|
|
| 168 |
else:
|
| 169 |
# Use LightRAG with Azure OpenAI
|
| 170 |
self.rag.insert(text) # Insert the text into the RAG storage
|
| 171 |
-
|
| 172 |
# Wait for GRAPHML_FILE to be created
|
| 173 |
while not os.path.exists(GRAPHML_FILE):
|
| 174 |
-
time.sleep(0.1) # Sleep for
|
| 175 |
-
|
| 176 |
# Extract dict format of the knowledge graph
|
| 177 |
G = nx.read_graphml(GRAPHML_FILE)
|
| 178 |
|
|
|
|
| 1 |
import os
|
| 2 |
import time
|
| 3 |
+
|
| 4 |
import numpy as np
|
| 5 |
import networkx as nx
|
| 6 |
|
|
|
|
| 35 |
"OpenAI/GPT-4.1-mini",
|
| 36 |
]
|
| 37 |
|
| 38 |
+
# Read the system prompt
|
| 39 |
+
sys_prompt_file = "./data/sys_prompt.txt"
|
| 40 |
+
with open(sys_prompt_file, 'r', encoding='utf-8') as file:
|
| 41 |
+
sys_prompt = file.read()
|
| 42 |
+
|
| 43 |
class LLMGraph:
|
| 44 |
"""
|
| 45 |
A class to interact with LLMs for knowledge graph extraction.
|
|
|
|
| 111 |
Construct the message list for the chat model.
|
| 112 |
"""
|
| 113 |
|
| 114 |
+
context = dedent(sys_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
user_message = dedent(f"""\n
|
| 117 |
-------Text begin-------
|
| 118 |
{text}
|
|
|
|
| 146 |
else:
|
| 147 |
# Use LightRAG with Azure OpenAI
|
| 148 |
self.rag.insert(text) # Insert the text into the RAG storage
|
| 149 |
+
|
| 150 |
# Wait for GRAPHML_FILE to be created
|
| 151 |
while not os.path.exists(GRAPHML_FILE):
|
| 152 |
+
time.sleep(0.1) # Sleep for 0.1 seconds before checking again
|
| 153 |
+
|
| 154 |
# Extract dict format of the knowledge graph
|
| 155 |
G = nx.read_graphml(GRAPHML_FILE)
|
| 156 |
|