Spaces:
Running
on
Zero
Running
on
Zero
Fix repeated-text errors in the OpenAI model
Browse files
- .gitignore +1 -0
- app.py +10 -8
- llm_graph.py +13 -12
- sample/kv_store_doc_status.json +5 -21
.gitignore
CHANGED
|
@@ -170,3 +170,4 @@ __marimo__/
|
|
| 170 |
*.pdf
|
| 171 |
*.csv
|
| 172 |
cache/
|
|
|
|
|
|
| 170 |
*.pdf
|
| 171 |
*.csv
|
| 172 |
cache/
|
| 173 |
+
sample/
|
app.py
CHANGED
|
@@ -202,14 +202,15 @@ def create_graph(json_data, model_name=MODEL_LIST[0]):
|
|
| 202 |
|
| 203 |
# Configure network display
|
| 204 |
network.from_nx(G)
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
|
|
|
| 213 |
|
| 214 |
# Customize node appearance
|
| 215 |
for node in network.nodes:
|
|
@@ -253,6 +254,7 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
|
|
| 253 |
is_first_example = text == EXAMPLES[0][0]
|
| 254 |
|
| 255 |
# Ensure RAG is initialized
|
|
|
|
| 256 |
asyncio.run(model.initialize_rag())
|
| 257 |
|
| 258 |
# Try to load from cache if it's the first example
|
|
|
|
| 202 |
|
| 203 |
# Configure network display
|
| 204 |
network.from_nx(G)
|
| 205 |
+
if model_name == MODEL_LIST[0]:
|
| 206 |
+
network.barnes_hut(
|
| 207 |
+
gravity=-3000,
|
| 208 |
+
central_gravity=0.3,
|
| 209 |
+
spring_length=50,
|
| 210 |
+
spring_strength=0.001,
|
| 211 |
+
damping=0.09,
|
| 212 |
+
overlap=0,
|
| 213 |
+
)
|
| 214 |
|
| 215 |
# Customize node appearance
|
| 216 |
for node in network.nodes:
|
|
|
|
| 254 |
is_first_example = text == EXAMPLES[0][0]
|
| 255 |
|
| 256 |
# Ensure RAG is initialized
|
| 257 |
+
# TODO: Clear all the previous inserted texts
|
| 258 |
asyncio.run(model.initialize_rag())
|
| 259 |
|
| 260 |
# Try to load from cache if it's the first example
|
llm_graph.py
CHANGED
|
@@ -45,17 +45,18 @@ class LLMGraph:
|
|
| 45 |
Initialize the LightRAG instance with the specified embedding dimension.
|
| 46 |
"""
|
| 47 |
|
| 48 |
-
if self.rag is None:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
)
|
| 58 |
-
|
|
|
|
| 59 |
await self.rag.initialize_storages()
|
| 60 |
await initialize_pipeline_status()
|
| 61 |
|
|
@@ -171,7 +172,7 @@ class LLMGraph:
|
|
| 171 |
os.makedirs(WORKING_DIR, exist_ok=True)
|
| 172 |
|
| 173 |
# Use LightRAG with Azure OpenAI
|
| 174 |
-
# TODO: Clear all the previous inserted texts
|
| 175 |
self.rag.insert(text) # Insert the text into the RAG storage
|
| 176 |
|
| 177 |
# Wait for GRAPHML_FILE to be created
|
|
|
|
| 45 |
Initialize the LightRAG instance with the specified embedding dimension.
|
| 46 |
"""
|
| 47 |
|
| 48 |
+
# if self.rag is None:
|
| 49 |
+
# TODO: Check how to clear all the previous inserted texts
|
| 50 |
+
self.rag = LightRAG(
|
| 51 |
+
working_dir=WORKING_DIR,
|
| 52 |
+
llm_model_func=self._llm_model_func,
|
| 53 |
+
embedding_func=EmbeddingFunc(
|
| 54 |
+
embedding_dim=embedding_dimension,
|
| 55 |
+
max_token_size=8192,
|
| 56 |
+
func=self._embedding_func,
|
| 57 |
+
),
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
await self.rag.initialize_storages()
|
| 61 |
await initialize_pipeline_status()
|
| 62 |
|
|
|
|
| 172 |
os.makedirs(WORKING_DIR, exist_ok=True)
|
| 173 |
|
| 174 |
# Use LightRAG with Azure OpenAI
|
| 175 |
+
# TODO: Clear all the previous inserted texts
|
| 176 |
self.rag.insert(text) # Insert the text into the RAG storage
|
| 177 |
|
| 178 |
# Wait for GRAPHML_FILE to be created
|
sample/kv_store_doc_status.json
CHANGED
|
@@ -7,29 +7,13 @@
|
|
| 7 |
],
|
| 8 |
"content_summary": "The family of Azerbaijan President Ilham Aliyev leads a charmed, glamorous life, thanks in part to financial interests in almost every sector of the economy. His wife, Mehriban, comes from the privileged and powerful Pashayev family that owns banks, ...",
|
| 9 |
"content_length": 1074,
|
| 10 |
-
"created_at": "2025-
|
| 11 |
-
"updated_at": "2025-
|
| 12 |
"file_path": "unknown_source",
|
| 13 |
-
"track_id": "
|
| 14 |
"metadata": {
|
| 15 |
-
"processing_start_time":
|
| 16 |
-
"processing_end_time":
|
| 17 |
-
}
|
| 18 |
-
},
|
| 19 |
-
"doc-eea199eb7feea197ebb82e9333a2d2f2": {
|
| 20 |
-
"status": "processing",
|
| 21 |
-
"chunks_count": 1,
|
| 22 |
-
"chunks_list": [
|
| 23 |
-
"chunk-eea199eb7feea197ebb82e9333a2d2f2"
|
| 24 |
-
],
|
| 25 |
-
"content_summary": "Les jardins du Luxembourg, situés au cœur du sixième arrondissement de Paris, offrent un véritable havre de paix aux citadins pressés. Créés au début du dix-septième siècle sur l'initiative de Marie de Médicis, ces jardins à la française s'étendent s...",
|
| 26 |
-
"content_length": 697,
|
| 27 |
-
"created_at": "2025-08-31T15:54:38.060638+00:00",
|
| 28 |
-
"updated_at": "2025-08-31T15:54:38.068349+00:00",
|
| 29 |
-
"file_path": "unknown_source",
|
| 30 |
-
"track_id": "insert_20250831_235438_22d326d7",
|
| 31 |
-
"metadata": {
|
| 32 |
-
"processing_start_time": 1756655678
|
| 33 |
}
|
| 34 |
}
|
| 35 |
}
|
|
|
|
| 7 |
],
|
| 8 |
"content_summary": "The family of Azerbaijan President Ilham Aliyev leads a charmed, glamorous life, thanks in part to financial interests in almost every sector of the economy. His wife, Mehriban, comes from the privileged and powerful Pashayev family that owns banks, ...",
|
| 9 |
"content_length": 1074,
|
| 10 |
+
"created_at": "2025-09-01T14:53:10.896398+00:00",
|
| 11 |
+
"updated_at": "2025-09-01T14:54:44.898862+00:00",
|
| 12 |
"file_path": "unknown_source",
|
| 13 |
+
"track_id": "insert_20250901_225310_a186b861",
|
| 14 |
"metadata": {
|
| 15 |
+
"processing_start_time": 1756738390,
|
| 16 |
+
"processing_end_time": 1756738484
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
}
|
| 18 |
}
|
| 19 |
}
|