Spaces:
Running
on
Zero
Running
on
Zero
Fixing the app
Browse files
app.py
CHANGED
|
@@ -4,6 +4,8 @@ import spacy
|
|
| 4 |
import shutil
|
| 5 |
import pickle
|
| 6 |
import random
|
|
|
|
|
|
|
| 7 |
import logging
|
| 8 |
import asyncio
|
| 9 |
import warnings
|
|
@@ -24,6 +26,11 @@ warnings.filterwarnings("ignore", category=UserWarning)
|
|
| 24 |
# Constants
|
| 25 |
TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
|
| 26 |
SUBTITLE = "✨ Extract and visualize knowledge graphs from texts in any language!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Basic CSS for styling
|
| 29 |
CUSTOM_CSS = """
|
|
@@ -99,7 +106,7 @@ def extract_kg(text="", model_name=MODEL_LIST[0], model=None):
|
|
| 99 |
raise gr.Error("⚠️ Both text and model must be provided!")
|
| 100 |
if not model:
|
| 101 |
raise gr.Error("⚠️ Model must be provided!")
|
| 102 |
-
|
| 103 |
try:
|
| 104 |
start_time = time.time()
|
| 105 |
result = model.extract(text, model_name)
|
|
@@ -285,15 +292,6 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
|
|
| 285 |
# Check if we're processing the first example for caching
|
| 286 |
is_first_example = text == EXAMPLES[0][0]
|
| 287 |
|
| 288 |
-
# Clear the working directory if it exists
|
| 289 |
-
if os.path.exists(WORKING_DIR):
|
| 290 |
-
shutil.rmtree(WORKING_DIR)
|
| 291 |
-
os.makedirs(WORKING_DIR, exist_ok=True)
|
| 292 |
-
|
| 293 |
-
# Initialize the LLMGraph model
|
| 294 |
-
model = LLMGraph()
|
| 295 |
-
asyncio.run(model.initialize_rag())
|
| 296 |
-
|
| 297 |
# Try to load from cache if it's the first example
|
| 298 |
if is_first_example and model_name == MODEL_LIST[0] and os.path.exists(EXAMPLE_CACHE_FILE):
|
| 299 |
try:
|
|
@@ -306,6 +304,28 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
|
|
| 306 |
except Exception as e:
|
| 307 |
logging.error(f"Cache loading error: {str(e)}")
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
# Continue with normal processing if cache fails
|
| 310 |
progress(0, desc="Starting extraction...")
|
| 311 |
json_data = extract_kg(text, model_name, model)
|
|
|
|
| 4 |
import shutil
|
| 5 |
import pickle
|
| 6 |
import random
|
| 7 |
+
import hashlib
|
| 8 |
+
|
| 9 |
import logging
|
| 10 |
import asyncio
|
| 11 |
import warnings
|
|
|
|
| 26 |
# Constants
|
| 27 |
TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
|
| 28 |
SUBTITLE = "✨ Extract and visualize knowledge graphs from texts in any language!"
|
| 29 |
+
MIN_CHARS = 20
|
| 30 |
+
MAX_CHARS = 3500
|
| 31 |
+
|
| 32 |
+
# Keep track of the hashes (doc IDs) of texts that have already been processed
|
| 33 |
+
doc_ids = []
|
| 34 |
|
| 35 |
# Basic CSS for styling
|
| 36 |
CUSTOM_CSS = """
|
|
|
|
| 106 |
raise gr.Error("⚠️ Both text and model must be provided!")
|
| 107 |
if not model:
|
| 108 |
raise gr.Error("⚠️ Model must be provided!")
|
| 109 |
+
|
| 110 |
try:
|
| 111 |
start_time = time.time()
|
| 112 |
result = model.extract(text, model_name)
|
|
|
|
| 292 |
# Check if we're processing the first example for caching
|
| 293 |
is_first_example = text == EXAMPLES[0][0]
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
# Try to load from cache if it's the first example
|
| 296 |
if is_first_example and model_name == MODEL_LIST[0] and os.path.exists(EXAMPLE_CACHE_FILE):
|
| 297 |
try:
|
|
|
|
| 304 |
except Exception as e:
|
| 305 |
logging.error(f"Cache loading error: {str(e)}")
|
| 306 |
|
| 307 |
+
# Reject text that is too short or too long
|
| 308 |
+
if len(text) < MIN_CHARS:
|
| 309 |
+
raise gr.Error(f"⚠️ Text is too short! Please provide at least {MIN_CHARS} characters.")
|
| 310 |
+
if len(text) > MAX_CHARS:
|
| 311 |
+
raise gr.Error(f"⚠️ Text is too long! Please provide no more than {MAX_CHARS} characters.")
|
| 312 |
+
|
| 313 |
+
if model_name == MODEL_LIST[1]:
|
| 314 |
+
# Compute an MD5 hash of the stripped text to use as the document ID
|
| 315 |
+
doc_id = hashlib.md5(text.strip().encode()).hexdigest()
|
| 316 |
+
|
| 317 |
+
if doc_id not in doc_ids:
|
| 318 |
+
doc_ids.append(doc_id)
|
| 319 |
+
|
| 320 |
+
# Clear the working directory if it exists
|
| 321 |
+
if os.path.exists(WORKING_DIR):
|
| 322 |
+
shutil.rmtree(WORKING_DIR)
|
| 323 |
+
os.makedirs(WORKING_DIR, exist_ok=True)
|
| 324 |
+
|
| 325 |
+
# Initialize the LLMGraph model
|
| 326 |
+
model = LLMGraph()
|
| 327 |
+
asyncio.run(model.initialize_rag())
|
| 328 |
+
|
| 329 |
# Continue with normal processing if cache fails
|
| 330 |
progress(0, desc="Starting extraction...")
|
| 331 |
json_data = extract_kg(text, model_name, model)
|