vietexob committed on
Commit
4931ab5
·
1 Parent(s): a605fa2

Fixing the app

Browse files
Files changed (1) hide show
  1. app.py +30 -10
app.py CHANGED
@@ -4,6 +4,8 @@ import spacy
4
  import shutil
5
  import pickle
6
  import random
 
 
7
  import logging
8
  import asyncio
9
  import warnings
@@ -24,6 +26,11 @@ warnings.filterwarnings("ignore", category=UserWarning)
24
  # Constants
25
  TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
26
  SUBTITLE = "✨ Extract and visualize knowledge graphs from texts in any language!"
 
 
 
 
 
27
 
28
  # Basic CSS for styling
29
  CUSTOM_CSS = """
@@ -99,7 +106,7 @@ def extract_kg(text="", model_name=MODEL_LIST[0], model=None):
99
  raise gr.Error("⚠️ Both text and model must be provided!")
100
  if not model:
101
  raise gr.Error("⚠️ Model must be provided!")
102
-
103
  try:
104
  start_time = time.time()
105
  result = model.extract(text, model_name)
@@ -285,15 +292,6 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
285
  # Check if we're processing the first example for caching
286
  is_first_example = text == EXAMPLES[0][0]
287
 
288
- # Clear the working directory if it exists
289
- if os.path.exists(WORKING_DIR):
290
- shutil.rmtree(WORKING_DIR)
291
- os.makedirs(WORKING_DIR, exist_ok=True)
292
-
293
- # Initialize the LLMGraph model
294
- model = LLMGraph()
295
- asyncio.run(model.initialize_rag())
296
-
297
  # Try to load from cache if it's the first example
298
  if is_first_example and model_name == MODEL_LIST[0] and os.path.exists(EXAMPLE_CACHE_FILE):
299
  try:
@@ -306,6 +304,28 @@ def process_and_visualize(text, model_name, progress=gr.Progress()):
306
  except Exception as e:
307
  logging.error(f"Cache loading error: {str(e)}")
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  # Continue with normal processing if cache fails
310
  progress(0, desc="Starting extraction...")
311
  json_data = extract_kg(text, model_name, model)
 
4
  import shutil
5
  import pickle
6
  import random
7
+ import hashlib
8
+
9
  import logging
10
  import asyncio
11
  import warnings
 
26
  # Constants
27
  TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
28
  SUBTITLE = "✨ Extract and visualize knowledge graphs from texts in any language!"
29
+ MIN_CHARS = 20
30
+ MAX_CHARS = 3500
31
+
32
+ # Keep track of all processed texts
33
+ doc_ids = []
34
 
35
  # Basic CSS for styling
36
  CUSTOM_CSS = """
 
106
  raise gr.Error("⚠️ Both text and model must be provided!")
107
  if not model:
108
  raise gr.Error("⚠️ Model must be provided!")
109
+
110
  try:
111
  start_time = time.time()
112
  result = model.extract(text, model_name)
 
292
  # Check if we're processing the first example for caching
293
  is_first_example = text == EXAMPLES[0][0]
294
 
 
 
 
 
 
 
 
 
 
295
  # Try to load from cache if it's the first example
296
  if is_first_example and model_name == MODEL_LIST[0] and os.path.exists(EXAMPLE_CACHE_FILE):
297
  try:
 
304
  except Exception as e:
305
  logging.error(f"Cache loading error: {str(e)}")
306
 
307
+ # Catch too long or too short text
308
+ if len(text) < MIN_CHARS:
309
+ raise gr.Error(f"⚠️ Text is too short! Please provide at least {MIN_CHARS} characters.")
310
+ if len(text) > MAX_CHARS:
311
+ raise gr.Error(f"⚠️ Text is too long! Please provide no more than {MAX_CHARS} characters.")
312
+
313
+ if model_name == MODEL_LIST[1]:
314
+ # Compute the unique hash for the document
315
+ doc_id = hashlib.md5(text.strip().encode()).hexdigest()
316
+
317
+ if doc_id not in doc_ids:
318
+ doc_ids.append(doc_id)
319
+
320
+ # Clear the working directory if it exists
321
+ if os.path.exists(WORKING_DIR):
322
+ shutil.rmtree(WORKING_DIR)
323
+ os.makedirs(WORKING_DIR, exist_ok=True)
324
+
325
+ # Initialize the LLMGraph model
326
+ model = LLMGraph()
327
+ asyncio.run(model.initialize_rag())
328
+
329
  # Continue with normal processing if cache fails
330
  progress(0, desc="Starting extraction...")
331
  json_data = extract_kg(text, model_name, model)