Spaces:
Build error
Build error
enhance performance
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from typing import
|
| 2 |
from queue import Empty, Queue
|
| 3 |
from threading import Thread
|
| 4 |
from bot.web_scrapping.crawler_and_indexer import content_crawler_and_index
|
|
@@ -16,12 +16,10 @@ human_message_prompt_template = HumanMessagePromptTemplate.from_template("{text}
|
|
| 16 |
|
| 17 |
|
| 18 |
def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
|
| 19 |
-
index = content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
|
| 20 |
if chat_mode:
|
| 21 |
-
return
|
| 22 |
else:
|
| 23 |
-
|
| 24 |
-
return fb
|
| 25 |
|
| 26 |
|
| 27 |
def chat_start(
|
|
|
|
| 1 |
+
from typing import Optional, Tuple
|
| 2 |
from queue import Empty, Queue
|
| 3 |
from threading import Thread
|
| 4 |
from bot.web_scrapping.crawler_and_indexer import content_crawler_and_index
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
|
|
|
|
| 19 |
if chat_mode:
|
| 20 |
+
return content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
|
| 21 |
else:
|
| 22 |
+
return 'Training Completed'
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def chat_start(
|
bot/web_scrapping/searchable_index.py
CHANGED
|
@@ -13,6 +13,7 @@ import pandas as pd
|
|
| 13 |
import threading
|
| 14 |
import glob
|
| 15 |
import os
|
|
|
|
| 16 |
import queue
|
| 17 |
|
| 18 |
|
|
@@ -72,41 +73,52 @@ class SearchableIndex:
|
|
| 72 |
if os.path.exists(index_store):
|
| 73 |
local_db = FAISS.load_local(index_store, embeddings)
|
| 74 |
local_db.merge_from(faiss_db)
|
| 75 |
-
|
| 76 |
-
logger.info("Merge index completed")
|
| 77 |
else:
|
| 78 |
-
faiss_db
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
return local_db
|
| 82 |
|
| 83 |
@classmethod
|
| 84 |
-
def
|
| 85 |
if index_files:
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
@classmethod
|
| 92 |
def embed_index(cls, url, path, llm, prompt, target_col=None, sheet_name=None):
|
| 93 |
embeddings = OpenAIEmbeddings()
|
| 94 |
|
| 95 |
-
if
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
index_files = glob.glob(os.path.join(path, '*_index'))
|
| 103 |
-
|
| 104 |
-
result_queue = queue.Queue() # Create a queue to store the result
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
thread.start()
|
| 109 |
-
local_db = result_queue.get() # Retrieve the result from the queue
|
| 110 |
return Query(prompt, llm, local_db)
|
| 111 |
|
| 112 |
|
|
|
|
| 13 |
import threading
|
| 14 |
import glob
|
| 15 |
import os
|
| 16 |
+
import asyncio
|
| 17 |
import queue
|
| 18 |
|
| 19 |
|
|
|
|
| 73 |
if os.path.exists(index_store):
|
| 74 |
local_db = FAISS.load_local(index_store, embeddings)
|
| 75 |
local_db.merge_from(faiss_db)
|
| 76 |
+
operation_info = "Merge"
|
|
|
|
| 77 |
else:
|
| 78 |
+
local_db = faiss_db # Use the provided faiss_db directly for a new store
|
| 79 |
+
operation_info = "New store creation"
|
| 80 |
+
|
| 81 |
+
local_db.save_local(index_store)
|
| 82 |
+
logger.info(f"{operation_info} index completed")
|
| 83 |
return local_db
|
| 84 |
|
| 85 |
@classmethod
|
| 86 |
+
def load_index(cls, index_files, embeddings, logger):
|
| 87 |
if index_files:
|
| 88 |
+
return FAISS.load_local(index_files[0], embeddings)
|
| 89 |
+
logger.warning("Index store does not exist")
|
| 90 |
+
return None
|
| 91 |
+
|
| 92 |
+
@classmethod
|
| 93 |
+
def check_and_load_index(cls, index_files, embeddings, logger, result_queue):
|
| 94 |
+
local_db = cls.load_index(index_files, embeddings, logger)
|
| 95 |
+
result_queue.put(local_db)
|
| 96 |
+
|
| 97 |
+
@classmethod
|
| 98 |
+
def load_index_asynchronously(cls, index_files, embeddings, logger):
|
| 99 |
+
result_queue = queue.Queue()
|
| 100 |
+
thread = threading.Thread(
|
| 101 |
+
target=cls.check_and_load_index,
|
| 102 |
+
args=(index_files, embeddings, logger, result_queue)
|
| 103 |
+
)
|
| 104 |
+
thread.start()
|
| 105 |
+
thread.join() # Wait for the thread to finish
|
| 106 |
+
return result_queue.get()
|
| 107 |
|
| 108 |
@classmethod
|
| 109 |
def embed_index(cls, url, path, llm, prompt, target_col=None, sheet_name=None):
|
| 110 |
embeddings = OpenAIEmbeddings()
|
| 111 |
|
| 112 |
+
if path:
|
| 113 |
+
if url != 'NO_URL':
|
| 114 |
+
doc_list = cls.get_splits(path, target_col, sheet_name)
|
| 115 |
+
faiss_db = FAISS.from_texts(doc_list, embeddings)
|
| 116 |
+
index_store = os.path.splitext(path)[0] + "_index"
|
| 117 |
+
local_db = cls.merge_or_create_index(index_store, faiss_db, embeddings, logger)
|
| 118 |
+
return Query(prompt, llm, local_db)
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
+
index_files = glob.glob(os.path.join(path, '*_index'))
|
| 121 |
+
local_db = cls.load_index_asynchronously(index_files, embeddings, logger)
|
|
|
|
|
|
|
| 122 |
return Query(prompt, llm, local_db)
|
| 123 |
|
| 124 |
|
learning_documents/combined_content_index/index.faiss
CHANGED
|
Binary files a/learning_documents/combined_content_index/index.faiss and b/learning_documents/combined_content_index/index.faiss differ
|
|
|
learning_documents/combined_content_index/index.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0d19bc7afb8227f67225b52afd8c746bc67aceca43fb5e5c84a19e94cda0e9d
|
| 3 |
+
size 3959
|
requirements.txt
CHANGED
|
@@ -154,6 +154,7 @@ typer==0.9.0
|
|
| 154 |
typing-inspect==0.9.0
|
| 155 |
typing_extensions==4.8.0
|
| 156 |
tzdata==2023.3
|
|
|
|
| 157 |
unstructured==0.10.29
|
| 158 |
urllib3==1.26.18
|
| 159 |
uvicorn==0.24.0.post1
|
|
|
|
| 154 |
typing-inspect==0.9.0
|
| 155 |
typing_extensions==4.8.0
|
| 156 |
tzdata==2023.3
|
| 157 |
+
twine
|
| 158 |
unstructured==0.10.29
|
| 159 |
urllib3==1.26.18
|
| 160 |
uvicorn==0.24.0.post1
|