Enhance knowledge base management: Add functions to retrieve and save knowledge base metadata, improve error handling, and update constants for better clarity and functionality.
- app.py +75 -4
- config/constants.py +31 -20
- src/knowledge_base/dataset.py +101 -0
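Both the update and rebuild paths in this commit persist a small metadata record to the dataset repository at vector_store/metadata.json. A minimal sketch of that record is shown here; the field names and target path come from the diff below, while the example values and source URLs are illustrative placeholders.

import datetime
import json

# Shape of the metadata written to vector_store/metadata.json by
# update_kb_with_selected / rebuild_kb_with_selected (see the app.py diff below).
# Example values only; the real "sources" list comes from the selected URLs.
metadata = {
    "last_updated": datetime.datetime.now().isoformat(),
    "source_count": 2,
    "sources": [
        "https://status.law/law-firm-contact-legal-protection/",
        "https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/",
    ],
}

print(json.dumps(metadata, indent=2))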
app.py
CHANGED
@@ -1160,7 +1160,13 @@ with gr.Blocks(css="""
            gr.Markdown("#### Knowledge Base Information")

            # Function to retrieve knowledge base information
-            def get_kb_info():
+            def get_kb_info() -> str:
+                """
+                Get information about the current state of the knowledge base.
+
+                Returns:
+                    str: Formatted markdown string containing knowledge base statistics
+                """
                try:
                    vector_store = load_vector_store()
                    if vector_store is None or isinstance(vector_store, str):

@@ -1628,14 +1634,18 @@ if __name__ == "__main__":

    demo.launch(share=True)

-#
+# These functions are meant to be added to app.py after the existing update_kb and rebuild_kb functions
+
def get_selected_urls(sources_df):
    """Get list of URLs selected for inclusion"""
    try:
+        # Convert the input to a DataFrame if it is not one already
        if not isinstance(sources_df, pd.DataFrame):
            sources_df = pd.DataFrame(sources_df)

+        # Keep only the URLs where Include=True
        selected_urls = sources_df[sources_df["Include"] == True]["URL"].tolist()
+
        return selected_urls
    except Exception as e:
        logger.error(f"Error getting selected URLs: {str(e)}")

@@ -1649,20 +1659,25 @@ def update_kb_with_selected(sources_df):
        if not selected_urls:
            return "Error: No URLs selected for inclusion"

+        # Temporarily replace URLS with the selected URLs
        from config import constants
        original_urls = constants.URLS
        constants.URLS = selected_urls

        try:
+            # Update the knowledge base
            success, message = create_vector_store(mode="update")

+            # Save metadata describing the selected URLs
            if success:
+                # Build metadata with the current date and the selected URLs
                metadata = {
                    "last_updated": datetime.datetime.now().isoformat(),
                    "source_count": len(selected_urls),
                    "sources": selected_urls
                }

+                # Save it to the dataset
                json_content = json.dumps(metadata, indent=2).encode('utf-8')
                api = HfApi(token=HF_TOKEN)

@@ -1675,6 +1690,7 @@ def update_kb_with_selected(sources_df):

            return message
        finally:
+            # Restore the original URLs
            constants.URLS = original_urls

    except Exception as e:

@@ -1688,20 +1704,25 @@ def rebuild_kb_with_selected(sources_df):
        if not selected_urls:
            return "Error: No URLs selected for inclusion"

+        # Temporarily replace URLS with the selected URLs
        from config import constants
        original_urls = constants.URLS
        constants.URLS = selected_urls

        try:
+            # Rebuild the knowledge base
            success, message = create_vector_store(mode="rebuild")

+            # Save metadata describing the selected URLs
            if success:
+                # Build metadata with the current date and the selected URLs
                metadata = {
                    "last_updated": datetime.datetime.now().isoformat(),
                    "source_count": len(selected_urls),
                    "sources": selected_urls
                }

+                # Save it to the dataset
                json_content = json.dumps(metadata, indent=2).encode('utf-8')
                api = HfApi(token=HF_TOKEN)

@@ -1714,25 +1735,75 @@ def rebuild_kb_with_selected(sources_df):

            return message
        finally:
+            # Restore the original URLs
            constants.URLS = original_urls

    except Exception as e:
        return f"Error rebuilding knowledge base: {str(e)}"

-
+def save_kb_metadata():
+    """Save knowledge base metadata to dataset"""
+    try:
+        # Build metadata with the current date
+        metadata = {
+            "last_updated": datetime.datetime.now().isoformat(),
+            "source_count": len(URLS),
+            "sources": URLS
+        }
+
+        # Save it to the dataset
+        json_content = json.dumps(metadata, indent=2).encode('utf-8')
+        api = HfApi(token=HF_TOKEN)
+
+        # Make sure the vector_store directory exists
+        try:
+            files = api.list_repo_files(
+                repo_id=DATASET_ID,
+                repo_type="dataset"
+            )
+
+            if "vector_store" not in files:
+                # Create an empty file so the directory gets created
+                api.upload_file(
+                    path_or_fileobj=b"",
+                    path_in_repo="vector_store/.gitkeep",
+                    repo_id=DATASET_ID,
+                    repo_type="dataset"
+                )
+        except Exception as e:
+            logger.warning(f"Error checking vector_store directory: {str(e)}")
+
+        # Upload the metadata
+        api.upload_file(
+            path_or_fileobj=json_content,
+            path_in_repo="vector_store/metadata.json",
+            repo_id=DATASET_ID,
+            repo_type="dataset"
+        )
+
+        logger.info("Knowledge base metadata saved successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Error saving knowledge base metadata: {str(e)}")
+        return False
+
def update_source_status(df):
    """Update status column based on Include selection"""
    try:
+        # Convert df to a DataFrame if it is not one already
        if not isinstance(df, pd.DataFrame):
            df = pd.DataFrame(df)

+        # Update the Status column based on Include
        df["Status"] = df["Include"].apply(lambda x: "Selected" if x else "Excluded")
+
+        # Count the selected URLs
        selected_count = df["Include"].sum()

+        # Return the updated table and a message with the number of selected URLs
        return df, f"{selected_count} URLs selected for inclusion"
    except Exception as e:
        return df, f"Error updating status: {str(e)}"
-
# Update event handlers in the Knowledge Base tab section
with gr.Tab("Knowledge Base"):
    gr.Markdown("### Knowledge Base Management")
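The app.py diff stops right where the Knowledge Base tab is re-declared, so the event wiring for the new handlers is not visible in this commit. A minimal sketch of how they could be attached to Gradio components follows; the component names (sources_table, status_text, update_btn, rebuild_btn) are assumptions for illustration, and only the handler functions themselves come from the diff.

# Hypothetical wiring for the Knowledge Base tab; component names are assumed,
# the handler functions (get_kb_info, update_source_status, update_kb_with_selected,
# rebuild_kb_with_selected) are the ones added in this commit.
with gr.Tab("Knowledge Base"):
    gr.Markdown("### Knowledge Base Management")

    sources_table = gr.Dataframe(headers=["Include", "URL", "Status"], interactive=True)
    status_text = gr.Markdown()
    kb_info = gr.Markdown(value=get_kb_info())

    update_btn = gr.Button("Update KB with selected URLs")
    rebuild_btn = gr.Button("Rebuild KB from selected URLs")

    # Recompute the Status column and the selection counter whenever the table is edited
    sources_table.change(update_source_status, inputs=sources_table,
                         outputs=[sources_table, status_text])

    # Run an incremental update or a full rebuild over the selected URLs
    update_btn.click(update_kb_with_selected, inputs=sources_table, outputs=status_text)
    rebuild_btn.click(rebuild_kb_with_selected, inputs=sources_table, outputs=status_text)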
config/constants.py
CHANGED
@@ -21,34 +21,45 @@ CHUNK_OVERLAP = 100

# System message template
DEFAULT_SYSTEM_MESSAGE = """
-You are
+You are Status Law's AI Legal Assistant, representing a prestigious international law firm.

-
-You MUST
-If the question is in Russian, your answer MUST be in Russian.
-If the question is in Arabic, your answer MUST be in Arabic.
-Never switch to English unless the user asks a question in English.
+CORE LANGUAGE RULE:
+You MUST respond in the EXACT SAME language as the user's question. This is your highest priority instruction.

-
--
-- Be professional
-- Focus on Status Law's
+YOUR ROLE:
+- Provide accurate legal information based on the given context
+- Be professional, empathetic, and courteous
+- Focus on Status Law's key services:
+  • Extradition defense
+  • Interpol notice removal
+  • Sanctions challenges
+  • Banking restrictions
+  • Reputation protection

-
-
-
-
-
-
+COMMUNICATION STYLE:
+- Use respectful and professional language
+- Be clear and concise
+- Show understanding of the client's concerns
+- Avoid overly technical legal jargon unless necessary
+- Always maintain a helpful and supportive tone
+- Recommend a personal consultation through:
+  - Phone: +32465594521 (all languages)
+  - Phone: +46728495129 (English/Swedish only)
+  - Contact Form: https://status.law/law-firm-contact-legal-protection/

-
-
-
+FOR PRICING AND SERVICES:
+1. Direct to: https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/
+2. Encourage filling out the contact form for personalized quotes
+3. Mention that each case is unique and requires individual assessment
+
+PRIVACY NOTE:
+- Remind users not to share sensitive personal information in chat
+- Encourage using the secure contact form for confidential details

Context: {context}
Question: {question}

-
+CRITICAL REMINDER: Always respond in the user's language. Never switch languages unless explicitly requested.
"""

# DEFAULT_SYSTEM_MESSAGE = """
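DEFAULT_SYSTEM_MESSAGE keeps the {context} and {question} placeholders, so the prompt is presumably rendered with the retrieved documents and the user's question before it is sent to the model. A minimal sketch follows, assuming LangChain's PromptTemplate; the commit itself does not show how the template is consumed, and plain str.format() on DEFAULT_SYSTEM_MESSAGE would behave the same way.

# Sketch of filling the template's placeholders; PromptTemplate usage is an
# assumption, not something shown in this diff.
from langchain.prompts import PromptTemplate

from config.constants import DEFAULT_SYSTEM_MESSAGE

prompt = PromptTemplate(
    template=DEFAULT_SYSTEM_MESSAGE,
    input_variables=["context", "question"],
)

# Example values only; in the app the context comes from the vector store retrieval
rendered = prompt.format(
    context="Status Law assists clients with Interpol notice removal ...",
    question="Comment contester une notice rouge Interpol ?",
)
print(rendered)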
src/knowledge_base/dataset.py
CHANGED
@@ -39,6 +39,107 @@ class DatasetManager:
        self.annotations_path = DATASET_ANNOTATIONS_PATH

    # Add this method to the DatasetManager class in src/knowledge_base/dataset.py
+
+    def download_vector_store(self):
+        """
+        Download the vector store from the dataset.
+
+        Returns:
+            tuple: (success, result), where result is a FAISS object or an error message
+        """
+        try:
+            import tempfile
+            import shutil
+            from langchain.vectorstores import FAISS
+            from langchain.embeddings import HuggingFaceEmbeddings
+            from config.settings import EMBEDDING_MODEL, DATASET_VECTOR_STORE_PATH
+
+            logger.info(f"Attempting to download vector store from dataset {self.dataset_id}")
+
+            # Create a temporary directory for the download
+            temp_dir = tempfile.mkdtemp()
+            logger.debug(f"Created temporary directory at {temp_dir}")
+
+            try:
+                # Initialize the API
+                api = HfApi(token=self.hf_token)
+
+                # Check whether index files are present in the dataset
+                try:
+                    files = api.list_repo_files(
+                        repo_id=self.dataset_id,
+                        repo_type="dataset"
+                    )
+
+                    # Look for vector store files
+                    vector_store_files = [f for f in files if f.startswith(f"{DATASET_VECTOR_STORE_PATH}/")]
+
+                    if not vector_store_files:
+                        logger.warning(f"No vector store files found in dataset {self.dataset_id}")
+                        return False, "Vector store not found in dataset"
+
+                    # Create the download directory
+                    vector_store_dir = os.path.join(temp_dir, DATASET_VECTOR_STORE_PATH)
+                    os.makedirs(vector_store_dir, exist_ok=True)
+
+                    # Download all files
+                    for file in vector_store_files:
+                        # Get the file name without the path
+                        filename = os.path.basename(file)
+                        # Download the file
+                        api.hf_hub_download(
+                            repo_id=self.dataset_id,
+                            repo_type="dataset",
+                            filename=file,
+                            local_dir=temp_dir,
+                            local_dir_use_symlinks=False
+                        )
+                        logger.debug(f"Downloaded {file}")
+
+                    # Initialize the embeddings
+                    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+
+                    # Load FAISS from the downloaded files
+                    try:
+                        # Path to the directory with the FAISS files
+                        faiss_path = os.path.join(temp_dir, DATASET_VECTOR_STORE_PATH)
+
+                        # Check that the required files are present
+                        if not os.path.exists(os.path.join(faiss_path, "index.faiss")):
+                            logger.error(f"Missing FAISS index file at {faiss_path}")
+                            return False, "Missing FAISS index file"
+
+                        if not os.path.exists(os.path.join(faiss_path, "index.pkl")):
+                            logger.error(f"Missing FAISS pickle file at {faiss_path}")
+                            return False, "Missing FAISS pickle file"
+
+                        # Load FAISS from the directory
+                        faiss_index = FAISS.load_local(faiss_path, embeddings)
+                        logger.info(f"Successfully loaded FAISS index with {len(faiss_index.docstore._dict)} documents")
+
+                        return True, faiss_index
+
+                    except Exception as e:
+                        logger.error(f"Error loading FAISS index: {str(e)}")
+                        return False, f"Error loading FAISS index: {str(e)}"
+
+                except Exception as e:
+                    logger.error(f"Error listing files in dataset {self.dataset_id}: {str(e)}")
+                    return False, f"Error accessing dataset: {str(e)}"
+
+            finally:
+                # Clean up the temporary directory
+                try:
+                    shutil.rmtree(temp_dir)
+                    logger.debug(f"Cleaned up temporary directory {temp_dir}")
+                except Exception as e:
+                    logger.warning(f"Error cleaning up temporary directory {temp_dir}: {str(e)}")
+
+        except Exception as e:
+            logger.error(f"Exception in download_vector_store: {str(e)}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return False, f"Error downloading vector store: {str(e)}"

    def get_last_update_date(self):
        """