Rulga committed on
Commit
b7c98a7
·
1 Parent(s): e0aee71

Enhance knowledge base update functionality: Add methods to retrieve selected URLs and update the knowledge base with selected sources, improving error handling and logging.

Browse files
Files changed (2) hide show
  1. app.py +47 -2
  2. src/knowledge_base/dataset.py +2 -1
app.py CHANGED
@@ -27,7 +27,7 @@ langdetect.DetectorFactory.seed = 0
27
  load_dotenv()
28
 
29
  # Local imports - config
30
- from config.constants import DEFAULT_SYSTEM_MESSAGE
31
  from config.settings import (
32
  API_CONFIG,
33
  ACTIVE_MODEL,
@@ -36,13 +36,58 @@ from config.settings import (
36
  DATASET_ID,
37
  DATASET_PREFERENCES_PATH,
38
  DATASET_VECTOR_STORE_PATH,
39
- DATASET_ANNOTATIONS_PATH, # Добавляем импорт
40
  DEFAULT_MODEL,
41
  EMBEDDING_MODEL,
42
  HF_TOKEN,
43
  MODELS
44
  )
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Local imports - source modules
47
  from src.analytics.chat_evaluator import ChatEvaluator
48
  from src.knowledge_base.vector_store import create_vector_store, load_vector_store
 
27
  load_dotenv()
28
 
29
  # Local imports - config
30
+ from config.constants import DEFAULT_SYSTEM_MESSAGE, URLS
31
  from config.settings import (
32
  API_CONFIG,
33
  ACTIVE_MODEL,
 
36
  DATASET_ID,
37
  DATASET_PREFERENCES_PATH,
38
  DATASET_VECTOR_STORE_PATH,
39
+ DATASET_ANNOTATIONS_PATH,
40
  DEFAULT_MODEL,
41
  EMBEDDING_MODEL,
42
  HF_TOKEN,
43
  MODELS
44
  )
45
 
46
+ from src.chat.evaluator import ChatEvaluator
47
+ from src.knowledge_base.dataset import DatasetManager
48
+ from src.knowledge_base.vector_store import create_vector_store, load_vector_store
49
+ import config.constants as constants
50
+
51
def get_selected_urls(sources_df):
    """Return the URLs of the rows that are marked for inclusion.

    Args:
        sources_df: A pandas DataFrame, or any value accepted by the
            ``pd.DataFrame`` constructor (e.g. a dict of columns or a list
            of row dicts), providing an ``"Include"`` column and a ``"URL"``
            column.

    Returns:
        list: The ``"URL"`` values of every row whose ``"Include"`` value
        equals ``True``, in row order. An empty list is returned when
        ``sources_df`` is ``None`` or when the selection fails (the failure
        is logged with its traceback).
    """
    # Nothing to select from; avoid reporting a misleading lookup error.
    if sources_df is None:
        return []

    try:
        if not isinstance(sources_df, pd.DataFrame):
            sources_df = pd.DataFrame(sources_df)
        # ``.eq(True)`` keeps the original "equals True" semantics without
        # the ``== True`` comparison that linters flag.
        include_mask = sources_df["Include"].eq(True)
        return sources_df.loc[include_mask, "URL"].tolist()
    except Exception as e:
        # Best-effort helper: callers treat an empty list as "nothing
        # selected", so log (with traceback) instead of propagating.
        logger.exception("Error getting selected URLs: %s", e)
        return []
61
+
62
def update_kb_with_selected(sources_df) -> str:
    """Update the knowledge base using only the selected sources.

    The selected URLs temporarily replace ``constants.URLS`` while
    ``create_vector_store(mode="update")`` runs; the previous value is
    always restored afterwards, even if the update raises.

    Args:
        sources_df: Table of sources passed through to
            ``get_selected_urls``.

    Returns:
        str: The message returned by ``create_vector_store``, or a string
        starting with ``"Error"`` when nothing is selected or the update
        fails.
    """
    try:
        selected_urls = get_selected_urls(sources_df)
        if not selected_urls:
            return "Error: No sources selected"

        # Snapshot the module attribute that is actually being overridden.
        # The ``URLS`` name imported via ``from config.constants import``
        # is bound at import time and may be stale if ``constants.URLS``
        # was rebound since then.
        original_urls = constants.URLS
        # NOTE(review): presumably create_vector_store reads
        # ``constants.URLS`` at call time -- confirm; a module that did
        # ``from config.constants import URLS`` will not see this override.
        constants.URLS = list(selected_urls)
        try:
            success, message = create_vector_store(mode="update")
            if success:
                save_kb_metadata()
            return message
        finally:
            # Always put the previous source list back.
            constants.URLS = original_urls
    except Exception as e:
        logger.exception("Error updating knowledge base: %s", e)
        return f"Error updating knowledge base: {str(e)}"
84
+
85
+ # Set seed for consistent results
86
+ langdetect.DetectorFactory.seed = 0
87
+
88
+ # Load environment variables
89
+ load_dotenv()
90
+
91
  # Local imports - source modules
92
  from src.analytics.chat_evaluator import ChatEvaluator
93
  from src.knowledge_base.vector_store import create_vector_store, load_vector_store
src/knowledge_base/dataset.py CHANGED
@@ -7,8 +7,10 @@ import json
7
  import tempfile
8
  from typing import Tuple, List, Dict, Any, Optional, Union
9
  from datetime import datetime
 
10
  from huggingface_hub import HfApi, HfFolder
11
  from langchain_community.vectorstores import FAISS
 
12
  from config.settings import (
13
  VECTOR_STORE_PATH,
14
  HF_TOKEN,
@@ -23,7 +25,6 @@ from config.settings import (
23
  from langchain_huggingface import HuggingFaceEmbeddings
24
  import logging
25
 
26
- logging.basicConfig(level=logging.INFO)
27
  logger = logging.getLogger(__name__)
28
 
29
  class DatasetManager:
 
7
  import tempfile
8
  from typing import Tuple, List, Dict, Any, Optional, Union
9
  from datetime import datetime
10
+ import logging
11
  from huggingface_hub import HfApi, HfFolder
12
  from langchain_community.vectorstores import FAISS
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
  from config.settings import (
15
  VECTOR_STORE_PATH,
16
  HF_TOKEN,
 
25
  from langchain_huggingface import HuggingFaceEmbeddings
26
  import logging
27
 
 
28
  logger = logging.getLogger(__name__)
29
 
30
  class DatasetManager: