Spaces:
Sleeping
Sleeping
Commit
·
edc2f6f
1
Parent(s):
8befcd1
bm = 50, semantic = 50, hybrid = 50
Browse files
- index_retriever.py +2 -2
- utils.py +4 -40
index_retriever.py
CHANGED
|
@@ -16,7 +16,7 @@ def create_query_engine(vector_index):
|
|
| 16 |
try:
|
| 17 |
bm25_retriever = BM25Retriever.from_defaults(
|
| 18 |
docstore=vector_index.docstore,
|
| 19 |
-
similarity_top_k=
|
| 20 |
)
|
| 21 |
|
| 22 |
vector_retriever = VectorIndexRetriever(
|
|
@@ -27,7 +27,7 @@ def create_query_engine(vector_index):
|
|
| 27 |
|
| 28 |
hybrid_retriever = QueryFusionRetriever(
|
| 29 |
[vector_retriever, bm25_retriever],
|
| 30 |
-
similarity_top_k=
|
| 31 |
num_queries=1
|
| 32 |
)
|
| 33 |
|
|
|
|
| 16 |
try:
|
| 17 |
bm25_retriever = BM25Retriever.from_defaults(
|
| 18 |
docstore=vector_index.docstore,
|
| 19 |
+
similarity_top_k=50
|
| 20 |
)
|
| 21 |
|
| 22 |
vector_retriever = VectorIndexRetriever(
|
|
|
|
| 27 |
|
| 28 |
hybrid_retriever = QueryFusionRetriever(
|
| 29 |
[vector_retriever, bm25_retriever],
|
| 30 |
+
similarity_top_k=50,
|
| 31 |
num_queries=1
|
| 32 |
)
|
| 33 |
|
utils.py
CHANGED
|
@@ -225,32 +225,6 @@ def generate_sources_html(nodes, chunks_df=None):
|
|
| 225 |
|
| 226 |
html += "</div>"
|
| 227 |
return html
|
| 228 |
-
|
| 229 |
-
def expand_query(question, llm_model):
|
| 230 |
-
"""
|
| 231 |
-
Generate multiple query variations for better retrieval
|
| 232 |
-
"""
|
| 233 |
-
expansion_prompt = f"""Дан вопрос: "{question}"
|
| 234 |
-
|
| 235 |
-
Сгенерируй 2 альтернативные формулировки этого вопроса для поиска в базе данных.
|
| 236 |
-
Используй синонимы и разные формулировки, сохраняя смысл.
|
| 237 |
-
|
| 238 |
-
Формат ответа (только вопросы, по одному на строку):
|
| 239 |
-
1. [первая формулировка]
|
| 240 |
-
2. [вторая формулировка]"""
|
| 241 |
-
|
| 242 |
-
try:
|
| 243 |
-
response = llm_model.complete(expansion_prompt)
|
| 244 |
-
expanded = [q.strip() for q in response.text.split('\n') if q.strip() and not q.strip().startswith('1.') and not q.strip().startswith('2.')]
|
| 245 |
-
# Clean up
|
| 246 |
-
expanded = [q.lstrip('12. ').strip() for q in expanded if len(q) > 10][:2]
|
| 247 |
-
log_message(f"Query expansion: {len(expanded)} вариантов")
|
| 248 |
-
return [question] + expanded
|
| 249 |
-
except Exception as e:
|
| 250 |
-
log_message(f"Ошибка расширения запроса: {str(e)}")
|
| 251 |
-
return [question]
|
| 252 |
-
|
| 253 |
-
|
| 254 |
def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
|
| 255 |
if query_engine is None:
|
| 256 |
return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
|
|
@@ -260,24 +234,14 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 260 |
|
| 261 |
llm = get_llm_model(current_model)
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
all_nodes = []
|
| 266 |
-
seen_node_ids = set()
|
| 267 |
-
|
| 268 |
-
for query_var in query_variations:
|
| 269 |
-
retrieved = query_engine.retriever.retrieve(query_var)
|
| 270 |
-
for node in retrieved:
|
| 271 |
-
node_id = f"{node.node_id if hasattr(node, 'node_id') else hash(node.text)}"
|
| 272 |
-
if node_id not in seen_node_ids:
|
| 273 |
-
all_nodes.append(node)
|
| 274 |
-
seen_node_ids.add(node_id)
|
| 275 |
|
| 276 |
-
log_message(f"Получено {len(
|
| 277 |
|
| 278 |
reranked_nodes = rerank_nodes(
|
| 279 |
question,
|
| 280 |
-
|
| 281 |
reranker,
|
| 282 |
top_k=20,
|
| 283 |
min_score_threshold=0.5,
|
|
|
|
| 225 |
|
| 226 |
html += "</div>"
|
| 227 |
return html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
|
| 229 |
if query_engine is None:
|
| 230 |
return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
|
|
|
|
| 234 |
|
| 235 |
llm = get_llm_model(current_model)
|
| 236 |
|
| 237 |
+
# Direct retrieval without query expansion
|
| 238 |
+
retrieved_nodes = query_engine.retriever.retrieve(question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
+
log_message(f"Получено {len(retrieved_nodes)} узлов")
|
| 241 |
|
| 242 |
reranked_nodes = rerank_nodes(
|
| 243 |
question,
|
| 244 |
+
retrieved_nodes,
|
| 245 |
reranker,
|
| 246 |
top_k=20,
|
| 247 |
min_score_threshold=0.5,
|