Spaces:
Sleeping
Sleeping
Commit
·
ee0dae9
1
Parent(s):
7ffbe71
hybrid format + llm + keyword, + 4000, 30
Browse files
config.py
CHANGED
|
@@ -51,7 +51,7 @@ DEFAULT_MODEL = "Gemini 2.5 Flash"
|
|
| 51 |
CHUNK_SIZE = 1500
|
| 52 |
CHUNK_OVERLAP = 128
|
| 53 |
|
| 54 |
-
MAX_CHARS_TABLE =
|
| 55 |
MAX_ROWS_TABLE = 30
|
| 56 |
|
| 57 |
|
|
|
|
| 51 |
CHUNK_SIZE = 1500
|
| 52 |
CHUNK_OVERLAP = 128
|
| 53 |
|
| 54 |
+
MAX_CHARS_TABLE = 4000
|
| 55 |
MAX_ROWS_TABLE = 30
|
| 56 |
|
| 57 |
|
utils.py
CHANGED
|
@@ -197,7 +197,7 @@ def debug_search_tables(vector_index, search_term="С-25"):
|
|
| 197 |
|
| 198 |
GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
|
| 199 |
|
| 200 |
-
|
| 201 |
from documents_prep import normalize_text, normalize_steel_designations
|
| 202 |
|
| 203 |
STEEL_PRODUCT_EXPANSIONS = {
|
|
@@ -264,9 +264,23 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 264 |
normalized_question = normalize_text(question)
|
| 265 |
normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
|
| 266 |
|
| 267 |
-
#
|
| 268 |
enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
if change_list:
|
| 271 |
log_message(f"Query changes: {', '.join(change_list)}")
|
| 272 |
if query_engine is None:
|
|
@@ -274,12 +288,11 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
|
|
| 274 |
|
| 275 |
try:
|
| 276 |
start_time = time.time()
|
| 277 |
-
# FIX: Use enhanced_question instead of normalized_question_2
|
| 278 |
retrieved_nodes = query_engine.retriever.retrieve(enhanced_question)
|
| 279 |
log_message(f"user query: {question}")
|
| 280 |
log_message(f"normalized query: {normalized_question}")
|
| 281 |
log_message(f"after steel normalization: {normalized_question_2}")
|
| 282 |
-
log_message(f"enhanced query: {enhanced_question}")
|
| 283 |
log_message(f"Steel grades normalized in query: {query_changes}")
|
| 284 |
|
| 285 |
log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
|
|
|
|
| 197 |
|
| 198 |
GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
|
| 199 |
|
| 200 |
+
from config import QUERY_EXPANSION_PROMPT
|
| 201 |
from documents_prep import normalize_text, normalize_steel_designations
|
| 202 |
|
| 203 |
STEEL_PRODUCT_EXPANSIONS = {
|
|
|
|
| 264 |
normalized_question = normalize_text(question)
|
| 265 |
normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
|
| 266 |
|
| 267 |
+
# Step 1: Keyword-based enhancement (existing)
|
| 268 |
enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
|
| 269 |
|
| 270 |
+
# Step 2: LLM-based query expansion (NEW)
|
| 271 |
+
try:
|
| 272 |
+
llm = get_llm_model(current_model)
|
| 273 |
+
expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
|
| 274 |
+
expanded_queries = llm.complete(expansion_prompt).text.strip()
|
| 275 |
+
|
| 276 |
+
# Combine original + expanded queries
|
| 277 |
+
enhanced_question = f"{enhanced_question} {expanded_queries}"
|
| 278 |
+
log_message(f"LLM expanded query: {expanded_queries[:200]}...")
|
| 279 |
+
except Exception as e:
|
| 280 |
+
log_message(f"Query expansion failed: {e}, using keyword-only enhancement")
|
| 281 |
+
|
| 282 |
+
if change_list:
|
| 283 |
+
log_message(f"Query changes: {', '.join(change_list)}")
|
| 284 |
if change_list:
|
| 285 |
log_message(f"Query changes: {', '.join(change_list)}")
|
| 286 |
if query_engine is None:
|
|
|
|
| 288 |
|
| 289 |
try:
|
| 290 |
start_time = time.time()
|
|
|
|
| 291 |
retrieved_nodes = query_engine.retriever.retrieve(enhanced_question)
|
| 292 |
log_message(f"user query: {question}")
|
| 293 |
log_message(f"normalized query: {normalized_question}")
|
| 294 |
log_message(f"after steel normalization: {normalized_question_2}")
|
| 295 |
+
log_message(f"enhanced query: {enhanced_question}")
|
| 296 |
log_message(f"Steel grades normalized in query: {query_changes}")
|
| 297 |
|
| 298 |
log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
|