MrSimple07 committed on
Commit
ee0dae9
·
1 Parent(s): 7ffbe71

hybrid format + llm + keyword, + 4000, 30

Browse files
Files changed (2) hide show
  1. config.py +1 -1
  2. utils.py +17 -4
config.py CHANGED
@@ -51,7 +51,7 @@ DEFAULT_MODEL = "Gemini 2.5 Flash"
51
  CHUNK_SIZE = 1500
52
  CHUNK_OVERLAP = 128
53
 
54
- MAX_CHARS_TABLE = 3000
55
  MAX_ROWS_TABLE = 30
56
 
57
 
 
51
  CHUNK_SIZE = 1500
52
  CHUNK_OVERLAP = 128
53
 
54
+ MAX_CHARS_TABLE = 4000
55
  MAX_ROWS_TABLE = 30
56
 
57
 
utils.py CHANGED
@@ -197,7 +197,7 @@ def debug_search_tables(vector_index, search_term="С-25"):
197
 
198
  GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
199
 
200
-
201
  from documents_prep import normalize_text, normalize_steel_designations
202
 
203
  STEEL_PRODUCT_EXPANSIONS = {
@@ -264,9 +264,23 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
264
  normalized_question = normalize_text(question)
265
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
266
 
267
- # FIX: Add query enhancement for steel grades
268
  enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  if change_list:
271
  log_message(f"Query changes: {', '.join(change_list)}")
272
  if query_engine is None:
@@ -274,12 +288,11 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
274
 
275
  try:
276
  start_time = time.time()
277
- # FIX: Use enhanced_question instead of normalized_question_2
278
  retrieved_nodes = query_engine.retriever.retrieve(enhanced_question)
279
  log_message(f"user query: {question}")
280
  log_message(f"normalized query: {normalized_question}")
281
  log_message(f"after steel normalization: {normalized_question_2}")
282
- log_message(f"enhanced query: {enhanced_question}") # ADD THIS LOG
283
  log_message(f"Steel grades normalized in query: {query_changes}")
284
 
285
  log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
 
197
 
198
  GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
199
 
200
+ from config import QUERY_EXPANSION_PROMPT
201
  from documents_prep import normalize_text, normalize_steel_designations
202
 
203
  STEEL_PRODUCT_EXPANSIONS = {
 
264
  normalized_question = normalize_text(question)
265
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
266
 
267
+ # Step 1: Keyword-based enhancement (existing)
268
  enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
269
 
270
+ # Step 2: LLM-based query expansion (NEW)
271
+ try:
272
+ llm = get_llm_model(current_model)
273
+ expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
274
+ expanded_queries = llm.complete(expansion_prompt).text.strip()
275
+
276
+ # Combine original + expanded queries
277
+ enhanced_question = f"{enhanced_question} {expanded_queries}"
278
+ log_message(f"LLM expanded query: {expanded_queries[:200]}...")
279
+ except Exception as e:
280
+ log_message(f"Query expansion failed: {e}, using keyword-only enhancement")
281
+
282
+ if change_list:
283
+ log_message(f"Query changes: {', '.join(change_list)}")
284
  if change_list:
285
  log_message(f"Query changes: {', '.join(change_list)}")
286
  if query_engine is None:
 
288
 
289
  try:
290
  start_time = time.time()
 
291
  retrieved_nodes = query_engine.retriever.retrieve(enhanced_question)
292
  log_message(f"user query: {question}")
293
  log_message(f"normalized query: {normalized_question}")
294
  log_message(f"after steel normalization: {normalized_question_2}")
295
+ log_message(f"enhanced query: {enhanced_question}")
296
  log_message(f"Steel grades normalized in query: {query_changes}")
297
 
298
  log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")