MrSimple07 committed on
Commit
5789aa7
·
1 Parent(s): c3088ad

new keyword-based enhancement + MAX_CHARS_TABLE = 3000, MAX_ROWS_TABLE = 30

Browse files
Files changed (2) hide show
  1. config.py +1 -1
  2. utils.py +18 -43
config.py CHANGED
@@ -51,7 +51,7 @@ DEFAULT_MODEL = "Gemini 2.5 Flash"
51
  CHUNK_SIZE = 1500
52
  CHUNK_OVERLAP = 128
53
 
54
- MAX_CHARS_TABLE = 4500
55
  MAX_ROWS_TABLE = 30
56
 
57
 
 
51
  CHUNK_SIZE = 1500
52
  CHUNK_OVERLAP = 128
53
 
54
+ MAX_CHARS_TABLE = 3000
55
  MAX_ROWS_TABLE = 30
56
 
57
 
utils.py CHANGED
@@ -195,69 +195,46 @@ def debug_search_tables(vector_index, search_term="С-25"):
195
 
196
  return matching
197
 
198
- GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
199
-
200
  from config import QUERY_EXPANSION_PROMPT
201
  from documents_prep import normalize_text, normalize_steel_designations
202
 
203
- STEEL_PRODUCT_EXPANSIONS = {
204
  "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
205
  "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
206
  "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
207
  "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
208
  "03X17H14M3": ["Трубы", "Листы", "Проволока"],
209
  "59023.6": ["Режимы термической обработки стали 59023.6"],
 
210
  }
211
 
212
-
213
- def enhance_query_for_steel_grades(query):
214
- """Expand query with steel grade specific context"""
215
- import re
216
-
217
- # FIX: Use the same pattern as normalize_steel_designations
218
- # Pattern for regular steel grades: 08X18H10T, 12X18H10T, etc.
219
- steel_pattern = r'\b\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
220
- # Pattern for welding wires: СВ-08X19H10, CB-08X19H10
221
- wire_pattern = r'\b[СC][ВB]-\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
222
 
223
- matches = re.findall(steel_pattern, query, re.IGNORECASE)
224
- wire_matches = re.findall(wire_pattern, query, re.IGNORECASE)
225
-
226
- all_matches = matches + wire_matches
227
-
228
- if not all_matches:
229
- return query
230
-
231
- # Collect context expansions
232
  added_context = []
233
- grades_found = []
234
 
235
- for match in all_matches:
236
- match_upper = match.upper()
237
- grades_found.append(match_upper)
238
 
239
- # Check if we have specific context for this grade
240
- if match_upper in STEEL_PRODUCT_EXPANSIONS:
241
- context = ' '.join(STEEL_PRODUCT_EXPANSIONS[match_upper])
242
  added_context.append(context)
243
- log_message(f" Found specific context for {match_upper}: {context}")
244
- else:
245
- # Use generic context for unknown grades
246
- added_context.append(GENERIC_STEEL_CONTEXT)
247
- log_message(f" Using generic context for {match_upper}")
248
 
249
  # Build enhanced query
250
  if added_context:
251
- # Remove duplicates from context
252
  unique_context = ' '.join(set(' '.join(added_context).split()))
253
  enhanced = f"{query} {unique_context}"
254
 
255
- log_message(f"Enhanced query for steel grades: {', '.join(grades_found)}")
256
  log_message(f"Added context: {unique_context[:100]}...")
257
 
258
  return enhanced
259
-
260
- return query
261
 
262
 
263
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
@@ -265,16 +242,14 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
265
  normalized_question = normalize_text(question)
266
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
267
 
268
- # Step 1: Keyword-based enhancement (existing)
269
- enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
270
 
271
- # Step 2: LLM-based query expansion (NEW)
272
  try:
273
  llm = get_llm_model(current_model)
274
  expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
275
  expanded_queries = llm.complete(expansion_prompt).text.strip()
276
-
277
- # Combine original + expanded queries
278
  enhanced_question = f"{enhanced_question} {expanded_queries}"
279
  log_message(f"LLM expanded query: {expanded_queries[:200]}...")
280
  except Exception as e:
 
195
 
196
  return matching
197
 
 
 
198
  from config import QUERY_EXPANSION_PROMPT
199
  from documents_prep import normalize_text, normalize_steel_designations
200
 
201
# Lookup table used by enhance_query_with_keywords: each key is a designation
# that may appear in a user query, and the value lists the extra search terms
# appended to the query when that key is found in it.
KEYWORD_EXPANSIONS = {
    "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
    "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
    "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
    "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
    "03X17H14M3": ["Трубы", "Листы", "Проволока"],
    "59023.6": ["Режимы термической обработки стали 59023.6"],
    "СВ-08X19H10": ["Сварочная проволока", "Сварка", "Сварочные материалы"],
}


def enhance_query_with_keywords(query: str) -> str:
    """Append extra search terms for every known keyword found in *query*.

    Each key of ``KEYWORD_EXPANSIONS`` is looked up in the query as a
    case-insensitive substring. The expansion terms of all matching keys are
    split into individual words, de-duplicated, and appended to the query
    after a single space.

    Args:
        query: The query text to enhance.

    Returns:
        The query followed by the de-duplicated expansion words, or the
        query unchanged when no keyword matches.

    Note:
        Progress is reported through ``log_message``, which is expected to be
        defined elsewhere in this module.
    """
    query_upper = query.upper()

    # Find matching keywords
    added_context = []
    keywords_found = []

    for keyword, expansions in KEYWORD_EXPANSIONS.items():
        # Check if keyword is in query (case-insensitive)
        if keyword.upper() in query_upper:
            context = ' '.join(expansions)
            added_context.append(context)
            keywords_found.append(keyword)
            log_message(f" Found keyword '{keyword}': added context '{context}'")

    if not added_context:
        return query

    # Build enhanced query. dict.fromkeys drops duplicate words while keeping
    # first-seen order; a plain set() would reorder the words differently on
    # every interpreter run (string hash randomization), making the query
    # sent to retrieval non-reproducible.
    unique_context = ' '.join(dict.fromkeys(' '.join(added_context).split()))
    enhanced = f"{query} {unique_context}"

    log_message(f"Enhanced query with keywords: {', '.join(keywords_found)}")
    log_message(f"Added context: {unique_context[:100]}...")

    return enhanced
 
238
 
239
 
240
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
 
242
  normalized_question = normalize_text(question)
243
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
244
 
245
+ # Step 1: Keyword-based enhancement
246
+ enhanced_question = enhance_query_with_keywords(normalized_question_2)
247
 
248
+ # Step 2: LLM-based query expansion
249
  try:
250
  llm = get_llm_model(current_model)
251
  expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
252
  expanded_queries = llm.complete(expansion_prompt).text.strip()
 
 
253
  enhanced_question = f"{enhanced_question} {expanded_queries}"
254
  log_message(f"LLM expanded query: {expanded_queries[:200]}...")
255
  except Exception as e: