MrSimple01 committed on
Commit
ae5dcf5
·
verified ·
1 Parent(s): 5779b15

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +20 -44
utils.py CHANGED
@@ -195,68 +195,46 @@ def debug_search_tables(vector_index, search_term="С-25"):
195
 
196
  return matching
197
 
198
- GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
199
-
200
  from config import QUERY_EXPANSION_PROMPT
201
  from documents_prep import normalize_text, normalize_steel_designations
202
 
203
- STEEL_PRODUCT_EXPANSIONS = {
204
  "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
205
  "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
206
  "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
207
  "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
208
- "03X17H14M3": ["Трубы", "Листы", "Проволока"]
 
 
209
  }
210
 
211
-
212
- def enhance_query_for_steel_grades(query):
213
- """Expand query with steel grade specific context"""
214
- import re
215
-
216
- # FIX: Use the same pattern as normalize_steel_designations
217
- # Pattern for regular steel grades: 08X18H10T, 12X18H10T, etc.
218
- steel_pattern = r'\b\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
219
- # Pattern for welding wires: СВ-08X19H10, CB-08X19H10
220
- wire_pattern = r'\b[СC][ВB]-\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
221
 
222
- matches = re.findall(steel_pattern, query, re.IGNORECASE)
223
- wire_matches = re.findall(wire_pattern, query, re.IGNORECASE)
224
-
225
- all_matches = matches + wire_matches
226
-
227
- if not all_matches:
228
- return query
229
-
230
- # Collect context expansions
231
  added_context = []
232
- grades_found = []
233
 
234
- for match in all_matches:
235
- match_upper = match.upper()
236
- grades_found.append(match_upper)
237
 
238
- # Check if we have specific context for this grade
239
- if match_upper in STEEL_PRODUCT_EXPANSIONS:
240
- context = ' '.join(STEEL_PRODUCT_EXPANSIONS[match_upper])
241
  added_context.append(context)
242
- log_message(f" Found specific context for {match_upper}: {context}")
243
- else:
244
- # Use generic context for unknown grades
245
- added_context.append(GENERIC_STEEL_CONTEXT)
246
- log_message(f" Using generic context for {match_upper}")
247
 
248
  # Build enhanced query
249
  if added_context:
250
- # Remove duplicates from context
251
  unique_context = ' '.join(set(' '.join(added_context).split()))
252
  enhanced = f"{query} {unique_context}"
253
 
254
- log_message(f"Enhanced query for steel grades: {', '.join(grades_found)}")
255
  log_message(f"Added context: {unique_context[:100]}...")
256
 
257
  return enhanced
258
-
259
- return query
260
 
261
 
262
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
@@ -264,16 +242,14 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
264
  normalized_question = normalize_text(question)
265
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
266
 
267
- # Step 1: Keyword-based enhancement (existing)
268
- enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
269
 
270
- # Step 2: LLM-based query expansion (NEW)
271
  try:
272
  llm = get_llm_model(current_model)
273
  expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
274
  expanded_queries = llm.complete(expansion_prompt).text.strip()
275
-
276
- # Combine original + expanded queries
277
  enhanced_question = f"{enhanced_question} {expanded_queries}"
278
  log_message(f"LLM expanded query: {expanded_queries[:200]}...")
279
  except Exception as e:
 
195
 
196
  return matching
197
 
 
 
198
  from config import QUERY_EXPANSION_PROMPT
199
  from documents_prep import normalize_text, normalize_steel_designations
200
 
201
# Maps a keyword expected in a user query (steel grade, document number,
# welding-wire designation) to the phrases that are appended to the query
# as extra retrieval context when that keyword is found.
KEYWORD_EXPANSIONS = {
    "08X18H10T": [
        "Листы",
        "Трубы",
        "Поковки",
        "Крепежные изделия",
        "Сортовой прокат",
        "Отливки",
    ],
    "12X18H10T": [
        "Листы",
        "Поковки",
        "Сортовой прокат",
    ],
    "10X17H13M2T": [
        "Трубы",
        "Арматура",
        "Поковки",
        "Фланцы",
    ],
    "20X23H18": [
        "Листы",
        "Сортовой прокат",
        "Поковки",
    ],
    "03X17H14M3": [
        "Трубы",
        "Листы",
        "Проволока",
    ],
    "59023.6": [
        "Режимы термической обработки стали 59023.6",
    ],
    "СВ-08X19H10": [
        "Сварочная проволока",
        "Сварка",
        "Сварочные материалы",
    ],
}
210
 
211
def enhance_query_with_keywords(query):
    """Append retrieval context for every known keyword found in *query*.

    Each key of ``KEYWORD_EXPANSIONS`` is searched for in the query as a
    case-insensitive substring (both sides are upper-cased before the
    comparison). The expansion phrases of all matching keys are split into
    words, de-duplicated, and appended to the query after a single space.

    Args:
        query: The user query string to enhance.

    Returns:
        The query followed by the de-duplicated context words, or the query
        itself when no keyword matches.
    """
    query_upper = query.upper()

    added_context = []
    keywords_found = []

    for keyword, expansions in KEYWORD_EXPANSIONS.items():
        # Substring match, case-insensitive.
        if keyword.upper() not in query_upper:
            continue

        context = ' '.join(expansions)
        added_context.append(context)
        keywords_found.append(keyword)
        log_message(f" Found keyword '{keyword}': added context '{context}'")

    if not added_context:
        return query

    # De-duplicate words while keeping first-seen order. A set() would emit
    # the words in an arbitrary order that changes between interpreter runs
    # (string hash randomization), making the enhanced query -- and anything
    # derived from it -- non-reproducible.
    unique_context = ' '.join(dict.fromkeys(' '.join(added_context).split()))
    enhanced = f"{query} {unique_context}"

    log_message(f"Enhanced query with keywords: {', '.join(keywords_found)}")
    log_message(f"Added context: {unique_context[:100]}...")

    return enhanced
 
238
 
239
 
240
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
 
242
  normalized_question = normalize_text(question)
243
  normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
244
 
245
+ # Step 1: Keyword-based enhancement
246
+ enhanced_question = enhance_query_with_keywords(normalized_question_2)
247
 
248
+ # Step 2: LLM-based query expansion
249
  try:
250
  llm = get_llm_model(current_model)
251
  expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
252
  expanded_queries = llm.complete(expansion_prompt).text.strip()
 
 
253
  enhanced_question = f"{enhanced_question} {expanded_queries}"
254
  log_message(f"LLM expanded query: {expanded_queries[:200]}...")
255
  except Exception as e: