MrSimple07 committed on
Commit
05ffb47
·
1 Parent(s): 0d6b2c5

new regex + steel product expansion

Browse files
Files changed (1) hide show
  1. utils.py +46 -9
utils.py CHANGED
@@ -195,20 +195,57 @@ def debug_search_tables(vector_index, search_term="С-25"):
195
 
196
  return matching
197
 
 
 
 
198
  from documents_prep import normalize_text, normalize_steel_designations
199
 
 
 
 
 
 
 
 
 
 
# Pre-commit revision of this helper (the removed side of the diff). A later
# definition with the same name in this file replaces it.
def enhance_query_for_steel_grades(query):
    """Append generic steel context words to a query that mentions a steel grade.

    Args:
        query: The search query text.

    Returns:
        ``query`` followed by a fixed set of Russian context words when the
        grade pattern matches at least once; otherwise ``query`` unchanged.
    """
    import re

    # Grade shape: 1-3 digits, one grade letter, then any run of grade letters
    # and digits. The letter class appears to list both Latin and Cyrillic
    # look-alike forms; matching is case-insensitive.
    steel_pattern = r'\b\d{1,3}[XHТCВKMAPХНТСВКМАР]\d*[XHТCВKMAPХНТСВКМАР\d]*\b'
    matches = re.findall(steel_pattern, query, re.IGNORECASE)

    # Nothing that looks like a steel grade: leave the query untouched.
    if not matches:
        return query

    # Add contextual terms
    enhanced = query + " стандарт материал марка стали применение"
    log_message(f"Enhanced query with steel context: {enhanced}")
    return enhanced
 
195
 
196
  return matching
197
 
# Fallback context words (Russian) appended to a query by
# enhance_query_for_steel_grades when a detected grade has no entry in
# STEEL_PRODUCT_EXPANSIONS.
GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
199
+
200
+
201
  from documents_prep import normalize_text, normalize_steel_designations
202
 
# Grade-specific query expansions: maps a steel grade designation to the
# product forms (Russian terms) that enhance_query_for_steel_grades adds to a
# query mentioning that grade.
STEEL_PRODUCT_EXPANSIONS = {
    "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
    "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
    "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
    "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
    "03X17H14M3": ["Трубы", "Листы", "Проволока"],
}
210
+
211
+
def enhance_query_for_steel_grades(query):
    """Expand a query with context terms for the steel designations it mentions.

    Designations are detected with a case-insensitive pattern that requires a
    two-letter prefix and hyphen (``[СC][ВB]-``) before the grade itself. For
    each one found, the bare grade (prefix removed, Cyrillic look-alike letters
    mapped to Latin) is looked up in the module-level
    ``STEEL_PRODUCT_EXPANSIONS``; a hit contributes that grade's product terms,
    a miss contributes ``GENERIC_STEEL_CONTEXT``. Progress is reported through
    the module-level ``log_message``.

    Args:
        query: The search query text. Empty or ``None`` is returned as is.

    Returns:
        ``query`` unchanged when no designation is found; otherwise ``query``
        followed by the collected context words, de-duplicated word by word
        in first-seen order so the result is deterministic.
    """
    import re

    if not query:
        return query

    # Detect steel grades in query
    steel_pattern = r'\b[СC][ВB]-\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
    matches = re.findall(steel_pattern, query, re.IGNORECASE)
    if not matches:
        return query

    # Cyrillic capitals that are visually identical to Latin ones. Mapping
    # both the query grade and the table keys through this lets the lookup
    # succeed regardless of which alphabet either side was typed in.
    to_latin = str.maketrans({
        "А": "A", "В": "B", "Е": "E", "К": "K", "М": "M", "Н": "H",
        "О": "O", "Р": "P", "С": "C", "Т": "T", "Х": "X",
    })
    expansions = {
        grade.upper().translate(to_latin): products
        for grade, products in STEEL_PRODUCT_EXPANSIONS.items()
    }

    context_terms = []
    grades_found = []
    for match in matches:
        match_upper = match.upper()
        grades_found.append(match_upper)

        # The pattern guarantees a hyphen after the prefix; the table is keyed
        # by the bare grade, so the prefix must not be part of the lookup key.
        bare_grade = match_upper.split('-', 1)[1].translate(to_latin)
        products = expansions.get(bare_grade)

        if products is not None:
            # Table values are lists of terms: extend so every collected
            # item stays a string and can be joined below.
            context_terms.extend(products)
            log_message(f" Found specific context for {match_upper}")
        else:
            # Use generic context for unknown grades
            context_terms.append(GENERIC_STEEL_CONTEXT)
            log_message(f" Using generic context for {match_upper}")

    # Remove duplicate words while keeping first-seen order (a set would give
    # a different word order from run to run).
    unique_context = ' '.join(dict.fromkeys(' '.join(context_terms).split()))
    enhanced = f"{query} {unique_context}"

    log_message(f"Enhanced query for steel grades: {', '.join(grades_found)}")
    log_message(f"Added context: {unique_context[:100]}...")

    return enhanced