Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Oct 17, 2025

Commit

5789aa7

1 Parent(s): c3088ad

new keyword based enhancement + 3000, 30

Browse files

Files changed (2) hide show

config.py +1 -1
utils.py +18 -43

config.py CHANGED Viewed

@@ -51,7 +51,7 @@ DEFAULT_MODEL = "Gemini 2.5 Flash"
 CHUNK_SIZE = 1500
 CHUNK_OVERLAP = 128
-MAX_CHARS_TABLE = 4500
 MAX_ROWS_TABLE = 30

 CHUNK_SIZE = 1500
 CHUNK_OVERLAP = 128
+MAX_CHARS_TABLE = 3000
 MAX_ROWS_TABLE = 30

utils.py CHANGED Viewed

@@ -195,69 +195,46 @@ def debug_search_tables(vector_index, search_term="С-25"):
     return matching
-GENERIC_STEEL_CONTEXT = "стандарт ГОСТ технические условия марка материал применение сварка"
 from config import QUERY_EXPANSION_PROMPT
 from documents_prep import normalize_text, normalize_steel_designations
# Product-form terms appended to a query when the corresponding designation
# is detected in it. Keys are compared against the upper-cased regex match.
STEEL_PRODUCT_EXPANSIONS = {
    "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
    "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
    "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
    "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
    "03X17H14M3": ["Трубы", "Листы", "Проволока"],
    "59023.6": ["Режимы термической обработки стали 59023.6"],
}


def enhance_query_for_steel_grades(query):
    """Expand query with steel grade specific context.

    Designations are detected with two regexes (plain grades such as
    ``08X18H10T`` and welding wires such as ``СВ-08X19H10``). For each
    distinct designation the matching entry of ``STEEL_PRODUCT_EXPANSIONS``
    is used, falling back to ``GENERIC_STEEL_CONTEXT`` when there is none.

    Args:
        query: The user query text.

    Returns:
        ``query`` unchanged when no designation is detected; otherwise
        ``query`` followed by a space and the de-duplicated context words,
        in first-seen order.
    """
    import re

    # Pattern for regular steel grades: 08X18H10T, 12X18H10T, etc.
    steel_pattern = r'\b\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'
    # Pattern for welding wires: СВ-08X19H10, CB-08X19H10
    wire_pattern = r'\b[СC][ВB]-\d{1,3}(?:[A-ZА-ЯЁ]\d*)+\b'

    all_matches = (
        re.findall(steel_pattern, query, re.IGNORECASE)
        + re.findall(wire_pattern, query, re.IGNORECASE)
    )
    if not all_matches:
        return query

    # The same designation can be matched more than once (repeated in the
    # query, or differing only in case); handle each one a single time while
    # keeping first-seen order.
    grades_found = list(dict.fromkeys(match.upper() for match in all_matches))

    added_context = []
    for grade in grades_found:
        if grade in STEEL_PRODUCT_EXPANSIONS:
            context = ' '.join(STEEL_PRODUCT_EXPANSIONS[grade])
            added_context.append(context)
            log_message(f"  Found specific context for {grade}: {context}")
        else:
            # Use generic context for unknown grades
            added_context.append(GENERIC_STEEL_CONTEXT)
            log_message(f"  Using generic context for {grade}")

    # dict.fromkeys de-duplicates words while preserving order; a set() would
    # yield a different word order from one interpreter run to the next.
    unique_context = ' '.join(dict.fromkeys(' '.join(added_context).split()))
    enhanced = f"{query} {unique_context}"
    log_message(f"Enhanced query for steel grades: {', '.join(grades_found)}")
    log_message(f"Added context: {unique_context[:100]}...")
    return enhanced
 def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
@@ -265,16 +242,14 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
     normalized_question = normalize_text(question)
     normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
-    # Step 1: Keyword-based enhancement (existing)
-    enhanced_question = enhance_query_for_steel_grades(normalized_question_2)
-    # Step 2: LLM-based query expansion (NEW)
     try:
         llm = get_llm_model(current_model)
         expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
         expanded_queries = llm.complete(expansion_prompt).text.strip()
-        # Combine original + expanded queries
         enhanced_question = f"{enhanced_question} {expanded_queries}"
         log_message(f"LLM expanded query: {expanded_queries[:200]}...")
     except Exception as e:

     return matching
 from config import QUERY_EXPANSION_PROMPT
 from documents_prep import normalize_text, normalize_steel_designations
# Keyword -> context terms appended to a query that contains the keyword.
# Keys are matched as case-insensitive substrings of the query.
KEYWORD_EXPANSIONS = {
    "08X18H10T": ["Листы", "Трубы", "Поковки", "Крепежные изделия", "Сортовой прокат", "Отливки"],
    "12X18H10T": ["Листы", "Поковки", "Сортовой прокат"],
    "10X17H13M2T": ["Трубы", "Арматура", "Поковки", "Фланцы"],
    "20X23H18": ["Листы", "Сортовой прокат", "Поковки"],
    "03X17H14M3": ["Трубы", "Листы", "Проволока"],
    "59023.6": ["Режимы термической обработки стали 59023.6"],
    "СВ-08X19H10": ["Сварочная проволока", "Сварка", "Сварочные материалы"],
}


def enhance_query_with_keywords(query: str) -> str:
    """Append context terms for every known keyword found in ``query``.

    Each key of ``KEYWORD_EXPANSIONS`` is looked up in the query as a
    case-insensitive substring. The expansion terms of all matching keys
    are split into words, de-duplicated, and appended to the query.

    Args:
        query: The user query text.

    Returns:
        ``query`` unchanged when no keyword matches; otherwise ``query``
        followed by a space and the unique context words in first-seen
        order (table order, then term order).
    """
    query_upper = query.upper()

    keywords_found = []
    added_context = []
    for keyword, expansions in KEYWORD_EXPANSIONS.items():
        # Check if keyword is in query (case-insensitive)
        if keyword.upper() not in query_upper:
            continue
        context = ' '.join(expansions)
        added_context.append(context)
        keywords_found.append(keyword)
        log_message(f"  Found keyword '{keyword}': added context '{context}'")

    if not added_context:
        return query

    # dict.fromkeys de-duplicates words while preserving order; a set() would
    # yield a different word order from one interpreter run to the next,
    # making the enhanced query (and anything derived from it) unstable.
    unique_context = ' '.join(dict.fromkeys(' '.join(added_context).split()))
    enhanced = f"{query} {unique_context}"
    log_message(f"Enhanced query with keywords: {', '.join(keywords_found)}")
    log_message(f"Added context: {unique_context[:100]}...")
    return enhanced
 def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
     normalized_question = normalize_text(question)
     normalized_question_2, query_changes, change_list = normalize_steel_designations(question)
+    # Step 1: Keyword-based enhancement
+    enhanced_question = enhance_query_with_keywords(normalized_question_2)
+    # Step 2: LLM-based query expansion
     try:
         llm = get_llm_model(current_model)
         expansion_prompt = QUERY_EXPANSION_PROMPT.format(original_query=enhanced_question)
         expanded_queries = llm.complete(expansion_prompt).text.strip()
         enhanced_question = f"{enhanced_question} {expanded_queries}"
         log_message(f"LLM expanded query: {expanded_queries[:200]}...")
     except Exception as e: