Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

MrSimple07 committed on Oct 14, 2025

Commit

6c839c3

1 Parent(s): 7c27a96

New normalizer: Cyrillic С to Latin C; max table rows = 20, max table chars = 4000

Files changed (3) hide show

config.py CHANGED Viewed

@@ -52,7 +52,7 @@ DEFAULT_MODEL = "Gemini 2.5 Flash"
 CHUNK_SIZE = 1500
 CHUNK_OVERLAP = 128
-MAX_CHARS_TABLE = 3000
 MAX_ROWS_TABLE = 20
 CUSTOM_PROMPT = """

 CHUNK_SIZE = 1500
 CHUNK_OVERLAP = 128
+MAX_CHARS_TABLE = 4000
 MAX_ROWS_TABLE = 20
 CUSTOM_PROMPT = """

documents_prep.py CHANGED Viewed

@@ -35,14 +35,10 @@ def chunk_text_documents(documents):
     return chunked
 def normalize_text(text):
-    """
-    Normalize text by converting Latin C to Cyrillic С for consistency
-    This ensures "C-25" and "С-25" are treated as the same in search
-    """
     if not text:
         return text
-    # Replace Latin 'C' with Cyrillic 'С' (U+0421)
     # This is for welding types like C-25 -> С-25
     text = text.replace('С-', 'C')

     return chunked
 def normalize_text(text):
     if not text:
         return text
+    # Replace Cyrillic 'С' (U+0421) with Latin 'C' (U+0043)
     # This is for welding types like C-25 -> С-25
     text = text.replace('С-', 'C')

index_retriever.py CHANGED Viewed

@@ -71,12 +71,12 @@ def create_query_engine(vector_index):
         bm25_retriever = BM25Retriever.from_defaults(
             docstore=vector_index.docstore,
-            similarity_top_k=70
         )
         vector_retriever = VectorIndexRetriever(
             index=vector_index,
-            similarity_top_k=70,
             similarity_cutoff=0.45
         )

         bm25_retriever = BM25Retriever.from_defaults(
             docstore=vector_index.docstore,
+            similarity_top_k=80
         )
         vector_retriever = VectorIndexRetriever(
             index=vector_index,
+            similarity_top_k=80,
             similarity_cutoff=0.45
         )