MrSimple07 committed on
Commit
abdb242
·
1 Parent(s): bfd4369

fixing chunk size + overlap

Browse files
Files changed (2) hide show
  1. app.py +11 -0
  2. config_1.py +1 -1
app.py CHANGED
@@ -40,6 +40,8 @@ def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
40
  try:
41
  log_message("Инициализация системы")
42
  os.makedirs(download_dir, exist_ok=True)
 
 
43
 
44
  embed_model = get_embedding_model()
45
  llm = get_llm_model(DEFAULT_MODEL)
@@ -47,6 +49,15 @@ def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
47
 
48
  Settings.embed_model = embed_model
49
  Settings.llm = llm
 
 
 
 
 
 
 
 
 
50
 
51
  all_documents = []
52
  chunks_df = None
 
40
  try:
41
  log_message("Инициализация системы")
42
  os.makedirs(download_dir, exist_ok=True)
43
+ from config import CHUNK_SIZE, CHUNK_OVERLAP
44
+ from llama_index.core.text_splitter import SentenceSplitter
45
 
46
  embed_model = get_embedding_model()
47
  llm = get_llm_model(DEFAULT_MODEL)
 
49
 
50
  Settings.embed_model = embed_model
51
  Settings.llm = llm
52
+ Settings.text_splitter = SentenceSplitter(
53
+ chunk_size=CHUNK_SIZE,
54
+ chunk_overlap=CHUNK_OVERLAP,
55
+ separator=" "
56
+ )
57
+ # Add this after setting Settings
58
+ log_message(f"Configured chunk size: {CHUNK_SIZE}")
59
+ log_message(f"Configured chunk overlap: {CHUNK_OVERLAP}")
60
+ log_message(f"Settings text splitter chunk size: {Settings.text_splitter.chunk_size if hasattr(Settings, 'text_splitter') else 'Not set'}")
61
 
62
  all_documents = []
63
  chunks_df = None
config_1.py CHANGED
@@ -16,7 +16,7 @@ download_dir = "rag_files"
16
  HF_TOKEN = os.getenv('HF_TOKEN')
17
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
18
 
19
- CHUNK_SIZE = 1024
20
  CHUNK_OVERLAP = 256
21
 
22
 
 
16
  HF_TOKEN = os.getenv('HF_TOKEN')
17
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
18
 
19
+ CHUNK_SIZE = 2048
20
  CHUNK_OVERLAP = 256
21
 
22