Wajahat698 committed on
Commit
19ed182
·
verified ·
1 Parent(s): 59ed08b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -8
app.py CHANGED
@@ -422,23 +422,35 @@ def get_trust_tip_and_suggestion():
422
 
423
 
424
 
 
 
 
425
  def load_main_data_source():
 
 
 
426
  try:
427
- with open("./data_source/time_to_rethink_trust_book.md", "r") as f:
 
428
  main_content = f.read()
429
-
430
- # Split main content into chunks
431
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
 
 
 
432
  main_texts = text_splitter.split_text(main_content)
433
 
434
- # Create Document objects for the main data source
435
  main_documents = [Document(page_content=text) for text in main_texts]
436
  return main_documents
437
- except Exception as e:
438
- st.error(f"Error loading main data source: {e}")
439
- return []
440
 
 
 
 
441
 
 
 
442
 
443
 
444
 
 
422
 
423
 
424
 
425
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
426
+ from langchain.schema import Document
427
+
428
  def load_main_data_source():
429
+ """
430
+ Load the main data source, split it into chunks, and return Document objects.
431
+ """
432
  try:
433
+ # Load the main data source
434
+ with open("./data_source/time_to_rethink_trust_book.md", "r", encoding="utf-8") as f:
435
  main_content = f.read()
436
+
437
+ # Use a more robust text splitter
438
+ text_splitter = RecursiveCharacterTextSplitter(
439
+ chunk_size=2000, # Adjust the chunk size based on your LLM's token limit
440
+ chunk_overlap=500, # Add overlap to improve context continuity
441
+ )
442
  main_texts = text_splitter.split_text(main_content)
443
 
444
+ # Create Document objects for the split texts
445
  main_documents = [Document(page_content=text) for text in main_texts]
446
  return main_documents
 
 
 
447
 
448
+ except FileNotFoundError:
449
+ st.error("The file './data_source/time_to_rethink_trust_book.md' was not found.")
450
+ return []
451
 
452
+ except Exception as e:
453
+ st.error(f"An unexpected error occurred while loading
454
 
455
 
456