Ahmed-Alghamdi committed on
Commit
d12e375
·
verified ·
1 Parent(s): bf23ab2

Update document_processor.py

Browse files
Files changed (1) hide show
  1. document_processor.py +8 -10
document_processor.py CHANGED
@@ -7,13 +7,11 @@ from utils import clean_text, setup_logger
7
 
8
  logger = setup_logger('document_processor')
9
 
10
- def load_documents(folder_path):
11
- documents = []
12
- for file_path in tqdm(glob.glob(os.path.join(folder_path, '*.txt')), desc="Loading documents"):
13
- try:
14
- with open(file_path, 'r', encoding='utf-8') as file:
15
- content = clean_text(file.read())
16
- documents.append({'path': file_path, 'content': content})
17
- except Exception as e:
18
- logger.error(f"Error reading {file_path}: {e}")
19
- return pd.DataFrame(documents)
 
7
 
8
  logger = setup_logger('document_processor')
9
 
10
+ def load_single_document(file_path):
11
+ try:
12
+ with open(file_path, 'r', encoding='utf-8') as file:
13
+ content = clean_text(file.read())
14
+ return pd.DataFrame([{'path': file_path, 'content': content}])
15
+ except Exception as e:
16
+ logger.error(f"Error reading {file_path}: {e}")
17
+ return pd.DataFrame()