# document_processor.py
import os
import glob

from tqdm import tqdm
import pandas as pd

from utils import clean_text, setup_logger

logger = setup_logger('document_processor')


def load_single_document(file_path: str) -> pd.DataFrame:
    """Load one UTF-8 text file into a single-row DataFrame.

    The file's text is passed through ``clean_text`` before being stored.

    Args:
        file_path: Path of the file to read; it is opened in text mode
            with ``encoding='utf-8'``.

    Returns:
        A DataFrame with the columns ``path`` and ``content``. It holds
        one row on success. If anything goes wrong while reading or
        cleaning the file, the error is logged and an *empty* DataFrame
        with the same two columns is returned, so callers can concatenate
        results or select columns without special-casing failures.
    """
    columns = ['path', 'content']
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = clean_text(file.read())
        return pd.DataFrame(
            [{'path': file_path, 'content': content}],
            columns=columns,
        )
    except Exception as e:
        # Deliberately broad: loading is best-effort, so one unreadable or
        # undecodable file is logged and skipped rather than aborting the
        # caller's whole run.
        logger.error(f"Error reading {file_path}: {e}")
        # Keep the schema identical to the success path (still `.empty`).
        return pd.DataFrame(columns=columns)