Arabic-Rag-Chatbot / document_processor.py
Ahmed-Alghamdi's picture
Update document_processor.py
d12e375 verified
raw
history blame
518 Bytes
# document_processor.py
import os
import glob
from tqdm import tqdm
import pandas as pd
from utils import clean_text, setup_logger
# Module-level logger created through the project's setup_logger helper;
# load_single_document uses it to report files that could not be read.
logger = setup_logger('document_processor')
def load_single_document(file_path):
    """Load one text file into a single-row DataFrame.

    The file is opened as UTF-8 text, its full contents are passed through
    ``clean_text``, and the result is wrapped in a DataFrame with two
    columns: ``path`` (the ``file_path`` argument as given) and ``content``
    (the cleaned text).

    Loading is best-effort: any ``Exception`` raised while reading, cleaning
    or building the frame is logged through the module logger and an empty
    DataFrame (no rows, no columns) is returned instead of propagating.

    Args:
        file_path: Path of the file to read.

    Returns:
        A one-row ``pandas.DataFrame`` on success, or an empty
        ``pandas.DataFrame`` if anything went wrong.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()
        record = {'path': file_path, 'content': clean_text(raw_text)}
        frame = pd.DataFrame([record])
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort the caller.
        logger.error(f"Error reading {file_path}: {e}")
        frame = pd.DataFrame()
    return frame