# ocr / test_loader.py
# Guhanselvam's picture
# Upload 5 files
# 7262f10 verified
import os
# Mock loader classes for demonstration purposes
class PyPDFLoader:
    """Mock PDF loader that stores the given path and prints a notice."""

    def __init__(self, file_path):
        """Remember ``file_path`` and print an initialization message."""
        notice = f'PDF loader initialized with {file_path}'
        self.file_path = file_path
        print(notice)
class TextLoader:
    """Mock plain-text loader that stores the given path and prints a notice."""

    def __init__(self, file_path):
        """Remember ``file_path`` and print an initialization message."""
        notice = f'Text loader initialized with {file_path}'
        self.file_path = file_path
        print(notice)
class CSVLoader:
    """Mock CSV loader that stores the given path and prints a notice."""

    def __init__(self, file_path):
        """Remember ``file_path`` and print an initialization message."""
        notice = f'CSV loader initialized with {file_path}'
        self.file_path = file_path
        print(notice)
# Function to determine the loader based on file extension
def get_loader_by_file_extension(temp_file):
    """Return a loader instance chosen by the extension of ``temp_file``.

    The extension is matched case-insensitively, so ``report.PDF`` and
    ``report.pdf`` both produce a PDF loader.

    Args:
        temp_file: Path of the file to load, given as a string.

    Returns:
        A ``PyPDFLoader``, ``TextLoader`` or ``CSVLoader`` constructed with
        ``temp_file``.

    Raises:
        TypeError: If ``temp_file`` is not a string.
        ValueError: If the extension is not ``.pdf``, ``.txt`` or ``.csv``
            (this includes paths that have no extension at all).
    """
    if not isinstance(temp_file, str):
        raise TypeError("Expected file path as a string.")

    # splitext returns (root, ext); ext includes the leading dot or is ''.
    file_extension = os.path.splitext(temp_file)[1]
    print('file_extension - ', file_extension)

    # Lowercase only for matching; the diagnostics above and below keep the
    # extension exactly as it appears in the path.
    normalized_extension = file_extension.lower()

    # Initialize loader based on file extension
    if normalized_extension == '.pdf':
        loader = PyPDFLoader(temp_file)
        print('Loader Created for PDF file')
    elif normalized_extension == '.txt':
        loader = TextLoader(temp_file)
    elif normalized_extension == '.csv':
        loader = CSVLoader(temp_file)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")
    return loader
# Test the function with different file types
if __name__ == "__main__":
    # The last entry has an extension with no matching loader, so it
    # exercises the error-reporting branch below.
    sample_paths = ("document.pdf", "notes.txt", "data.csv", "image.jpg")

    for sample_path in sample_paths:
        print(f"\nTesting with file: {sample_path}")
        try:
            get_loader_by_file_extension(sample_path)
        except Exception as err:
            # Report the failure and continue with the remaining samples.
            print(f'Error: {err}')