Spaces:
Sleeping
Sleeping
File size: 1,934 Bytes
9c90775 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
from src.utils.asyncHandler import asyncHandler
import logging
from src.entity.config_entity import DataIngestionConfig
from src.utils.ingestion_utils import text_to_pdf,image_to_pdf,docs_to_pdf
from src.entity.artifact_entity import DataIngestionArtifact
import shutil
import os
class DataIngestion:
def __init__(self,data_ingestion_config:DataIngestionConfig):
self.data_ingestion_config=data_ingestion_config
@asyncHandler
async def ingest_data(self)->DataIngestionArtifact:
logging.info(f"Ensuring directory exists for: {self.data_ingestion_config.save_file_path}")
os.makedirs(os.path.dirname(self.data_ingestion_config.save_file_path), exist_ok=True)
match (self.data_ingestion_config.input_file_path.split(".")[-1]):
case "docx":
await docs_to_pdf(docs_path=self.data_ingestion_config.input_file_path,output_pdf_path=self.data_ingestion_config.save_file_path)
case "txt":
await text_to_pdf(file_path=self.data_ingestion_config.input_file_path,output_file_path=self.data_ingestion_config.save_file_path)
case "png":
await image_to_pdf(imge_path=self.data_ingestion_config.input_file_path,output_pdf_path=self.data_ingestion_config.save_file_path)
case "jpg":
await image_to_pdf(imge_path=self.data_ingestion_config.input_file_path,output_pdf_path=self.data_ingestion_config.save_file_path)
case "webp":
await image_to_pdf(imge_path=self.data_ingestion_config.input_file_path,output_pdf_path=self.data_ingestion_config.save_file_path)
case "pdf":
shutil.copy(src=self.data_ingestion_config.input_file_path, dst=self.data_ingestion_config.save_file_path)
return DataIngestionArtifact(ingested_file_path=self.data_ingestion_config.save_file_path)
|