Spaces:
Sleeping
Sleeping
| from src.components.loaders.websiteCrawler import WebsiteCrawler | |
| from src.components.loaders.youtubeLoader import YoutubeTranscriptLoader | |
| from src.components.loaders.pdfLoader import PdfLoader | |
| from src.components.rag.RAG import Chain | |
| from dotenv import load_dotenv | |
| load_dotenv("secrets.env") | |
class Pipeline:
    """Route each supported input source through its loader into the RAG chain.

    Every public method obtains text (directly from the caller or from one of
    the loaders) and passes it to ``self.ragChain.returnChain``. Whatever that
    call returns is handed back to the caller unchanged.
    """

    def __init__(self):
        """Create one loader per input source plus the ``Chain`` instance."""
        self.pdfLoader = PdfLoader()
        self.webCrawler = WebsiteCrawler()
        self.youtubeLoader = YoutubeTranscriptLoader()
        self.ragChain = Chain()

    def _buildChain(self, text):
        """Pass ``text`` to ``self.ragChain.returnChain`` and return its result."""
        return self.ragChain.returnChain(text=text)

    def plainText(self, text: str):
        """Build a chain directly from caller-supplied ``text``."""
        return self._buildChain(text)

    def searchablePdf(self, path: str):
        """Build a chain from ``self.pdfLoader.searchablePdf(pdfPath=path)``."""
        extractedText = self.pdfLoader.searchablePdf(pdfPath=path)
        return self._buildChain(extractedText)

    def scannablePdf(self, path: str):
        """Build a chain from ``self.pdfLoader.scannablePdf(pdfPath=path)``."""
        extractedText = self.pdfLoader.scannablePdf(pdfPath=path)
        return self._buildChain(extractedText)

    def webCrawl(self, urls: list[str]):
        """Build a chain from ``self.webCrawler.extractTextFromUrlList(urls=urls)``."""
        extractedText = self.webCrawler.extractTextFromUrlList(urls=urls)
        return self._buildChain(extractedText)

    def youtubeLinks(self, urls: list[str]):
        """Build a chain from ``self.youtubeLoader.getTranscripts(urls=urls)``.

        The loader output is forwarded as-is; it is deliberately not echoed to
        stdout, matching the other source methods of this class.
        """
        extractedText = self.youtubeLoader.getTranscripts(urls=urls)
        return self._buildChain(extractedText)