ConversAI / src /pipelines /completePipeline.py
techconsptrs's picture
INITIAL COMMIT
7e24b41
raw
history blame
1.5 kB
from src.components.loaders.websiteCrawler import WebsiteCrawler
from src.components.loaders.youtubeLoader import YoutubeTranscriptLoader
from src.components.loaders.pdfLoader import PdfLoader
from src.components.rag.RAG import Chain
from dotenv import load_dotenv
load_dotenv("secrets.env")
class Pipeline:
    """Build chains from plain text, PDF, website and YouTube sources.

    Every public method obtains text from one kind of source through the
    matching loader and passes it to ``Chain.returnChain``. Whatever that
    call returns is handed back to the caller unchanged.
    """

    def __init__(self):
        """Instantiate the three loaders and the chain builder."""
        self.pdfLoader = PdfLoader()
        self.webCrawler = WebsiteCrawler()
        self.youtubeLoader = YoutubeTranscriptLoader()
        self.ragChain = Chain()

    def _buildChain(self, text):
        """Return ``self.ragChain.returnChain(text=text)``.

        Single place where extracted text is turned into a chain, so the
        source-specific methods only differ in how they obtain the text.
        """
        return self.ragChain.returnChain(text=text)

    def plainText(self, text: str):
        """Build a chain directly from ``text``.

        Args:
            text: Text forwarded as-is to ``Chain.returnChain``.

        Returns:
            The value returned by ``Chain.returnChain``.
        """
        return self._buildChain(text)

    def searchablePdf(self, path: str):
        """Build a chain from the text of the PDF at ``path``.

        Args:
            path: Forwarded to ``PdfLoader.searchablePdf`` as ``pdfPath``.

        Returns:
            The value returned by ``Chain.returnChain``.
        """
        return self._buildChain(self.pdfLoader.searchablePdf(pdfPath=path))

    def scannablePdf(self, path: str):
        """Build a chain from the text of the PDF at ``path``.

        Uses ``PdfLoader.scannablePdf`` instead of ``searchablePdf``.
        NOTE(review): presumably the OCR path for scanned documents --
        confirm against PdfLoader.

        Args:
            path: Forwarded to ``PdfLoader.scannablePdf`` as ``pdfPath``.

        Returns:
            The value returned by ``Chain.returnChain``.
        """
        return self._buildChain(self.pdfLoader.scannablePdf(pdfPath=path))

    def webCrawl(self, urls: list[str]):
        """Build a chain from text extracted from ``urls``.

        Args:
            urls: Forwarded to ``WebsiteCrawler.extractTextFromUrlList``.

        Returns:
            The value returned by ``Chain.returnChain``.
        """
        return self._buildChain(
            self.webCrawler.extractTextFromUrlList(urls=urls)
        )

    def youtubeLinks(self, urls: list[str]):
        """Build a chain from the transcripts of ``urls``.

        Args:
            urls: Forwarded to ``YoutubeTranscriptLoader.getTranscripts``.

        Returns:
            The value returned by ``Chain.returnChain``.
        """
        # The transcripts are intentionally not echoed to stdout: no other
        # source method prints its extracted text, and dumping whole
        # transcripts on every call floods the output of the host process.
        return self._buildChain(self.youtubeLoader.getTranscripts(urls=urls))