Spaces:

techconsptr
/

ConversAI

Sleeping

App Files Files Community

ConversAI / src /pipelines /completePipeline.py

techconsptrs

UPDATE: code update

1802405 over 1 year ago

raw

history blame contribute delete

2.66 kB

	from src.components.loaders.websiteCrawler import WebsiteCrawler
	from src.components.loaders.youtubeLoader import YoutubeTranscriptLoader
	from src.components.loaders.pdfLoader import PdfLoader
	from src.components.rag.RAG import Chain
	from dotenv import load_dotenv

	# Import-time side effect: read key/value pairs from "secrets.env" into the
	# process environment before any pipeline component is constructed.
	# NOTE(review): the path is relative, so it presumably resolves against the
	# current working directory; python-dotenv's defaults (no override of
	# already-set variables, no error on a missing file) apply — confirm that a
	# silently missing secrets file is acceptable here.
	load_dotenv("secrets.env")

	class Pipeline:
	    """Build RAG chains from plain text, PDFs, web pages or YouTube videos.

	    Every public method follows the same two steps: obtain text from one
	    kind of source through the matching loader, then pass that text to
	    ``Chain.returnChain`` and hand back whatever it returns.
	    """

	    def __init__(self):
	        """Create one instance of each loader and of the RAG chain builder."""
	        self.pdfLoader = PdfLoader()
	        self.webCrawler = WebsiteCrawler()
	        self.youtubeLoader = YoutubeTranscriptLoader()
	        self.ragChain = Chain()

	    def _buildChain(self, text):
	        """Pass already-extracted text to the RAG chain builder.

	        Single call site for ``returnChain`` so all sources build their
	        chain the same way. The argument is forwarded by keyword
	        (``text=``), exactly as each public method did individually.
	        """
	        return self.ragChain.returnChain(text=text)

	    def plainText(self, text: str):
	        """
	        Build a chain directly from caller-supplied text.

	        Args:
	            text (str): The input text to process.

	        Returns:
	            The value returned by ``Chain.returnChain`` for ``text``.
	        """
	        return self._buildChain(text)

	    def searchablePdf(self, path: str):
	        """
	        Build a chain from the text of a searchable PDF file.

	        Args:
	            path (str): The path to the PDF file; forwarded to
	                ``PdfLoader.searchablePdf`` as ``pdfPath``.

	        Returns:
	            The value returned by ``Chain.returnChain`` for the
	            extracted text.
	        """
	        extractedText = self.pdfLoader.searchablePdf(pdfPath=path)
	        return self._buildChain(extractedText)

	    def scannablePdf(self, path: str):
	        """
	        Build a chain from the text of a scannable PDF file.

	        NOTE(review): presumably the scanned/image-only PDF path, as
	        opposed to ``searchablePdf`` — confirm against ``PdfLoader``.

	        Args:
	            path (str): The path to the PDF file; forwarded to
	                ``PdfLoader.scannablePdf`` as ``pdfPath``.

	        Returns:
	            The value returned by ``Chain.returnChain`` for the
	            extracted text.
	        """
	        extractedText = self.pdfLoader.scannablePdf(pdfPath=path)
	        return self._buildChain(extractedText)

	    def webCrawl(self, urls: list[str]):
	        """
	        Build a chain from text extracted from the provided URLs.

	        Args:
	            urls (list[str]): A list of URLs; forwarded to
	                ``WebsiteCrawler.extractTextFromUrlList``.

	        Returns:
	            The value returned by ``Chain.returnChain`` for the
	            extracted text.
	        """
	        extractedText = self.webCrawler.extractTextFromUrlList(urls=urls)
	        return self._buildChain(extractedText)

	    def youtubeLinks(self, urls: list[str]):
	        """
	        Build a chain from the transcripts of the given YouTube links.

	        Args:
	            urls (list[str]): A list of YouTube video URLs; forwarded to
	                ``YoutubeTranscriptLoader.getTranscripts``.

	        Returns:
	            The value returned by ``Chain.returnChain`` for the
	            extracted transcripts.
	        """
	        extractedText = self.youtubeLoader.getTranscripts(urls=urls)
	        return self._buildChain(extractedText)