# Source: ConversAI/src/components/loaders/youtubeLoader.py
# Revision: 7e24b41 ("INITIAL COMMIT"), 819 bytes
from langchain_community.document_loaders import YoutubeLoader
from src.utils.exceptions import CustomException
from src.utils.functions import cleanText
from src.utils.logging import logger
class YoutubeTranscriptLoader:
    """Load YouTube video transcripts and merge them into one cleaned string."""

    def __init__(self):
        """Create a loader; no state is kept between calls."""

    def getTranscripts(self, urls: "str | list[str] | None") -> str:
        """Return the cleaned transcripts of the given videos, newline-joined.

        Args:
            urls: A single video URL, or an iterable of video URLs.
                Duplicate URLs are fetched only once, in first-seen order.
                ``None`` or an empty value produces an empty string.

        Returns:
            One entry per unique URL, joined with a newline character. A URL
            whose transcript could not be loaded or cleaned is logged and
            contributes an empty entry instead of raising.
        """
        if not urls:
            return ""
        if isinstance(urls, str):
            # A bare string would otherwise be iterated character by
            # character, treating every character as its own "URL".
            urls = [urls]

        texts = []
        # dict.fromkeys de-duplicates while preserving the caller's order,
        # so the output is deterministic (set() ordering is not).
        for url in dict.fromkeys(urls):
            try:
                loader = YoutubeLoader.from_youtube_url(
                    url, add_video_info=False
                )
                doc = " ".join(x.page_content for x in loader.load())
                texts.append(cleanText(text=doc))
            except Exception as e:
                # Best effort: one bad URL must not abort the whole batch.
                logger.error(CustomException(e))
                texts.append("")
        return "\n".join(texts)