Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import YoutubeLoader | |
| from src.utils.exceptions import CustomException | |
| from src.utils.functions import cleanText | |
| from src.utils.logging import logger | |
class YoutubeTranscriptLoader:
    """Load YouTube transcripts via ``YoutubeLoader`` and join them into one string."""

    def __init__(self):
        """Initialize the YoutubeTranscriptLoader."""
        pass

    def getTranscripts(self, urls: "str | list[str]") -> str:
        """
        Retrieve transcripts for one or more YouTube URLs.

        Args:
            urls: Comma-separated YouTube URLs in a single string, or an
                iterable of URL strings. Surrounding whitespace is stripped,
                blank entries are skipped, and duplicates are fetched once.

        Returns:
            str: One line per unique URL, in first-seen order, joined by
            newlines. Each line is the video's transcript text passed
            through ``cleanText``. A URL whose transcript cannot be loaded
            is logged and contributes an empty line. Empty or missing
            input yields an empty string.
        """
        if not urls:
            return ""

        # A plain string has to be split on commas first: iterating it
        # directly (e.g. via set(urls)) would walk it character by character.
        candidates = urls.split(",") if isinstance(urls, str) else urls
        stripped = (url.strip() for url in candidates)

        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the order of the joined transcripts vary between runs.
        unique_urls = dict.fromkeys(url for url in stripped if url)

        texts = []
        for url in unique_urls:
            try:
                loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
                doc = " ".join(x.page_content for x in loader.load())
                texts.append(cleanText(text=doc))
            except Exception as e:
                # Best-effort: one failing video must not abort the others.
                logger.error(CustomException(e))
                texts.append("")  # Append an empty string on error
        return "\n".join(texts)