midlajvalappil commited on
Commit
f1ba2d2
·
verified ·
1 Parent(s): 7086bdb

Upload 29 files

Browse files
src/config/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Configuration package
src/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (157 Bytes). View file
 
src/config/__pycache__/settings.cpython-310.pyc ADDED
Binary file (5.8 kB). View file
 
src/config/config.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YouTube Transcript Chatbot Configuration
2
+
3
+ app:
4
+ title: "AI-Powered YouTube Transcript Tutor"
5
+ description: "Ask questions from YouTube lecture transcripts using AI"
6
+ version: "1.0.0"
7
+
8
+ ui:
9
+ theme: "light" # light, dark, auto
10
+ sidebar_width: 300
11
+ max_chat_history_display: 50
12
+ enable_animations: true
13
+
14
+ processing:
15
+ default_chunk_size: 1000
16
+ chunk_overlap: 200
17
+ max_transcript_length: 1000000 # 1MB
18
+ supported_languages: ["en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"]
19
+ default_language: "en"
20
+
21
+ ai:
22
+ model_temperature: 0.7
23
+ max_tokens: 2000
24
+ retrieval_k: 4 # Number of documents to retrieve
25
+ chain_type: "stuff" # stuff, map_reduce, refine, map_rerank
26
+
27
+ export:
28
+ formats: ["pdf", "txt", "json"]
29
+ max_export_entries: 1000
30
+ pdf_page_size: "A4"
31
+
32
+ cache:
33
+ enable_vectorstore_cache: true
34
+ cache_directory: "cache"
35
+ max_cache_size_mb: 500
36
+
37
+ logging:
38
+ level: "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
39
+ file: "logs/app.log"
40
+ max_file_size_mb: 10
41
+ backup_count: 5
42
+
43
+ security:
44
+ max_url_length: 2048
45
+ allowed_domains: ["youtube.com", "youtu.be", "m.youtube.com"]
46
+ rate_limit_requests: 100
47
+ rate_limit_window_minutes: 60
src/config/huggingface.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces specific configuration
2
+
3
+ app:
4
+ title: "AI-Powered YouTube Transcript Tutor"
5
+ description: "Ask questions from YouTube lecture transcripts using AI"
6
+
7
+ logging:
8
+ level: "INFO"
9
+ file: null # Disable file logging
10
+
11
+ cache:
12
+ enable_vectorstore_cache: false
13
+ cache_directory: null
14
+
15
+ security:
16
+ youtube_api_fallback: true # Enable fallback methods for YouTube API
src/config/settings.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration settings management.
3
+ """
4
+
5
+ import os
6
+ import yaml
7
+ from typing import Dict, Any, Optional
8
+ from pathlib import Path
9
+
10
class Settings:
    """Application settings manager.

    Loads configuration from a YAML file (falling back to built-in
    defaults when the file is missing or unreadable) and then applies
    environment-variable overrides on top.
    """

    def __init__(self, config_file: str = "config/config.yaml"):
        """
        Initialize settings from config file and environment variables.

        Args:
            config_file (str): Path to configuration file
        """
        self.config_file = config_file
        self.config = self._load_config()
        self._override_with_env()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from the YAML file.

        Returns:
            Dict[str, Any]: Parsed configuration, or the built-in default
            configuration when the file is absent or cannot be read.
        """
        try:
            config_path = Path(self.config_file)
            if config_path.exists():
                with open(config_path, 'r', encoding='utf-8') as f:
                    # An empty YAML file parses to None; normalize to {}.
                    return yaml.safe_load(f) or {}
            else:
                return self._get_default_config()
        except Exception as e:
            # Best-effort: report the problem and keep running on defaults
            # rather than crashing at import time.
            print(f"Error loading config file: {e}")
            return self._get_default_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration (mirrors config/config.yaml)."""
        return {
            'app': {
                'title': 'AI-Powered YouTube Transcript Tutor',
                'description': 'Ask questions from YouTube lecture transcripts using AI',
                'version': '1.0.0'
            },
            'ui': {
                'theme': 'light',
                'sidebar_width': 300,
                'max_chat_history_display': 50,
                'enable_animations': True
            },
            'processing': {
                'default_chunk_size': 1000,
                'chunk_overlap': 200,
                'max_transcript_length': 1000000,
                'supported_languages': ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh'],
                'default_language': 'en'
            },
            'ai': {
                'model_temperature': 0.7,
                'max_tokens': 2000,
                'retrieval_k': 4,
                'chain_type': 'stuff'
            },
            'export': {
                'formats': ['pdf', 'txt', 'json'],
                'max_export_entries': 1000,
                'pdf_page_size': 'A4'
            },
            'cache': {
                'enable_vectorstore_cache': True,
                'cache_directory': 'cache',
                'max_cache_size_mb': 500
            },
            'logging': {
                'level': 'INFO',
                'file': 'logs/app.log',
                'max_file_size_mb': 10,
                'backup_count': 5
            },
            'security': {
                'max_url_length': 2048,
                'allowed_domains': ['youtube.com', 'youtu.be', 'm.youtube.com'],
                'rate_limit_requests': 100,
                'rate_limit_window_minutes': 60
            }
        }

    def _override_with_env(self):
        """Override configuration with environment variables.

        Recognized variables:
            OPENAI_API_KEY  -> ai.openai_api_key
            LOG_LEVEL       -> logging.level (upper-cased)
            CACHE_DIRECTORY -> cache.cache_directory

        Note: ``setdefault`` guards against a user-supplied YAML file that
        omits one of these sections; indexing directly would raise KeyError.
        """
        # OpenAI API Key
        openai_key = os.getenv('OPENAI_API_KEY')
        if openai_key:
            self.config.setdefault('ai', {})['openai_api_key'] = openai_key

        # Log level
        log_level = os.getenv('LOG_LEVEL')
        if log_level:
            self.config.setdefault('logging', {})['level'] = log_level.upper()

        # Cache directory
        cache_dir = os.getenv('CACHE_DIRECTORY')
        if cache_dir:
            self.config.setdefault('cache', {})['cache_directory'] = cache_dir

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get configuration value using dot notation.

        Args:
            key (str): Configuration key (e.g., 'app.title')
            default (Any): Default value if key not found

        Returns:
            Any: Configuration value
        """
        keys = key.split('.')
        value = self.config

        try:
            for k in keys:
                value = value[k]
            return value
        except (KeyError, TypeError):
            # TypeError covers drilling into a non-dict intermediate value.
            return default

    def set(self, key: str, value: Any):
        """
        Set configuration value using dot notation, creating intermediate
        dictionaries as needed.

        Args:
            key (str): Configuration key (e.g., 'app.title')
            value (Any): Value to set
        """
        keys = key.split('.')
        config = self.config

        for k in keys[:-1]:
            if k not in config:
                config[k] = {}
            config = config[k]

        config[keys[-1]] = value

    def get_openai_api_key(self) -> Optional[str]:
        """Get OpenAI API key from config or environment."""
        return self.get('ai.openai_api_key') or os.getenv('OPENAI_API_KEY')

    def get_app_config(self) -> Dict[str, Any]:
        """Get application configuration section."""
        return self.get('app', {})

    def get_ui_config(self) -> Dict[str, Any]:
        """Get UI configuration section."""
        return self.get('ui', {})

    def get_processing_config(self) -> Dict[str, Any]:
        """Get processing configuration section."""
        return self.get('processing', {})

    def get_ai_config(self) -> Dict[str, Any]:
        """Get AI configuration section."""
        return self.get('ai', {})

    def get_export_config(self) -> Dict[str, Any]:
        """Get export configuration section."""
        return self.get('export', {})

    def get_cache_config(self) -> Dict[str, Any]:
        """Get cache configuration section."""
        return self.get('cache', {})

    def get_logging_config(self) -> Dict[str, Any]:
        """Get logging configuration section."""
        return self.get('logging', {})

    def get_security_config(self) -> Dict[str, Any]:
        """Get security configuration section."""
        return self.get('security', {})

# Global settings instance
settings = Settings()
src/logs/app.log ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-13 22:55:30,859 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
2
+ 2025-07-13 22:55:37,615 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
3
+ 2025-07-13 22:56:36,886 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
4
+ 2025-07-13 22:56:40,977 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
5
+ 2025-07-13 22:56:54,360 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
6
+ 2025-07-13 22:57:04,282 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
7
+ 2025-07-13 22:58:12,592 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
8
+ 2025-07-13 22:58:21,552 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
9
+ 2025-07-13 22:58:38,183 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
10
+ 2025-07-13 22:58:41,834 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
11
+ 2025-07-13 22:59:16,207 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
12
+ 2025-07-13 22:59:22,975 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
13
+ 2025-07-13 22:59:23,716 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
14
+ 2025-07-13 22:59:26,323 - src.utils.youtube_handler - ERROR - Unexpected error getting transcript: 'FetchedTranscriptSnippet' object is not subscriptable
15
+ 2025-07-13 23:01:13,950 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
16
+ 2025-07-13 23:01:35,772 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
17
+ 2025-07-13 23:01:35,774 - openai._base_client - INFO - Retrying request to /embeddings in 0.378161 seconds
18
+ 2025-07-13 23:01:37,503 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
19
+ 2025-07-13 23:01:37,505 - openai._base_client - INFO - Retrying request to /embeddings in 0.796060 seconds
20
+ 2025-07-13 23:01:39,284 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
21
+ 2025-07-13 23:01:39,286 - src.utils.text_processor - ERROR - Error creating vector store: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
22
+ 2025-07-13 23:02:22,588 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
23
+ 2025-07-13 23:02:36,283 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
24
+ 2025-07-13 23:02:36,285 - openai._base_client - INFO - Retrying request to /embeddings in 0.379324 seconds
25
+ 2025-07-13 23:02:37,475 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
26
+ 2025-07-13 23:02:37,476 - openai._base_client - INFO - Retrying request to /embeddings in 0.943958 seconds
27
+ 2025-07-13 23:02:39,327 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
28
+ 2025-07-13 23:02:39,328 - src.utils.text_processor - ERROR - Error creating vector store: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
29
+ 2025-07-13 23:09:22,969 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
30
+ 2025-07-13 23:09:26,985 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
31
+ 2025-07-13 23:09:26,986 - openai._base_client - INFO - Retrying request to /embeddings in 0.395765 seconds
32
+ 2025-07-13 23:09:27,911 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
33
+ 2025-07-13 23:09:27,913 - openai._base_client - INFO - Retrying request to /embeddings in 0.940555 seconds
34
+ 2025-07-13 23:09:29,552 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
35
+ 2025-07-13 23:09:29,554 - src.utils.text_processor - WARNING - OpenAI embeddings failed: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
36
+ 2025-07-13 23:09:29,554 - src.utils.text_processor - INFO - Using simple text-based fallback
37
+ 2025-07-13 23:09:29,554 - src.utils.text_processor - INFO - Created simple text-based fallback vector store
38
+ 2025-07-13 23:09:29,555 - src.utils.text_processor - INFO - Using simple fallback QA system
39
+ 2025-07-13 23:15:01,397 - src.utils.logger - INFO - Custom CSS loaded successfully
40
+ 2025-07-13 23:15:05,056 - src.utils.logger - INFO - Custom CSS loaded successfully
41
+ 2025-07-13 23:15:15,923 - src.utils.logger - INFO - Custom CSS loaded successfully
42
+ 2025-07-13 23:15:17,491 - src.utils.logger - INFO - Custom CSS loaded successfully
43
+ 2025-07-13 23:15:19,654 - src.utils.logger - INFO - Custom CSS loaded successfully
44
+ 2025-07-13 23:15:23,535 - src.utils.logger - INFO - Custom CSS loaded successfully
45
+ 2025-07-13 23:16:06,979 - src.utils.logger - INFO - Custom CSS loaded successfully
46
+ 2025-07-13 23:21:32,219 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
47
+ 2025-07-13 23:21:43,012 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
48
+ 2025-07-13 23:21:43,013 - openai._base_client - INFO - Retrying request to /embeddings in 0.396331 seconds
49
+ 2025-07-13 23:21:44,678 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
50
+ 2025-07-13 23:21:44,680 - openai._base_client - INFO - Retrying request to /embeddings in 0.842338 seconds
51
+ 2025-07-13 23:21:47,127 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
52
+ 2025-07-13 23:21:47,128 - src.utils.text_processor - WARNING - OpenAI embeddings failed: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
53
+ 2025-07-13 23:21:47,129 - src.utils.text_processor - INFO - Using simple text-based fallback
54
+ 2025-07-13 23:21:47,129 - src.utils.text_processor - INFO - Created simple text-based fallback vector store
55
+ 2025-07-13 23:21:47,129 - src.utils.text_processor - INFO - Using simple fallback QA system
56
+ 2025-07-13 23:22:46,498 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
57
+ 2025-07-13 23:22:49,535 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
58
+ 2025-07-13 23:53:47,078 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
59
+ 2025-07-13 23:53:52,909 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
60
+ 2025-07-13 23:53:59,444 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
61
+ 2025-07-13 23:53:59,609 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
62
+ 2025-07-13 23:54:00,519 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
63
+ 2025-07-13 23:54:07,685 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
64
+ 2025-07-13 23:54:07,688 - openai._base_client - INFO - Retrying request to /embeddings in 0.454709 seconds
65
+ 2025-07-13 23:54:08,673 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
66
+ 2025-07-13 23:54:08,674 - openai._base_client - INFO - Retrying request to /embeddings in 0.918276 seconds
67
+ 2025-07-13 23:54:10,652 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
68
+ 2025-07-13 23:54:10,656 - src.utils.text_processor - WARNING - OpenAI embeddings failed: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
69
+ 2025-07-13 23:54:10,657 - src.utils.text_processor - INFO - Using simple text-based fallback
70
+ 2025-07-13 23:54:10,659 - src.utils.text_processor - INFO - Created simple text-based fallback vector store
71
+ 2025-07-13 23:54:10,660 - src.utils.text_processor - INFO - Using simple fallback QA system
72
+ 2025-07-13 23:54:22,185 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
73
+ 2025-07-13 23:54:24,094 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
74
+ 2025-07-14 00:42:29,448 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
75
+ 2025-07-14 00:42:47,515 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
76
+ 2025-07-14 00:42:47,673 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
77
+ 2025-07-14 00:42:48,260 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
78
+ 2025-07-14 00:42:52,222 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
79
+ 2025-07-14 00:42:52,224 - openai._base_client - INFO - Retrying request to /embeddings in 0.396998 seconds
80
+ 2025-07-14 00:42:53,417 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
81
+ 2025-07-14 00:42:53,419 - openai._base_client - INFO - Retrying request to /embeddings in 0.829603 seconds
82
+ 2025-07-14 00:42:54,708 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
83
+ 2025-07-14 00:42:54,713 - src.utils.text_processor - WARNING - OpenAI embeddings failed: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
84
+ 2025-07-14 00:42:54,715 - src.utils.text_processor - INFO - Using simple text-based fallback
85
+ 2025-07-14 00:42:54,717 - src.utils.text_processor - INFO - Created simple text-based fallback vector store
86
+ 2025-07-14 00:42:54,718 - src.utils.text_processor - INFO - Using simple fallback QA system
87
+ 2025-07-14 00:50:26,573 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
88
+ 2025-07-14 01:00:15,758 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
89
+ 2025-07-14 01:00:23,869 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
90
+ 2025-07-14 01:00:24,021 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
91
+ 2025-07-14 01:00:24,480 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
92
+ 2025-07-14 01:00:26,491 - src.utils.youtube_handler - INFO - Successfully got transcript in en
93
+ 2025-07-14 01:00:28,434 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
94
+ 2025-07-14 01:00:28,436 - openai._base_client - INFO - Retrying request to /embeddings in 0.464677 seconds
95
+ 2025-07-14 01:00:29,888 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
96
+ 2025-07-14 01:00:29,889 - openai._base_client - INFO - Retrying request to /embeddings in 0.932156 seconds
97
+ 2025-07-14 01:00:31,765 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
98
+ 2025-07-14 01:00:31,768 - src.utils.text_processor - WARNING - OpenAI embeddings failed: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
99
+ 2025-07-14 01:00:31,770 - src.utils.text_processor - INFO - Using simple text-based fallback
100
+ 2025-07-14 01:00:31,771 - src.utils.text_processor - INFO - Created simple text-based fallback vector store
101
+ 2025-07-14 01:00:31,772 - src.utils.text_processor - INFO - Using simple fallback QA system
102
+ 2025-07-14 01:01:09,650 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
103
+ 2025-07-14 01:01:32,106 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
104
+ 2025-07-14 01:01:32,361 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
105
+ 2025-07-14 01:01:33,814 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
106
+ 2025-07-14 01:01:33,972 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
107
+ 2025-07-14 01:01:34,156 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
108
+ 2025-07-14 01:01:34,479 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
109
+ 2025-07-14 01:01:34,750 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
110
+ 2025-07-14 01:01:34,892 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
111
+ 2025-07-14 01:01:35,077 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
112
+ 2025-07-14 01:01:35,399 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
113
+ 2025-07-14 01:01:38,181 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
114
+ 2025-07-14 01:01:38,429 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
115
+ 2025-07-14 01:01:39,552 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
116
+ 2025-07-14 01:01:39,800 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
117
+ 2025-07-14 01:01:39,822 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
118
+ 2025-07-14 01:01:39,989 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
119
+ 2025-07-14 01:01:40,370 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
120
+ 2025-07-14 01:01:41,960 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
121
+ 2025-07-14 01:14:18,892 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
122
+ 2025-07-14 01:14:53,458 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
123
+ 2025-07-14 01:14:53,729 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
124
+ 2025-07-14 01:14:54,391 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
125
+ 2025-07-14 01:14:54,504 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
126
+ 2025-07-14 01:14:54,695 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
127
+ 2025-07-14 01:14:54,820 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
128
+ 2025-07-14 01:14:55,091 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
129
+ 2025-07-14 01:14:55,861 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
130
+ 2025-07-14 01:14:55,967 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
131
+ 2025-07-14 01:14:56,154 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
132
+ 2025-07-14 01:14:56,488 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
133
+ 2025-07-14 01:14:56,756 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
134
+ 2025-07-14 01:14:56,902 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
135
+ 2025-07-14 01:14:57,250 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
136
+ 2025-07-14 01:14:58,213 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
137
+ 2025-07-14 01:14:58,299 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
138
+ 2025-07-14 01:14:58,452 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
139
+ 2025-07-14 01:14:58,584 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
140
+ 2025-07-14 01:14:58,780 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
141
+ 2025-07-14 01:14:59,089 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
142
+ 2025-07-14 01:14:59,926 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
143
+ 2025-07-14 01:15:00,032 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
144
+ 2025-07-14 01:15:00,208 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
145
+ 2025-07-14 01:15:00,533 - src.utils.logger - INFO - Custom dark theme CSS loaded successfully
src/logs/youtube_chatbot_20250714.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-07-14 00:58:58,526 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
2
+ 2025-07-14 00:59:00,597 - src.utils.youtube_handler - INFO - Successfully got transcript in en
3
+ 2025-07-14 00:59:00,784 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
4
+ 2025-07-14 00:59:02,824 - src.utils.youtube_handler - INFO - Successfully got transcript in en
5
+ 2025-07-14 00:59:03,041 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
6
+ 2025-07-14 00:59:05,007 - src.utils.youtube_handler - INFO - Successfully got transcript in en
7
+ 2025-07-14 00:59:05,209 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
8
+ 2025-07-14 00:59:07,507 - src.utils.youtube_handler - INFO - Successfully got transcript in en
9
+ 2025-07-14 01:12:20,696 - src.utils.youtube_handler - ERROR - Error getting video metadata: HTTP Error 400: Bad Request
10
+ 2025-07-14 01:12:20,697 - src.utils.youtube_handler - INFO - Rate limiting: sleeping for 2.69 seconds
11
+ 2025-07-14 01:12:24,675 - src.utils.youtube_handler - INFO - Rate limiting: sleeping for 1.72 seconds
12
+ 2025-07-14 01:12:27,701 - src.utils.youtube_handler - INFO - Successfully got transcript in en on attempt 1
src/src/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # YouTube Transcript Chatbot Package
2
+ __version__ = "1.0.0"
3
+ __author__ = "YouTube Transcript Chatbot Team"
src/src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (226 Bytes). View file
 
src/src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Utilities package
src/src/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (160 Bytes). View file
 
src/src/utils/__pycache__/export_utils.cpython-310.pyc ADDED
Binary file (7.4 kB). View file
 
src/src/utils/__pycache__/logger.cpython-310.pyc ADDED
Binary file (1.38 kB). View file
 
src/src/utils/__pycache__/session_manager.cpython-310.pyc ADDED
Binary file (6.49 kB). View file
 
src/src/utils/__pycache__/text_processor.cpython-310.pyc ADDED
Binary file (12.7 kB). View file
 
src/src/utils/__pycache__/youtube_handler.cpython-310.pyc ADDED
Binary file (10.8 kB). View file
 
src/src/utils/cache_manager.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cache management utilities for vector stores and processed data.
3
+ """
4
+
5
+ import os
6
+ import pickle
7
+ import hashlib
8
+ import logging
9
+ from datetime import datetime, timedelta
10
+ from typing import Any, Optional, Dict
11
+ from pathlib import Path
12
+ import shutil
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
class CacheManager:
    """Manages caching of vector stores and processed data.

    On-disk layout (all under ``cache_dir``):
      - ``vectorstores/<md5(video_id)>``      FAISS index directory (``save_local`` output)
      - ``transcripts/<md5(video_id)>.pkl``   pickled transcript payload
      - ``metadata/<md5(video_id)>.pkl``      pickled bookkeeping record
    """

    def __init__(self, cache_dir: str = "cache", max_size_mb: int = 500):
        """
        Initialize cache manager.

        Args:
            cache_dir (str): Cache directory path
            max_size_mb (int): Maximum cache size in MB
        """
        self.cache_dir = Path(cache_dir)
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.ensure_cache_directory()

    def ensure_cache_directory(self):
        """Ensure cache directory exists."""
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Create subdirectories
        (self.cache_dir / "vectorstores").mkdir(exist_ok=True)
        (self.cache_dir / "transcripts").mkdir(exist_ok=True)
        (self.cache_dir / "metadata").mkdir(exist_ok=True)

    def _get_cache_key(self, data: str) -> str:
        """
        Generate cache key from data.

        MD5 is used here purely as a stable filename hash, not as a
        security measure.

        Args:
            data (str): Data to generate key for

        Returns:
            str: Cache key (hex digest)
        """
        return hashlib.md5(data.encode()).hexdigest()

    def _get_cache_path(self, cache_type: str, key: str) -> Path:
        """
        Get cache file path.

        Args:
            cache_type (str): Type of cache (vectorstores, transcripts, metadata)
            key (str): Cache key

        Returns:
            Path: Cache file path (``.pkl``; vector-store callers strip the
            suffix because FAISS writes a directory, not a single file)
        """
        return self.cache_dir / cache_type / f"{key}.pkl"

    def save_vectorstore(self, video_id: str, vectorstore: Any) -> bool:
        """
        Save vector store to cache.

        Args:
            video_id (str): Video ID
            vectorstore (Any): Vector store object

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            cache_key = self._get_cache_key(video_id)
            cache_path = self._get_cache_path("vectorstores", cache_key)

            # Save vector store using FAISS's built-in save method.
            # with_suffix("") drops ".pkl" because save_local creates a directory.
            vectorstore.save_local(str(cache_path.with_suffix("")))

            # Save metadata
            metadata = {
                'video_id': video_id,
                'created_at': datetime.now().isoformat(),
                'cache_key': cache_key
            }

            metadata_path = self._get_cache_path("metadata", cache_key)
            with open(metadata_path, 'wb') as f:
                pickle.dump(metadata, f)

            logger.info(f"Vector store cached for video {video_id}")
            self._cleanup_cache()
            return True

        except Exception as e:
            logger.error(f"Error caching vector store: {e}")
            return False

    def load_vectorstore(self, video_id: str, embeddings: Any) -> Optional[Any]:
        """
        Load vector store from cache.

        Args:
            video_id (str): Video ID
            embeddings (Any): Embeddings object for loading

        Returns:
            Optional[Any]: Vector store object or None if not found
        """
        try:
            cache_key = self._get_cache_key(video_id)
            cache_path = self._get_cache_path("vectorstores", cache_key)

            if not cache_path.with_suffix("").exists():
                return None

            # Load vector store using FAISS's built-in load method.
            # NOTE(review): recent langchain_community versions refuse to
            # deserialize a pickled FAISS index unless
            # allow_dangerous_deserialization=True is passed — confirm against
            # the pinned langchain_community version.
            from langchain_community.vectorstores import FAISS
            vectorstore = FAISS.load_local(str(cache_path.with_suffix("")), embeddings)

            logger.info(f"Vector store loaded from cache for video {video_id}")
            return vectorstore

        except Exception as e:
            logger.error(f"Error loading vector store from cache: {e}")
            return None

    def save_transcript(self, video_id: str, transcript_data: Dict[str, Any]) -> bool:
        """
        Save transcript data to cache.

        Args:
            video_id (str): Video ID
            transcript_data (Dict[str, Any]): Transcript data

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            cache_key = self._get_cache_key(video_id)
            cache_path = self._get_cache_path("transcripts", cache_key)

            # Wrap the payload with bookkeeping fields before pickling.
            cache_data = {
                'video_id': video_id,
                'transcript_data': transcript_data,
                'created_at': datetime.now().isoformat(),
                'cache_key': cache_key
            }

            with open(cache_path, 'wb') as f:
                pickle.dump(cache_data, f)

            logger.info(f"Transcript cached for video {video_id}")
            self._cleanup_cache()
            return True

        except Exception as e:
            logger.error(f"Error caching transcript: {e}")
            return False

    def load_transcript(self, video_id: str) -> Optional[Dict[str, Any]]:
        """
        Load transcript data from cache.

        Args:
            video_id (str): Video ID

        Returns:
            Optional[Dict[str, Any]]: Transcript data or None if not found
        """
        try:
            cache_key = self._get_cache_key(video_id)
            cache_path = self._get_cache_path("transcripts", cache_key)

            if not cache_path.exists():
                return None

            with open(cache_path, 'rb') as f:
                cache_data = pickle.load(f)

            logger.info(f"Transcript loaded from cache for video {video_id}")
            # Unwrap: only the caller-supplied payload is returned.
            return cache_data['transcript_data']

        except Exception as e:
            logger.error(f"Error loading transcript from cache: {e}")
            return None

    def is_cached(self, video_id: str, cache_type: str = "vectorstores") -> bool:
        """
        Check if data is cached for video.

        Args:
            video_id (str): Video ID
            cache_type (str): Type of cache to check

        Returns:
            bool: True if cached, False otherwise
        """
        try:
            cache_key = self._get_cache_key(video_id)

            # Vector stores live in a suffix-less directory; everything else
            # is a single .pkl file.
            if cache_type == "vectorstores":
                cache_path = self._get_cache_path("vectorstores", cache_key)
                return cache_path.with_suffix("").exists()
            else:
                cache_path = self._get_cache_path(cache_type, cache_key)
                return cache_path.exists()

        except Exception as e:
            logger.error(f"Error checking cache: {e}")
            return False

    def delete_cache(self, video_id: str) -> bool:
        """
        Delete cached data for video.

        Args:
            video_id (str): Video ID

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            cache_key = self._get_cache_key(video_id)

            # Delete vector store cache (a directory, hence rmtree)
            vectorstore_path = self._get_cache_path("vectorstores", cache_key)
            if vectorstore_path.with_suffix("").exists():
                shutil.rmtree(vectorstore_path.with_suffix(""))

            # Delete transcript cache
            transcript_path = self._get_cache_path("transcripts", cache_key)
            if transcript_path.exists():
                transcript_path.unlink()

            # Delete metadata cache
            metadata_path = self._get_cache_path("metadata", cache_key)
            if metadata_path.exists():
                metadata_path.unlink()

            logger.info(f"Cache deleted for video {video_id}")
            return True

        except Exception as e:
            logger.error(f"Error deleting cache: {e}")
            return False

    def get_cache_size(self) -> Dict[str, Any]:
        """
        Get cache size information.

        Returns:
            Dict[str, Any]: Cache size information (empty dict on error)
        """
        try:
            total_size = 0
            file_count = 0

            # Walk the whole cache tree, including FAISS index directories.
            for root, dirs, files in os.walk(self.cache_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    if os.path.exists(file_path):
                        total_size += os.path.getsize(file_path)
                        file_count += 1

            return {
                'total_size_bytes': total_size,
                'total_size_mb': round(total_size / (1024 * 1024), 2),
                'file_count': file_count,
                'max_size_mb': self.max_size_bytes / (1024 * 1024),
                'usage_percent': round((total_size / self.max_size_bytes) * 100, 2)
            }

        except Exception as e:
            logger.error(f"Error getting cache size: {e}")
            return {}

    def _cleanup_cache(self):
        """Clean up cache if it exceeds maximum size (evicts oldest files
        until usage drops to 80% of the limit).

        NOTE(review): eviction removes individual files, so it may delete
        part of a FAISS index directory and leave the rest unreadable —
        confirm whether whole-entry eviction is required.
        """
        try:
            cache_info = self.get_cache_size()

            if cache_info.get('total_size_bytes', 0) > self.max_size_bytes:
                logger.info("Cache size exceeded, cleaning up...")

                # Get all cache files with their modification times
                cache_files = []
                for root, dirs, files in os.walk(self.cache_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        if os.path.exists(file_path):
                            mtime = os.path.getmtime(file_path)
                            cache_files.append((file_path, mtime))

                # Sort by modification time (oldest first)
                cache_files.sort(key=lambda x: x[1])

                # Delete oldest files until under limit
                current_size = cache_info.get('total_size_bytes', 0)
                target_size = self.max_size_bytes * 0.8  # Clean to 80% of max

                for file_path, _ in cache_files:
                    if current_size <= target_size:
                        break

                    try:
                        file_size = os.path.getsize(file_path)
                        os.remove(file_path)
                        current_size -= file_size
                        logger.debug(f"Deleted cache file: {file_path}")
                    except Exception as e:
                        logger.error(f"Error deleting cache file {file_path}: {e}")

                logger.info("Cache cleanup completed")

        except Exception as e:
            logger.error(f"Error during cache cleanup: {e}")

    def clear_all_cache(self) -> bool:
        """
        Clear all cached data.

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Remove the whole tree, then recreate the empty skeleton so
            # subsequent saves don't fail on missing directories.
            if self.cache_dir.exists():
                shutil.rmtree(self.cache_dir)
            self.ensure_cache_directory()

            logger.info("All cache cleared")
            return True

        except Exception as e:
            logger.error(f"Error clearing cache: {e}")
            return False

    def get_cached_videos(self) -> List[Dict[str, Any]]:
        """
        Get list of cached videos.

        Returns:
            List[Dict[str, Any]]: List of cached video information
        """
        try:
            cached_videos = []
            metadata_dir = self.cache_dir / "metadata"

            if not metadata_dir.exists():
                return cached_videos

            for metadata_file in metadata_dir.glob("*.pkl"):
                try:
                    with open(metadata_file, 'rb') as f:
                        metadata = pickle.load(f)

                    cached_videos.append({
                        'video_id': metadata.get('video_id'),
                        'cache_key': metadata.get('cache_key'),
                        'created_at': metadata.get('created_at'),
                        # Size of the metadata record only, not of the cached
                        # vector store / transcript.
                        'file_size': metadata_file.stat().st_size
                    })

                except Exception as e:
                    # Skip unreadable/corrupt metadata entries, keep going.
                    logger.error(f"Error reading metadata file {metadata_file}: {e}")

            return cached_videos

        except Exception as e:
            logger.error(f"Error getting cached videos: {e}")
            return []
src/src/utils/database.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database utilities for storing processed videos and conversations.
3
+ """
4
+
5
+ import sqlite3
6
+ import json
7
+ import logging
8
+ from datetime import datetime
9
+ from typing import Dict, List, Any, Optional
10
+ from pathlib import Path
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class DatabaseManager:
    """Manages SQLite database operations for the chatbot.

    Each method opens a short-lived connection with
    ``sqlite3.connect(self.db_path)``; no connection is held between calls.

    NOTE(review): sqlite3 does not enforce FOREIGN KEY clauses unless
    ``PRAGMA foreign_keys = ON`` is executed per connection — the declared
    constraints below are currently documentation only; confirm intent.
    """

    def __init__(self, db_path: str = "data/chatbot.db"):
        """
        Initialize database manager.

        Args:
            db_path (str): Path to SQLite database file
        """
        self.db_path = db_path
        self.ensure_db_directory()
        self.init_database()

    def ensure_db_directory(self):
        """Ensure database directory exists."""
        db_dir = Path(self.db_path).parent
        db_dir.mkdir(parents=True, exist_ok=True)

    def init_database(self):
        """Initialize database tables (idempotent: CREATE TABLE IF NOT EXISTS).

        Raises:
            Exception: re-raised on any failure, since the app cannot run
            without its schema.
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                # Videos table
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS videos (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        video_id TEXT UNIQUE NOT NULL,
                        url TEXT NOT NULL,
                        title TEXT,
                        author TEXT,
                        duration INTEGER,
                        views INTEGER,
                        publish_date TEXT,
                        thumbnail_url TEXT,
                        transcript TEXT,
                        metadata TEXT,
                        processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        language TEXT DEFAULT 'en'
                    )
                ''')

                # Conversations table
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS conversations (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        conversation_id TEXT NOT NULL,
                        video_id TEXT,
                        question TEXT NOT NULL,
                        answer TEXT NOT NULL,
                        source_documents TEXT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        FOREIGN KEY (video_id) REFERENCES videos (video_id)
                    )
                ''')

                # Vector stores table (for caching)
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS vector_stores (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        video_id TEXT UNIQUE NOT NULL,
                        vector_data BLOB,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        FOREIGN KEY (video_id) REFERENCES videos (video_id)
                    )
                ''')

                # User sessions table
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS user_sessions (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        session_id TEXT UNIQUE NOT NULL,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        last_activity TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        metadata TEXT
                    )
                ''')

                conn.commit()
                logger.info("Database initialized successfully")

        except Exception as e:
            logger.error(f"Error initializing database: {e}")
            raise

    def save_video(self, video_data: Dict[str, Any]) -> bool:
        """
        Save video information to database.

        Uses INSERT OR REPLACE, so re-processing a video overwrites the
        existing row (video_id is UNIQUE).

        Args:
            video_data (Dict[str, Any]): Video data including metadata and transcript

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    INSERT OR REPLACE INTO videos 
                    (video_id, url, title, author, duration, views, publish_date, 
                     thumbnail_url, transcript, metadata, language)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    video_data.get('video_id'),
                    video_data.get('url'),
                    video_data.get('title'),
                    video_data.get('author'),
                    video_data.get('duration'),
                    video_data.get('views'),
                    video_data.get('publish_date'),
                    video_data.get('thumbnail_url'),
                    video_data.get('transcript'),
                    # nested metadata dict is serialized to a JSON string column
                    json.dumps(video_data.get('metadata', {})),
                    video_data.get('language', 'en')
                ))

                conn.commit()
                return True

        except Exception as e:
            logger.error(f"Error saving video: {e}")
            return False

    def get_video(self, video_id: str) -> Optional[Dict[str, Any]]:
        """
        Get video information from database.

        Args:
            video_id (str): Video ID

        Returns:
            Optional[Dict[str, Any]]: Video data or None if not found
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    SELECT video_id, url, title, author, duration, views, 
                           publish_date, thumbnail_url, transcript, metadata, 
                           processed_at, language
                    FROM videos WHERE video_id = ?
                ''', (video_id,))

                row = cursor.fetchone()
                if row:
                    # Positional indexing must stay in sync with the SELECT
                    # column order above.
                    return {
                        'video_id': row[0],
                        'url': row[1],
                        'title': row[2],
                        'author': row[3],
                        'duration': row[4],
                        'views': row[5],
                        'publish_date': row[6],
                        'thumbnail_url': row[7],
                        'transcript': row[8],
                        'metadata': json.loads(row[9]) if row[9] else {},
                        'processed_at': row[10],
                        'language': row[11]
                    }

        except Exception as e:
            logger.error(f"Error getting video: {e}")

        return None

    def save_conversation(self, conversation_data: Dict[str, Any]) -> bool:
        """
        Save conversation entry to database.

        Args:
            conversation_data (Dict[str, Any]): Conversation data

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    INSERT INTO conversations 
                    (conversation_id, video_id, question, answer, source_documents)
                    VALUES (?, ?, ?, ?, ?)
                ''', (
                    conversation_data.get('conversation_id'),
                    conversation_data.get('video_id'),
                    conversation_data.get('question'),
                    conversation_data.get('answer'),
                    json.dumps(conversation_data.get('source_documents', []))
                ))

                conn.commit()
                return True

        except Exception as e:
            logger.error(f"Error saving conversation: {e}")
            return False

    def get_conversations(self, video_id: Optional[str] = None, conversation_id: Optional[str] = None,
                         limit: int = 100) -> List[Dict[str, Any]]:
        """
        Get conversations from database, newest first.

        Args:
            video_id (Optional[str]): Optional video ID filter
            conversation_id (Optional[str]): Optional conversation ID filter
            limit (int): Maximum number of results

        Returns:
            List[Dict[str, Any]]: List of conversations (empty list on error)
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                # Build the WHERE clause dynamically from whichever filters
                # were supplied; values always go through placeholders.
                query = '''
                    SELECT conversation_id, video_id, question, answer, 
                           source_documents, created_at
                    FROM conversations
                '''
                params = []

                conditions = []
                if video_id:
                    conditions.append('video_id = ?')
                    params.append(video_id)

                if conversation_id:
                    conditions.append('conversation_id = ?')
                    params.append(conversation_id)

                if conditions:
                    query += ' WHERE ' + ' AND '.join(conditions)

                query += ' ORDER BY created_at DESC LIMIT ?'
                params.append(limit)

                cursor.execute(query, params)
                rows = cursor.fetchall()

                conversations = []
                for row in rows:
                    conversations.append({
                        'conversation_id': row[0],
                        'video_id': row[1],
                        'question': row[2],
                        'answer': row[3],
                        'source_documents': json.loads(row[4]) if row[4] else [],
                        'created_at': row[5]
                    })

                return conversations

        except Exception as e:
            logger.error(f"Error getting conversations: {e}")
            return []

    def get_processed_videos(self, limit: int = 50) -> List[Dict[str, Any]]:
        """
        Get list of processed videos, most recently processed first.

        Args:
            limit (int): Maximum number of results

        Returns:
            List[Dict[str, Any]]: List of processed videos (empty list on error)
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                cursor.execute('''
                    SELECT video_id, title, author, duration, processed_at
                    FROM videos
                    ORDER BY processed_at DESC
                    LIMIT ?
                ''', (limit,))

                rows = cursor.fetchall()

                videos = []
                for row in rows:
                    videos.append({
                        'video_id': row[0],
                        'title': row[1],
                        'author': row[2],
                        'duration': row[3],
                        'processed_at': row[4]
                    })

                return videos

        except Exception as e:
            logger.error(f"Error getting processed videos: {e}")
            return []

    def delete_video(self, video_id: str) -> bool:
        """
        Delete video and associated conversations.

        Args:
            video_id (str): Video ID to delete

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                # Delete conversations first (foreign key constraint)
                cursor.execute('DELETE FROM conversations WHERE video_id = ?', (video_id,))
                cursor.execute('DELETE FROM vector_stores WHERE video_id = ?', (video_id,))
                cursor.execute('DELETE FROM videos WHERE video_id = ?', (video_id,))

                conn.commit()
                return True

        except Exception as e:
            logger.error(f"Error deleting video: {e}")
            return False

    def get_database_stats(self) -> Dict[str, Any]:
        """
        Get database statistics.

        Returns:
            Dict[str, Any]: Database statistics (empty dict on error)
        """
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()

                # Count videos
                cursor.execute('SELECT COUNT(*) FROM videos')
                video_count = cursor.fetchone()[0]

                # Count conversations
                cursor.execute('SELECT COUNT(*) FROM conversations')
                conversation_count = cursor.fetchone()[0]

                # Get database size via the page-count/page-size pragmas
                cursor.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
                db_size = cursor.fetchone()[0]

                return {
                    'total_videos': video_count,
                    'total_conversations': conversation_count,
                    'database_size_bytes': db_size,
                    'database_size_mb': round(db_size / (1024 * 1024), 2)
                }

        except Exception as e:
            logger.error(f"Error getting database stats: {e}")
            return {}
src/src/utils/export_utils.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Export utilities for generating PDF, text, and other format exports.
3
+ """
4
+
5
+ import io
6
+ import json
7
+ import logging
8
+ from datetime import datetime
9
+ from typing import List, Dict, Any, Optional
10
+ from reportlab.lib.pagesizes import letter, A4
11
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
12
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
13
+ from reportlab.lib.units import inch
14
+ from reportlab.lib.colors import HexColor
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
class ExportUtils:
    """Utilities for exporting chat history and transcripts in various formats.

    Chat-history entries are expected to be dicts with at least the keys
    ``question``, ``answer`` and ``timestamp`` (ISO-8601 string) — see the
    export methods below. PDF output relies on reportlab.
    """

    def __init__(self):
        self.styles = getSampleStyleSheet()
        self.setup_custom_styles()

    def setup_custom_styles(self):
        """Setup custom styles for PDF generation (title, question, answer)."""
        self.styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=self.styles['Heading1'],
            fontSize=16,
            spaceAfter=30,
            textColor=HexColor('#2E86AB')
        ))

        self.styles.add(ParagraphStyle(
            name='QuestionStyle',
            parent=self.styles['Normal'],
            fontSize=12,
            spaceAfter=10,
            textColor=HexColor('#A23B72'),
            leftIndent=20
        ))

        self.styles.add(ParagraphStyle(
            name='AnswerStyle',
            parent=self.styles['Normal'],
            fontSize=11,
            spaceAfter=20,
            leftIndent=40
        ))

    def export_to_pdf(self, chat_history: List[Dict[str, Any]],
                     video_metadata: Optional[Dict[str, Any]] = None) -> bytes:
        """
        Export chat history to PDF format.

        Args:
            chat_history (List[Dict[str, Any]]): Chat history entries
            video_metadata (Optional[Dict[str, Any]]): Video metadata

        Returns:
            bytes: PDF content as bytes (empty bytes on error)
        """
        try:
            buffer = io.BytesIO()
            doc = SimpleDocTemplate(buffer, pagesize=A4)
            story = []

            # Title
            title = "YouTube Transcript Q&A Session"
            story.append(Paragraph(title, self.styles['CustomTitle']))
            story.append(Spacer(1, 12))

            # Video information
            if video_metadata:
                story.append(Paragraph("Video Information", self.styles['Heading2']))
                story.append(Paragraph(f"<b>Title:</b> {video_metadata.get('title', 'N/A')}", 
                                     self.styles['Normal']))
                story.append(Paragraph(f"<b>Author:</b> {video_metadata.get('author', 'N/A')}", 
                                     self.styles['Normal']))
                story.append(Paragraph(f"<b>Duration:</b> {self._format_duration(video_metadata.get('length', 0))}", 
                                     self.styles['Normal']))
                story.append(Spacer(1, 20))

            # Export information
            story.append(Paragraph(f"<b>Exported on:</b> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 
                                 self.styles['Normal']))
            story.append(Paragraph(f"<b>Total Questions:</b> {len(chat_history)}", 
                                 self.styles['Normal']))
            story.append(Spacer(1, 20))

            # Chat history
            story.append(Paragraph("Questions and Answers", self.styles['Heading2']))
            story.append(Spacer(1, 12))

            for i, entry in enumerate(chat_history, 1):
                # Question
                story.append(Paragraph(f"<b>Q{i}:</b> {entry['question']}", 
                                     self.styles['QuestionStyle']))

                # Answer
                story.append(Paragraph(f"<b>A{i}:</b> {entry['answer']}", 
                                     self.styles['AnswerStyle']))

                # Timestamp — assumes entry['timestamp'] is an ISO-8601 string
                # (as produced by datetime.isoformat()); fromisoformat raises
                # on anything else.
                timestamp = datetime.fromisoformat(entry['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
                story.append(Paragraph(f"<i>Asked on: {timestamp}</i>", 
                                     self.styles['Normal']))
                story.append(Spacer(1, 15))

            doc.build(story)
            buffer.seek(0)
            return buffer.getvalue()

        except Exception as e:
            logger.error(f"Error generating PDF: {e}")
            return b""

    def export_to_text(self, chat_history: List[Dict[str, Any]],
                      video_metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Export chat history to plain text format.

        Args:
            chat_history (List[Dict[str, Any]]): Chat history entries
            video_metadata (Optional[Dict[str, Any]]): Video metadata

        Returns:
            str: Text content (empty string on error)
        """
        try:
            lines = []
            lines.append("YouTube Transcript Q&A Session")
            lines.append("=" * 50)
            lines.append("")

            # Video information
            if video_metadata:
                lines.append("Video Information:")
                lines.append(f"Title: {video_metadata.get('title', 'N/A')}")
                lines.append(f"Author: {video_metadata.get('author', 'N/A')}")
                lines.append(f"Duration: {self._format_duration(video_metadata.get('length', 0))}")
                lines.append("")

            # Export information
            lines.append(f"Exported on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            lines.append(f"Total Questions: {len(chat_history)}")
            lines.append("")
            lines.append("Questions and Answers:")
            lines.append("-" * 30)
            lines.append("")

            # Chat history
            for i, entry in enumerate(chat_history, 1):
                # Same ISO-8601 timestamp assumption as export_to_pdf.
                timestamp = datetime.fromisoformat(entry['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
                lines.append(f"Q{i}: {entry['question']}")
                lines.append(f"A{i}: {entry['answer']}")
                lines.append(f"Asked on: {timestamp}")
                lines.append("")
                lines.append("-" * 30)
                lines.append("")

            return "\n".join(lines)

        except Exception as e:
            logger.error(f"Error generating text export: {e}")
            return ""

    def export_to_json(self, chat_history: List[Dict[str, Any]],
                      video_metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Export chat history to JSON format.

        Args:
            chat_history (List[Dict[str, Any]]): Chat history entries
            video_metadata (Optional[Dict[str, Any]]): Video metadata

        Returns:
            str: JSON content (empty string on error)
        """
        try:
            export_data = {
                'export_info': {
                    'exported_at': datetime.now().isoformat(),
                    'total_questions': len(chat_history),
                    'format_version': '1.0'
                },
                'video_metadata': video_metadata or {},
                'chat_history': chat_history
            }

            # ensure_ascii=False keeps non-Latin transcript text readable.
            return json.dumps(export_data, indent=2, ensure_ascii=False)

        except Exception as e:
            logger.error(f"Error generating JSON export: {e}")
            return ""

    def export_transcript(self, transcript_text: str, video_metadata: Optional[Dict[str, Any]] = None,
                         format: str = 'txt') -> str:
        """
        Export transcript in specified format.

        NOTE: the ``format`` parameter shadows the builtin of the same name;
        it is kept because callers may pass it by keyword. Unknown formats
        fall through and return the raw transcript text.

        Args:
            transcript_text (str): Transcript text
            video_metadata (Optional[Dict[str, Any]]): Video metadata
            format (str): Export format ('txt', 'json')

        Returns:
            str: Exported transcript (empty string on error)
        """
        try:
            if format == 'txt':
                lines = []
                lines.append("YouTube Video Transcript")
                lines.append("=" * 30)
                lines.append("")

                if video_metadata:
                    lines.append(f"Title: {video_metadata.get('title', 'N/A')}")
                    lines.append(f"Author: {video_metadata.get('author', 'N/A')}")
                    lines.append(f"Duration: {self._format_duration(video_metadata.get('length', 0))}")
                    lines.append("")

                lines.append(f"Exported on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
                lines.append("")
                lines.append("Transcript:")
                lines.append("-" * 20)
                lines.append("")
                lines.append(transcript_text)

                return "\n".join(lines)

            elif format == 'json':
                export_data = {
                    'export_info': {
                        'exported_at': datetime.now().isoformat(),
                        'format_version': '1.0'
                    },
                    'video_metadata': video_metadata or {},
                    'transcript': transcript_text
                }
                return json.dumps(export_data, indent=2, ensure_ascii=False)

            return transcript_text

        except Exception as e:
            logger.error(f"Error exporting transcript: {e}")
            return ""

    def _format_duration(self, seconds: int) -> str:
        """Format duration from seconds to HH:MM:SS format.

        Returns "N/A" for falsy input (0, None). Assumes an integer number
        of seconds — the :02d format specifiers reject floats.
        """
        if not seconds:
            return "N/A"

        hours = seconds // 3600
        minutes = (seconds % 3600) // 60
        seconds = seconds % 60  # rebinds the parameter to the remainder

        if hours > 0:
            return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        else:
            return f"{minutes:02d}:{seconds:02d}"
src/src/utils/logger.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Logging configuration and utilities.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ from datetime import datetime
8
+ from logging.handlers import RotatingFileHandler
9
+
10
def setup_logging(log_level: str = "INFO", log_file: str = None) -> logging.Logger:
    """
    Setup logging configuration (console + rotating file handler).

    Args:
        log_level (str): Logging level name (DEBUG, INFO, WARNING, ERROR,
            CRITICAL). Unknown names fall back to INFO instead of raising.
        log_file (str): Optional log file path. When omitted, a dated file
            under ``logs/`` is used.

    Returns:
        logging.Logger: Logger for this module, attached to the configured
        root logger.
    """
    if log_file:
        # Only create a parent directory when the path actually has one:
        # os.makedirs("") raises FileNotFoundError for bare filenames
        # like "app.log".
        log_dir = os.path.dirname(log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
    else:
        os.makedirs('logs', exist_ok=True)
        log_file = f'logs/youtube_chatbot_{datetime.now().strftime("%Y%m%d")}.log'

    # Configure logging. force=True makes repeated calls reconfigure the
    # root logger; without it basicConfig is a silent no-op once any
    # handler is installed.
    logging.basicConfig(
        level=getattr(logging, log_level.upper(), logging.INFO),
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),  # Console output
            RotatingFileHandler(
                log_file,
                maxBytes=10*1024*1024,  # 10MB
                backupCount=5
            )
        ],
        force=True,
    )

    return logging.getLogger(__name__)
43
+
44
def get_logger(name: str) -> logging.Logger:
    """Fetch (or lazily create) the logger registered under ``name``."""
    named_logger = logging.getLogger(name)
    return named_logger
src/src/utils/session_manager.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Session management utilities for handling chat history and application state.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from datetime import datetime
8
+ from typing import Dict, List, Any, Optional
9
+ import streamlit as st
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class SessionManager:
    """Manages session state, chat history, and conversation persistence.

    All state is kept in Streamlit's ``st.session_state`` so it survives
    script reruns within a single browser session.
    """

    # Maximum characters of each source document kept in a history entry.
    _SOURCE_PREVIEW_CHARS = 200

    def __init__(self):
        self.initialize_session_state()

    def initialize_session_state(self):
        """Initialize Streamlit session state variables (idempotent)."""
        # Seed each key only when absent so reruns never clobber live state.
        defaults = {
            'chat_history': [],
            'processed_videos': {},
            'current_video': None,
            'qa_chain': None,
            'vectorstore': None,
            'video_metadata': {},
        }
        for key, default in defaults.items():
            if key not in st.session_state:
                st.session_state[key] = default

        if 'conversation_id' not in st.session_state:
            st.session_state.conversation_id = self.generate_conversation_id()

    def generate_conversation_id(self) -> str:
        """Generate a unique conversation ID from the current timestamp."""
        return f"conv_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    @staticmethod
    def _preview(doc: Any) -> str:
        """Return a truncated text preview of a source document.

        Tolerates objects without a ``page_content`` attribute (the original
        code crashed on those) by falling back to ``str(doc)``.
        """
        content = getattr(doc, 'page_content', None)
        if content is None:
            content = str(doc)
        if len(content) > SessionManager._SOURCE_PREVIEW_CHARS:
            return content[:SessionManager._SOURCE_PREVIEW_CHARS] + "..."
        return content

    def add_to_chat_history(self, question: str, answer: str, video_id: Optional[str] = None,
                           source_docs: Optional[List[Any]] = None):
        """
        Add a Q&A pair to chat history.

        Args:
            question (str): User question
            answer (str): AI answer
            video_id (Optional[str]): Associated video ID
            source_docs (Optional[List[Any]]): Source documents used for answer
        """
        chat_entry = {
            'timestamp': datetime.now().isoformat(),
            'question': question,
            'answer': answer,
            'video_id': video_id,
            # Store only short previews; full documents can be large.
            'source_docs': [self._preview(doc) for doc in (source_docs or [])],
            'conversation_id': st.session_state.conversation_id
        }

        st.session_state.chat_history.append(chat_entry)

    def get_chat_history(self, video_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get chat history, optionally filtered by video ID.

        Args:
            video_id (Optional[str]): Optional video ID to filter by

        Returns:
            List[Dict[str, Any]]: Chat history entries (the live list when
            unfiltered — callers must not mutate it)
        """
        if video_id:
            return [entry for entry in st.session_state.chat_history
                    if entry.get('video_id') == video_id]
        return st.session_state.chat_history

    def clear_chat_history(self, video_id: Optional[str] = None):
        """
        Clear chat history, optionally for a specific video.

        Args:
            video_id (Optional[str]): Optional video ID to clear history for
        """
        if video_id:
            # Keep everything that does NOT belong to the given video.
            st.session_state.chat_history = [
                entry for entry in st.session_state.chat_history
                if entry.get('video_id') != video_id
            ]
        else:
            st.session_state.chat_history = []

    def save_processed_video(self, video_url: str, video_id: str, metadata: Dict[str, Any],
                            transcript: str, qa_chain: Any, vectorstore: Any):
        """
        Save processed video information to session state and make it current.

        Args:
            video_url (str): Video URL
            video_id (str): Video ID
            metadata (Dict[str, Any]): Video metadata
            transcript (str): Video transcript
            qa_chain (Any): QA chain object
            vectorstore (Any): Vector store object
        """
        st.session_state.processed_videos[video_id] = {
            'url': video_url,
            'metadata': metadata,
            'transcript': transcript,
            'processed_at': datetime.now().isoformat(),
            'conversation_id': st.session_state.conversation_id
        }

        # The newly processed video becomes the active one; qa_chain and
        # vectorstore are session-scoped (not persisted per video).
        st.session_state.current_video = video_id
        st.session_state.qa_chain = qa_chain
        st.session_state.vectorstore = vectorstore
        st.session_state.video_metadata = metadata

    def get_processed_videos(self) -> Dict[str, Dict[str, Any]]:
        """Get all processed videos keyed by video ID."""
        return st.session_state.processed_videos

    def switch_to_video(self, video_id: str) -> bool:
        """
        Switch to a previously processed video.

        Args:
            video_id (str): Video ID to switch to

        Returns:
            bool: True if successful, False if video not found
        """
        if video_id in st.session_state.processed_videos:
            st.session_state.current_video = video_id
            # Note: QA chain and vectorstore would need to be recreated
            # This is a simplified version - in a full implementation,
            # you'd want to persist and reload these objects
            return True
        return False

    def export_chat_history(self, format: str = 'json') -> str:
        """
        Export chat history in specified format.

        Args:
            format (str): Export format ('json', 'txt').  (Name shadows the
                builtin but is kept for caller compatibility.)

        Returns:
            str: Exported chat history, or '' for an unknown format
        """
        if format == 'json':
            return json.dumps(st.session_state.chat_history, indent=2)

        elif format == 'txt':
            output = []
            for entry in st.session_state.chat_history:
                output.append(f"Timestamp: {entry['timestamp']}")
                output.append(f"Question: {entry['question']}")
                output.append(f"Answer: {entry['answer']}")
                if entry.get('video_id'):
                    output.append(f"Video ID: {entry['video_id']}")
                output.append("-" * 50)
            return "\n".join(output)

        return ""

    def get_session_stats(self) -> Dict[str, Any]:
        """Get session statistics (counts, current video, session start time)."""
        return {
            'total_questions': len(st.session_state.chat_history),
            'processed_videos': len(st.session_state.processed_videos),
            'current_video': st.session_state.current_video,
            'conversation_id': st.session_state.conversation_id,
            # Earliest entry timestamp; falls back to "now" for an empty history.
            'session_start': min([entry['timestamp'] for entry in st.session_state.chat_history],
                                default=datetime.now().isoformat())
        }
src/src/utils/text_processor.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text processing utilities for document handling and vector store operations.
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ from typing import List, Optional, Dict, Any
8
+ try:
9
+ from langchain_openai import OpenAIEmbeddings, OpenAI
10
+ except ImportError:
11
+ from langchain_community.embeddings import OpenAIEmbeddings
12
+ from langchain_community.llms import OpenAI
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain_community.document_loaders import TextLoader
15
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
16
+ from langchain.chains import RetrievalQA
17
+ from langchain.docstore.document import Document
18
+ import pickle
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
class SimpleVectorStore:
    """
    Lightweight stand-in for a vector store that needs no embeddings.

    Relevance is determined later by plain keyword matching rather than
    vector similarity, so this works entirely offline.
    """

    def __init__(self, documents: List[Document]):
        self.documents = documents
        # Keep the raw chunk texts around for quick inspection/search.
        self.texts = []
        for document in documents:
            self.texts.append(document.page_content)

    def as_retriever(self, search_type: str = "similarity", search_kwargs: dict = None):
        """Build and return a keyword-based retriever over the stored documents."""
        kwargs = search_kwargs if search_kwargs is not None else {}
        return SimpleRetriever(self.documents, kwargs)
35
+
36
class SimpleRetriever:
    """Keyword-overlap retriever over an in-memory list of documents."""

    def __init__(self, documents: List[Document], search_kwargs: dict):
        self.documents = documents
        # Number of documents to return per query (defaults to 4).
        self.k = search_kwargs.get('k', 4)

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Rank documents by how many query words they contain; return the top k."""
        words = query.lower().split()

        ranked = []
        for candidate in self.documents:
            haystack = candidate.page_content.lower()
            hits = 0
            for word in words:
                if word in haystack:
                    hits += 1
            if hits:
                ranked.append((candidate, hits))

        # Highest-scoring first; ties keep original document order (stable sort).
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return [pair[0] for pair in ranked[:self.k]]
57
+
58
class FallbackQAChain:
    """
    Fallback QA chain that works without OpenAI API.

    Mimics the RetrievalQA call contract: invoked with ``{'query': ...}``
    and returning ``{'result': ..., 'source_documents': [...]}``, so callers
    need no special-casing. Answers come from keyword matching over the
    transcript chunks rather than an LLM.
    """

    def __init__(self, vectorstore):
        """
        Args:
            vectorstore: A SimpleVectorStore, a FAISS store, or any object
                exposing its documents via ``docstore._dict``.
        """
        self.vectorstore = vectorstore
        self.documents = []

        # Best-effort extraction of the underlying documents. The duck-typed
        # FAISS-style ``docstore._dict`` path is checked first so any store
        # exposing it works; the two cases are mutually exclusive for the
        # known store types, so the order swap does not change behavior.
        try:
            if hasattr(vectorstore, 'docstore') and hasattr(vectorstore.docstore, '_dict'):
                # NOTE(review): ``docstore._dict`` is a FAISS internal and may
                # change across langchain versions — confirm on upgrade.
                self.documents = list(vectorstore.docstore._dict.values())
            elif isinstance(vectorstore, SimpleVectorStore):
                self.documents = vectorstore.documents
        except Exception as e:
            # Was a bare ``except: pass``, which also swallowed
            # KeyboardInterrupt/SystemExit and hid failures. Narrowed and
            # logged; an empty document list just yields "couldn't find"
            # answers, so this remains best-effort.
            logger.debug(f"Could not extract documents from vectorstore: {e}")

    def __call__(self, inputs: Dict[str, str]) -> Dict[str, Any]:
        """
        Process a query and return an answer.

        Args:
            inputs (Dict[str, str]): Input dictionary with 'query' key

        Returns:
            Dict[str, Any]: Result dictionary with 'result' and 'source_documents'
        """
        query = inputs.get('query', '').lower()

        # Simple keyword-based search: score each chunk by how many query
        # words appear in it (substring match, case-insensitive).
        relevant_docs = []
        for doc in self.documents:
            if hasattr(doc, 'page_content'):
                content = doc.page_content.lower()
                query_words = query.split()
                matches = sum(1 for word in query_words if word in content)
                if matches > 0:
                    relevant_docs.append((doc, matches))

        # Sort by relevance and take top results
        relevant_docs.sort(key=lambda x: x[1], reverse=True)
        top_docs = [doc for doc, _ in relevant_docs[:3]]

        # Generate simple answer by stitching together chunk excerpts.
        if top_docs:
            combined_text = " ".join([doc.page_content[:200] for doc in top_docs])
            answer = f"Based on the transcript, here's what I found: {combined_text[:500]}..."
        else:
            answer = "I couldn't find specific information about that in the transcript. Please try rephrasing your question or ask about different topics covered in the video."

        return {
            'result': answer,
            'source_documents': top_docs
        }
116
+
117
class TextProcessor:
    """Handles text processing, document splitting, and vector store operations.

    Degrades gracefully: when OpenAI embeddings or the LLM are unavailable,
    it falls back to keyword search (SimpleVectorStore / FallbackQAChain),
    so the app still answers questions — just without semantic retrieval.
    """

    def __init__(self, openai_api_key: str):
        """
        Initialize TextProcessor with OpenAI API key.

        Args:
            openai_api_key (str): OpenAI API key
        """
        self.openai_api_key = openai_api_key
        # Clients are built eagerly; actual API calls (and any auth/quota
        # failures) only happen later, inside the try/except fallbacks.
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        self.llm = OpenAI(openai_api_key=openai_api_key, temperature=0.7)

    def create_documents_from_text(self, text: str, metadata: Dict[str, Any] = None) -> List[Document]:
        """
        Create LangChain documents from text with metadata.

        Args:
            text (str): Input text
            metadata (Dict[str, Any]): Document metadata, copied onto every chunk

        Returns:
            List[Document]: Chunked LangChain documents (~1000 chars each,
            200-char overlap between neighbors)
        """
        if metadata is None:
            metadata = {}

        # Use RecursiveCharacterTextSplitter for better text splitting:
        # it prefers paragraph, then line, then word boundaries before
        # falling back to hard character cuts.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", " ", ""]
        )

        # Create a document and split it
        doc = Document(page_content=text, metadata=metadata)
        docs = text_splitter.split_documents([doc])

        return docs

    def create_vector_store(self, documents: List[Document]) -> Optional[FAISS]:
        """
        Create FAISS vector store from documents with fallback options.

        Args:
            documents (List[Document]): List of documents

        Returns:
            Optional[FAISS]: FAISS vector store, a SimpleVectorStore fallback
            when embeddings fail, or None if everything failed
        """
        try:
            if not documents:
                logger.error("No documents provided for vector store creation")
                return None

            # Try with OpenAI embeddings first
            try:
                vectorstore = FAISS.from_documents(documents, self.embeddings)
                return vectorstore
            except Exception as openai_error:
                # Broad catch is deliberate: any embeddings failure (auth,
                # quota, network) should degrade to keyword search, not crash.
                logger.warning(f"OpenAI embeddings failed: {openai_error}")

                # Fallback to simple text-based search
                logger.info("Using simple text-based fallback")
                return self._create_simple_fallback_store(documents)

        except Exception as e:
            logger.error(f"Error creating vector store: {e}")
            return None

    def _create_simple_fallback_store(self, documents: List[Document]) -> Optional['SimpleVectorStore']:
        """
        Create a simple fallback vector store using basic text search.

        Args:
            documents (List[Document]): List of documents

        Returns:
            Optional[SimpleVectorStore]: Simple vector store or None if failed
        """
        try:
            # Create simple text-based vector store
            simple_store = SimpleVectorStore(documents)
            logger.info("Created simple text-based fallback vector store")
            return simple_store
        except Exception as e:
            logger.error(f"Even fallback vector store creation failed: {e}")
            return None

    def save_vector_store(self, vectorstore: FAISS, path: str) -> bool:
        """
        Save vector store to disk.

        Args:
            vectorstore (FAISS): Vector store to save (FAISS only — the
                SimpleVectorStore fallback has no save_local)
            path (str): Path to save the vector store

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Create the parent directory if the path has one ('.' otherwise).
            os.makedirs(os.path.dirname(path) if os.path.dirname(path) else '.', exist_ok=True)
            vectorstore.save_local(path)
            return True
        except Exception as e:
            logger.error(f"Error saving vector store: {e}")
            return False

    def load_vector_store(self, path: str) -> Optional[FAISS]:
        """
        Load vector store from disk.

        Args:
            path (str): Path to load the vector store from

        Returns:
            Optional[FAISS]: Loaded vector store or None if failed
        """
        try:
            if not os.path.exists(path):
                logger.error(f"Vector store path does not exist: {path}")
                return None

            # NOTE(review): newer langchain versions require
            # allow_dangerous_deserialization=True here — confirm against the
            # pinned langchain-community version.
            vectorstore = FAISS.load_local(path, self.embeddings)
            return vectorstore
        except Exception as e:
            logger.error(f"Error loading vector store: {e}")
            return None

    def create_qa_chain(self, vectorstore, chain_type: str = "stuff") -> Optional[RetrievalQA]:
        """
        Create QA chain from vector store with fallback options.

        Args:
            vectorstore: Vector store (FAISS or SimpleVectorStore)
            chain_type (str): Type of chain to create

        Returns:
            Optional[RetrievalQA]: QA chain, a FallbackQAChain when the LLM is
            unavailable, or None if failed
        """
        try:
            # Check if it's a simple vector store (fallback mode) — the LLM
            # path needs real embeddings, so go straight to keyword QA.
            if isinstance(vectorstore, SimpleVectorStore):
                logger.info("Using simple fallback QA system")
                return FallbackQAChain(vectorstore)

            retriever = vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 4}
            )

            # Try with OpenAI LLM first
            try:
                qa_chain = RetrievalQA.from_chain_type(
                    llm=self.llm,
                    chain_type=chain_type,
                    retriever=retriever,
                    return_source_documents=True
                )
                return qa_chain
            except Exception as openai_error:
                logger.warning(f"OpenAI LLM failed: {openai_error}")

                # Fallback to a simple text-based QA system
                logger.info("Creating fallback QA system")
                return FallbackQAChain(vectorstore)

        except Exception as e:
            logger.error(f"Error creating QA chain: {e}")
            return None

    def _create_fallback_qa_chain(self, vectorstore: FAISS):
        """
        Create a fallback QA chain that works without OpenAI API.

        (Thin wrapper kept for API symmetry; create_qa_chain constructs
        FallbackQAChain directly.)

        Args:
            vectorstore (FAISS): Vector store

        Returns:
            FallbackQAChain: Simple QA chain
        """
        return FallbackQAChain(vectorstore)

    def process_transcript(self, transcript_text: str, metadata: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process transcript text and create QA chain.

        Pipeline: split text into chunks -> build vector store (with
        fallback) -> build QA chain (with fallback).

        Args:
            transcript_text (str): Transcript text
            metadata (Dict[str, Any]): Video metadata

        Returns:
            Dict[str, Any]: Processing result with 'success', 'qa_chain',
            'vectorstore', 'documents' and 'error' keys
        """
        result = {
            'success': False,
            'qa_chain': None,
            'vectorstore': None,
            'documents': None,
            'error': None
        }

        try:
            # Create documents from transcript
            documents = self.create_documents_from_text(transcript_text, metadata)
            if not documents:
                result['error'] = "Failed to create documents from transcript"
                return result

            # Create vector store
            vectorstore = self.create_vector_store(documents)
            if not vectorstore:
                result['error'] = "Failed to create vector store"
                return result

            # Create QA chain
            qa_chain = self.create_qa_chain(vectorstore)
            if not qa_chain:
                result['error'] = "Failed to create QA chain"
                return result

            result['success'] = True
            result['qa_chain'] = qa_chain
            result['vectorstore'] = vectorstore
            result['documents'] = documents

        except Exception as e:
            result['error'] = f"Error processing transcript: {str(e)}"
            logger.error(f"Error processing transcript: {e}")

        return result

    def ask_question(self, qa_chain: RetrievalQA, question: str) -> Dict[str, Any]:
        """
        Ask a question using the QA chain.

        Args:
            qa_chain (RetrievalQA): QA chain (or FallbackQAChain — both are
                callable with {'query': ...})
            question (str): Question to ask

        Returns:
            Dict[str, Any]: Answer and source documents, plus 'success'/'error'
        """
        try:
            # NOTE(review): calling the chain directly is deprecated in newer
            # langchain in favor of .invoke() — confirm installed version.
            result = qa_chain({"query": question})
            return {
                'success': True,
                'answer': result['result'],
                'source_documents': result.get('source_documents', []),
                'error': None
            }
        except Exception as e:
            logger.error(f"Error asking question: {e}")
            return {
                'success': False,
                'answer': None,
                'source_documents': [],
                'error': f"Error processing question: {str(e)}"
            }
src/src/utils/youtube_handler.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube video handling utilities for transcript extraction and metadata retrieval.
3
+ """
4
+
5
+ import os
6
+ import re
7
+ import logging
8
+ import time
9
+ import random
10
+ from typing import Optional, Dict, Any, List
11
+ from pytube import YouTube
12
+ from youtube_transcript_api import (
13
+ YouTubeTranscriptApi,
14
+ TranscriptsDisabled,
15
+ NoTranscriptFound,
16
+ VideoUnavailable,
17
+ CouldNotRetrieveTranscript
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
class YouTubeHandler:
    """Handles YouTube video operations including transcript extraction and metadata retrieval.

    Includes client-side rate limiting and exponential backoff because
    YouTube aggressively blocks rapid or cloud-provider requests.
    """

    def __init__(self):
        self.supported_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
        # Rate limiting to prevent IP blocking
        self.last_request_time = 0          # epoch seconds of the last outbound request
        self.min_request_interval = 3.0     # Minimum 3 seconds between requests
        self.max_retries = 3                # attempts per transcript strategy
        self.base_delay = 2.0               # base for exponential backoff (seconds)

    def _rate_limit(self) -> None:
        """Implement rate limiting to prevent IP blocking.

        Sleeps just long enough to keep at least ``min_request_interval``
        seconds between consecutive outbound requests.
        """
        current_time = time.time()
        time_since_last_request = current_time - self.last_request_time

        if time_since_last_request < self.min_request_interval:
            sleep_time = self.min_request_interval - time_since_last_request
            logger.info(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self.last_request_time = time.time()

    def _exponential_backoff(self, attempt: int) -> None:
        """Implement exponential backoff for retries.

        Delay is base_delay * 2**attempt plus up to 1s of jitter to avoid
        synchronized retry bursts.
        """
        delay = self.base_delay * (2 ** attempt) + random.uniform(0, 1)
        logger.info(f"Exponential backoff: attempt {attempt + 1}, sleeping for {delay:.2f} seconds")
        time.sleep(delay)

    def validate_youtube_url(self, url: str) -> bool:
        """
        Validate if the provided URL is a valid YouTube URL.

        Args:
            url (str): YouTube URL to validate

        Returns:
            bool: True if valid, False otherwise
        """
        # Matches youtube.com / youtu.be / youtube-nocookie.com watch, embed,
        # and short-link forms; the final group is the 11-character video ID.
        youtube_regex = re.compile(
            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
        )
        return bool(youtube_regex.match(url))

    def extract_video_id(self, url: str) -> Optional[str]:
        """
        Extract video ID from YouTube URL.

        Delegates to pytube, which parses the URL locally.

        Args:
            url (str): YouTube URL

        Returns:
            Optional[str]: Video ID if found, None otherwise
        """
        try:
            yt = YouTube(url)
            return yt.video_id
        except Exception as e:
            logger.error(f"Error extracting video ID: {e}")
            return None

    def get_video_metadata(self, url: str) -> Dict[str, Any]:
        """
        Get video metadata including title, description, duration, etc.

        Args:
            url (str): YouTube URL

        Returns:
            Dict[str, Any]: Video metadata; empty dict on any failure so
            callers can proceed without metadata
        """
        try:
            # Accessing these pytube properties triggers network fetches.
            yt = YouTube(url)
            metadata = {
                'title': yt.title,
                'description': yt.description,
                'length': yt.length,
                'views': yt.views,
                # 'rating' was removed in newer pytube versions; tolerate both.
                'rating': getattr(yt, 'rating', None),
                'author': yt.author,
                'publish_date': yt.publish_date,
                'thumbnail_url': yt.thumbnail_url,
                'video_id': yt.video_id
            }
            return metadata
        except Exception as e:
            logger.error(f"Error getting video metadata: {e}")
            return {}

    def get_available_transcripts(self, video_id: str) -> List[Dict[str, str]]:
        """
        Get list of available transcript languages for a video.

        Args:
            video_id (str): YouTube video ID

        Returns:
            List[Dict[str, str]]: List of available transcripts with language
            info; empty list on failure
        """
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            available = []

            for transcript in transcript_list:
                available.append({
                    'language': transcript.language,
                    'language_code': transcript.language_code,
                    'is_generated': transcript.is_generated,
                    'is_translatable': transcript.is_translatable
                })

            return available
        except Exception as e:
            logger.error(f"Error getting available transcripts: {e}")
            return []

    def get_youtube_transcript(self, url: str, language: str = 'en') -> Dict[str, Any]:
        """
        Extract transcript from YouTube video with comprehensive error handling and rate limiting.

        Tries three strategies in order, each with up to ``max_retries``
        attempts and exponential backoff:
          1. find_transcript() over a prioritized language list
          2. any listed transcript (manual preferred over auto-generated)
          3. bare get_transcript() as a last resort

        Args:
            url (str): YouTube video URL
            language (str): Preferred language code (default: 'en')

        Returns:
            Dict[str, Any]: Dictionary containing transcript text and metadata;
            on failure 'error' is set and may be accompanied by 'suggestion'
            and 'details' for user-facing guidance
        """
        result = {
            'success': False,
            'transcript': '',
            'error': None,
            'metadata': {},
            'available_languages': []
        }

        try:
            if not self.validate_youtube_url(url):
                result['error'] = "Invalid YouTube URL format"
                return result

            video_id = self.extract_video_id(url)
            if not video_id:
                result['error'] = "Could not extract video ID from URL"
                return result

            # Apply rate limiting before making requests
            self._rate_limit()

            # Get video metadata
            result['metadata'] = self.get_video_metadata(url)

            # Apply rate limiting before transcript requests
            self._rate_limit()

            # Get available transcripts
            result['available_languages'] = self.get_available_transcripts(video_id)

            # Try to get transcript with multiple strategies and retries
            transcript_data = None
            used_language = None

            # Strategy 1: Try the standard approach with retries
            for attempt in range(self.max_retries):
                try:
                    if attempt > 0:
                        self._exponential_backoff(attempt - 1)

                    self._rate_limit()
                    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

                    # Try preferred language first, then fallback to English, then any available
                    # (The comprehension is built before extend() applies, so it
                    # dedups only against the preferred/'en' entries.)
                    languages_to_try = [language] if language != 'en' else []
                    languages_to_try.extend(['en'])
                    languages_to_try.extend([lang['language_code'] for lang in result['available_languages']
                                             if lang['language_code'] not in languages_to_try])

                    for lang in languages_to_try:
                        try:
                            transcript = transcript_list.find_transcript([lang])
                            transcript_data = transcript.fetch()
                            used_language = lang
                            logger.info(f"Successfully got transcript in {lang} on attempt {attempt + 1}")
                            break
                        except (NoTranscriptFound, TranscriptsDisabled):
                            # Missing language is expected — try the next one.
                            continue

                    if transcript_data:
                        break

                except Exception as e:
                    logger.warning(f"Standard transcript method failed on attempt {attempt + 1}: {e}")
                    if attempt == self.max_retries - 1:
                        logger.error(f"All {self.max_retries} attempts failed for standard method")

            # Strategy 2: Try alternative approach if first failed
            if not transcript_data:
                for attempt in range(self.max_retries):
                    try:
                        if attempt > 0:
                            self._exponential_backoff(attempt - 1)

                        self._rate_limit()
                        # Try to get any available transcript without language preference
                        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
                        available_transcripts = list(transcript_list)

                        if available_transcripts:
                            # Try manual transcripts first (higher quality than auto-generated)
                            manual_transcripts = [t for t in available_transcripts if not t.is_generated]
                            if manual_transcripts:
                                transcript = manual_transcripts[0]
                            else:
                                transcript = available_transcripts[0]

                            transcript_data = transcript.fetch()
                            used_language = transcript.language_code
                            logger.info(f"Got transcript using alternative method in {used_language} on attempt {attempt + 1}")
                            break

                    except Exception as e:
                        logger.warning(f"Alternative transcript method failed on attempt {attempt + 1}: {e}")
                        if attempt == self.max_retries - 1:
                            logger.error(f"All {self.max_retries} attempts failed for alternative method")

            # Strategy 3: Try basic method as last resort
            if not transcript_data:
                for attempt in range(self.max_retries):
                    try:
                        if attempt > 0:
                            self._exponential_backoff(attempt - 1)

                        self._rate_limit()
                        # This is a last resort - try with minimal parameters
                        transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
                        used_language = 'auto-detected'
                        logger.info(f"Got transcript using basic method on attempt {attempt + 1}")
                        break
                    except Exception as e:
                        logger.warning(f"Basic transcript method failed on attempt {attempt + 1}: {e}")
                        if attempt == self.max_retries - 1:
                            logger.error(f"All {self.max_retries} attempts failed for basic method")

            if transcript_data:
                # Format transcript text - handle both dict and object formats
                # (youtube-transcript-api changed its snippet type across versions).
                text_parts = []
                formatted_transcript_data = []

                for item in transcript_data:
                    if hasattr(item, 'text'):
                        # New format: object with attributes
                        text_parts.append(item.text)
                        formatted_transcript_data.append({
                            'text': item.text,
                            'start': getattr(item, 'start', 0),
                            'duration': getattr(item, 'duration', 0)
                        })
                    elif isinstance(item, dict) and 'text' in item:
                        # Old format: dictionary
                        text_parts.append(item['text'])
                        formatted_transcript_data.append(item)
                    else:
                        # Fallback: convert to string
                        text_parts.append(str(item))
                        formatted_transcript_data.append({'text': str(item), 'start': 0, 'duration': 0})

                text = " ".join(text_parts)
                result['transcript'] = text
                result['success'] = True
                result['used_language'] = used_language
                result['transcript_data'] = formatted_transcript_data  # Raw transcript with timestamps
            else:
                result['error'] = "No transcript available in any supported language"

        except TranscriptsDisabled:
            result['error'] = "Transcripts are disabled for this video"
        except NoTranscriptFound:
            result['error'] = "No transcript found for this video"
        except VideoUnavailable:
            result['error'] = "This video is unavailable"
        except CouldNotRetrieveTranscript as e:
            # Classify the failure from the exception message to give the user
            # actionable guidance (IP block vs region lock vs private video...).
            error_msg = str(e).lower()
            if "ip" in error_msg and "block" in error_msg:
                result['error'] = "IP blocked by YouTube: Too many requests from your IP address"
                result['suggestion'] = "Wait 10-15 minutes before trying again, or try a different network"
                result['details'] = "YouTube has temporarily blocked your IP due to too many requests. This is common when testing or using cloud services."
            elif "region" in error_msg or "country" in error_msg:
                result['error'] = "Regional restriction: This video's transcripts are not available in your region"
                result['suggestion'] = "Try using a VPN or try a different video"
            elif "private" in error_msg:
                result['error'] = "This video is private and transcripts cannot be accessed"
            elif "disabled" in error_msg:
                result['error'] = "Transcripts are disabled for this video"
            elif "cloud provider" in error_msg:
                result['error'] = "Cloud provider IP blocked: YouTube blocks most cloud service IPs"
                result['suggestion'] = "Try from a different network or wait before retrying"
                result['details'] = "YouTube automatically blocks IPs from cloud providers like AWS, Google Cloud, etc."
            else:
                result['error'] = f"Could not retrieve transcript: {str(e)}"
            logger.warning(f"Could not retrieve transcript for video: {e}")
        except Exception as e:
            # Same message-sniffing classification for unexpected exception types.
            error_msg = str(e).lower()
            if "ip" in error_msg and ("block" in error_msg or "ban" in error_msg):
                result['error'] = "IP blocked by YouTube: Too many requests from your IP address"
                result['suggestion'] = "Wait 10-15 minutes before trying again, or try a different network"
                result['details'] = "YouTube has temporarily blocked your IP due to too many requests. This is common when testing or using cloud services."
            elif "cloud provider" in error_msg or "aws" in error_msg or "google cloud" in error_msg or "azure" in error_msg:
                result['error'] = "Cloud provider IP blocked: YouTube blocks most cloud service IPs"
                result['suggestion'] = "Try from a different network or wait before retrying"
                result['details'] = "YouTube automatically blocks IPs from cloud providers like AWS, Google Cloud, etc."
            elif "region" in error_msg or "country" in error_msg:
                result['error'] = "Regional restriction: This video's transcripts are not available in your region"
                result['suggestion'] = "Try using a VPN or try a different video"
            elif "private" in error_msg:
                result['error'] = "This video is private and transcripts cannot be accessed"
            elif "unavailable" in error_msg:
                result['error'] = "This video is unavailable or has been removed"
            elif "disabled" in error_msg:
                result['error'] = "Transcripts are disabled for this video"
            elif "too many requests" in error_msg:
                result['error'] = "Rate limited: Too many requests to YouTube"
                result['suggestion'] = "Wait a few minutes before trying again"
                result['details'] = "You've made too many requests to YouTube. Please wait before trying again."
            else:
                result['error'] = f"Unexpected error: {str(e)}"
            logger.error(f"Unexpected error getting transcript: {e}")

        return result

    def save_transcript_to_file(self, transcript_text: str, filename: str = "transcript.txt") -> bool:
        """
        Save transcript text to a file.

        Args:
            transcript_text (str): Transcript text to save
            filename (str): Output filename

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Create the parent directory if the filename has one ('.' otherwise).
            os.makedirs(os.path.dirname(filename) if os.path.dirname(filename) else '.', exist_ok=True)
            with open(filename, "w", encoding="utf-8") as f:
                f.write(transcript_text)
            return True
        except Exception as e:
            logger.error(f"Error saving transcript to file: {e}")
            return False
src/static/style.css ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* YouTube Transcript Chatbot - Custom Styles - Dark Theme */
/* Palette used throughout: #1a1a1a page background, #2d3748 panel
   background, #4a5568 borders, #e9ecef primary text, #a0aec0 muted text,
   #667eea/#764ba2 brand gradient. `!important` is used heavily to win
   specificity battles against Streamlit's inline/theme styles. */

/* Global dark theme styling */
* {
    box-sizing: border-box;
}

body, html {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
    line-height: 1.6;
    color: #e9ecef !important;
    background-color: #1a1a1a !important;
}

/* Ensure all text elements have proper contrast for dark theme */
p, span, div, label, h1, h2, h3, h4, h5, h6 {
    color: #e9ecef !important;
    text-rendering: optimizeLegibility;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
}

/* Main container styling */
.main-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
    background-color: #1a1a1a !important;
}

/* Header styling */
.app-header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 2rem;
    border-radius: 10px;
    margin-bottom: 2rem;
    text-align: center;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.app-header h1 {
    margin: 0;
    font-size: 2.5rem;
    font-weight: 700;
}

.app-header p {
    margin: 0.5rem 0 0 0;
    font-size: 1.1rem;
    opacity: 0.9;
}

/* Card styling - Dark Theme.
   Variant cards below override only background/border/text colors and
   inherit the layout from .info-card. */
.info-card {
    background: #2d3748 !important;
    border-radius: 10px;
    padding: 1.5rem;
    margin: 1rem 0;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
    border-left: 4px solid #667eea;
    color: #e9ecef !important;
}

.success-card {
    background: #1a2e1a !important;
    border-color: #28a745;
    color: #90ee90 !important;
}

.error-card {
    background: #2e1a1a !important;
    border-color: #dc3545;
    color: #ffb3b3 !important;
}

.warning-card {
    background: #2e2a1a !important;
    border-color: #ffc107;
    color: #ffe066 !important;
}

/* Video metadata styling - Dark Theme */
.video-metadata {
    background: #2d3748 !important;
    border-radius: 8px;
    padding: 1rem;
    margin: 1rem 0;
    border: 1px solid #4a5568;
}

.video-metadata h4 {
    color: #e9ecef !important;
    margin-bottom: 0.5rem;
}

.metadata-item {
    display: flex;
    justify-content: space-between;
    padding: 0.25rem 0;
    border-bottom: 1px solid #4a5568;
}

.metadata-item:last-child {
    border-bottom: none;
}

.metadata-label {
    font-weight: 600;
    color: #a0aec0 !important;
}

.metadata-value {
    color: #e9ecef !important;
}

/* Chat history styling - Dark Theme */
.chat-container {
    max-height: 400px;
    overflow-y: auto;
    border: 1px solid #4a5568;
    border-radius: 8px;
    padding: 1rem;
    background: #2d3748 !important;
}

.chat-message {
    margin-bottom: 1rem;
    padding: 0.75rem;
    border-radius: 8px;
}

.chat-question {
    background: #1a365d !important;
    border-left: 4px solid #3182ce;
    color: #e9ecef !important;
}

/* Answers are indented relative to questions (margin-left). */
.chat-answer {
    background: #322659 !important;
    border-left: 4px solid #9f7aea;
    margin-left: 1rem;
    color: #e9ecef !important;
}

.chat-timestamp {
    font-size: 0.8rem;
    color: #a0aec0 !important;
    margin-top: 0.5rem;
}

/* Button styling */
.custom-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border: none;
    padding: 0.75rem 1.5rem;
    border-radius: 6px;
    font-weight: 600;
    cursor: pointer;
    transition: all 0.3s ease;
}

.custom-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

.secondary-button {
    background: #6c757d;
}

.success-button {
    background: #28a745;
}

.danger-button {
    background: #dc3545;
}

/* Loading animation */
.loading-spinner {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid #f3f3f3;
    border-top: 3px solid #667eea;
    border-radius: 50%;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Progress bar */
.progress-bar {
    width: 100%;
    height: 6px;
    background: #e9ecef;
    border-radius: 3px;
    overflow: hidden;
    margin: 1rem 0;
}

.progress-fill {
    height: 100%;
    background: linear-gradient(90deg, #667eea, #764ba2);
    border-radius: 3px;
    transition: width 0.3s ease;
}

/* Sidebar styling - Dark Theme */
.sidebar-content {
    background: #2d3748 !important;
    padding: 1rem;
    border-radius: 8px;
    margin-bottom: 1rem;
    border: 1px solid #4a5568;
}

.sidebar-section {
    margin-bottom: 1.5rem;
}

.sidebar-section h4 {
    color: #e9ecef !important;
    margin-bottom: 0.5rem;
    font-size: 1.1rem;
}

/* Form styling */
.form-group {
    margin-bottom: 1rem;
}

/* NOTE(review): #495057 / #ced4da below look like leftover light-theme
   values and render low-contrast on the #1a1a1a background — confirm
   whether .form-label/.form-input are still used or should be darkened. */
.form-label {
    display: block;
    margin-bottom: 0.5rem;
    font-weight: 600;
    color: #495057;
}

.form-input {
    width: 100%;
    padding: 0.75rem;
    border: 1px solid #ced4da;
    border-radius: 6px;
    font-size: 1rem;
    transition: border-color 0.3s ease;
}

.form-input:focus {
    outline: none;
    border-color: #667eea;
    box-shadow: 0 0 0 2px rgba(102, 126, 234, 0.25);
}

/* Responsive design */
@media (max-width: 768px) {
    .app-header h1 {
        font-size: 2rem;
    }

    .main-container {
        padding: 10px;
    }

    .info-card {
        padding: 1rem;
    }

    .chat-answer {
        margin-left: 0.5rem;
    }
}

/* Streamlit specific overrides - Dark Theme */

/* Main app background */
.stApp {
    background-color: #1a1a1a !important;
    color: #e9ecef !important;
}

.stApp > div {
    background-color: #1a1a1a !important;
}

/* Main content area */
.main .block-container {
    background-color: #1a1a1a !important;
    color: #e9ecef !important;
}

/* Sidebar styling.
   NOTE(review): css-1d391kg / css-1lcbmhc / css-1aumxhk are hashed,
   Streamlit-version-specific class names; they may silently stop matching
   after a Streamlit upgrade. Prefer data-testid selectors where possible. */
.css-1d391kg, .css-1lcbmhc, .css-1aumxhk {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
}

/* Button styling */
.stButton > button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 6px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

.stButton > button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.4) !important;
    color: white !important;
}

/* Input fields */
.stSelectbox > div > div {
    border-radius: 6px !important;
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

.stSelectbox label {
    color: #e9ecef !important;
}

.stTextInput > div > div > input {
    border-radius: 6px !important;
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

.stTextInput label {
    color: #e9ecef !important;
}

.stTextArea > div > div > textarea {
    border-radius: 6px !important;
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

.stTextArea label {
    color: #e9ecef !important;
}

/* Success/Error message styling - Dark Theme */
.stSuccess {
    background: #1a2e1a !important;
    border: 1px solid #28a745 !important;
    border-radius: 6px !important;
    color: #90ee90 !important;
}

.stSuccess p {
    color: #90ee90 !important;
}

.stError {
    background: #2e1a1a !important;
    border: 1px solid #dc3545 !important;
    border-radius: 6px !important;
    color: #ffb3b3 !important;
}

.stError p {
    color: #ffb3b3 !important;
}

.stWarning {
    background: #2e2a1a !important;
    border: 1px solid #ffc107 !important;
    border-radius: 6px !important;
    color: #ffe066 !important;
}

.stWarning p {
    color: #ffe066 !important;
}

.stInfo {
    background: #1a2a2e !important;
    border: 1px solid #17a2b8 !important;
    border-radius: 6px !important;
    color: #66d9ef !important;
}

.stInfo p {
    color: #66d9ef !important;
}

/* Additional dark theme overrides */
.stMarkdown {
    color: #e9ecef !important;
}

.stMarkdown p {
    color: #e9ecef !important;
}

.stMarkdown h1, .stMarkdown h2, .stMarkdown h3, .stMarkdown h4, .stMarkdown h5, .stMarkdown h6 {
    color: #e9ecef !important;
}

/* Expander styling */
.streamlit-expanderHeader {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

.streamlit-expanderContent {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

/* Metric styling */
.metric-container {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
}

/* Code block styling */
.stCode {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
    border: 1px solid #4a5568 !important;
}

/* DataFrame styling */
.stDataFrame {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
}

/* JSON display styling */
.stJson {
    background-color: #2d3748 !important;
    color: #e9ecef !important;
}

/* Spinner styling */
.stSpinner {
    color: #667eea !important;
}

/* Progress bar styling */
.stProgress .st-bo {
    background-color: #667eea !important;
}

/* Custom classes for dark theme */
.visible-text {
    color: #e9ecef !important;
    background-color: #2d3748 !important;
    padding: 0.5rem !important;
    border-radius: 4px !important;
    border: 1px solid #4a5568 !important;
}

.high-contrast-text {
    color: #ffffff !important;
    background-color: #000000 !important;
    font-weight: 600 !important;
    padding: 0.5rem !important;
    border-radius: 4px !important;
}

/* Override any remaining white backgrounds */
div[data-testid="stSidebar"] {
    background-color: #2d3748 !important;
}

div[data-testid="stSidebar"] > div {
    background-color: #2d3748 !important;
}

/* NOTE(review): hashed class names again — version-specific (see above). */
.css-1lcbmhc {
    background-color: #2d3748 !important;
}

.css-1d391kg {
    background-color: #1a1a1a !important;
}

/* Force dark theme on all containers */
.element-container {
    background-color: transparent !important;
    color: #e9ecef !important;
}

.stAlert {
    color: #e9ecef !important;
}
src/tests/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Tests package
src/tests/test_session_manager.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Tests for session manager functionality.
"""

import unittest
from unittest.mock import patch, MagicMock
import sys
import os
from datetime import datetime

# Make the repository root importable so `from src.utils...` resolves.
# This file lives in src/tests/, so the root is TWO levels up; the previous
# path ('..', 'src') pointed at the nonexistent src/src/ directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

# Mock streamlit before importing session_manager, so importing the module
# does not require a running Streamlit session.
sys.modules['streamlit'] = MagicMock()

from src.utils.session_manager import SessionManager
18
+
19
class TestSessionManager(unittest.TestCase):
    """Test cases for SessionManager class.

    Each test patches the `st` (streamlit) reference inside
    src.utils.session_manager with a MagicMock whose `session_state` is a
    plain dict, so SessionManager's reads/writes can be inspected directly.
    """

    def setUp(self):
        """Set up test fixtures."""
        # Mock streamlit session_state
        self.mock_st = MagicMock()
        self.mock_st.session_state = {}

        with patch('src.utils.session_manager.st', self.mock_st):
            self.session_manager = SessionManager()

    def test_initialization(self):
        """Test SessionManager initialization."""
        with patch('src.utils.session_manager.st', self.mock_st):
            manager = SessionManager()

        # Check that session state variables are initialized
        expected_keys = [
            'chat_history', 'processed_videos', 'current_video',
            'qa_chain', 'vectorstore', 'video_metadata', 'conversation_id'
        ]

        for key in expected_keys:
            self.assertIn(key, self.mock_st.session_state)

    def test_generate_conversation_id(self):
        """Test conversation ID generation."""
        conv_id = self.session_manager.generate_conversation_id()

        self.assertIsInstance(conv_id, str)
        self.assertTrue(conv_id.startswith('conv_'))
        # NOTE(review): 'conv_' (5) + 'YYYYMMDD_HHMMSS' (15) is 20 chars,
        # not 19 — confirm the actual ID format against SessionManager.
        self.assertEqual(len(conv_id), 19)  # conv_ + YYYYMMDD_HHMMSS

    def test_add_to_chat_history(self):
        """Test adding entries to chat history."""
        with patch('src.utils.session_manager.st', self.mock_st):
            self.mock_st.session_state = {
                'chat_history': [],
                'conversation_id': 'test_conv_123'
            }

            manager = SessionManager()

            question = "What is this about?"
            answer = "This is a test answer."
            video_id = "test_video_123"

            manager.add_to_chat_history(question, answer, video_id)

            self.assertEqual(len(self.mock_st.session_state['chat_history']), 1)

            # The stored entry carries the Q/A pair plus bookkeeping fields.
            entry = self.mock_st.session_state['chat_history'][0]
            self.assertEqual(entry['question'], question)
            self.assertEqual(entry['answer'], answer)
            self.assertEqual(entry['video_id'], video_id)
            self.assertEqual(entry['conversation_id'], 'test_conv_123')
            self.assertIn('timestamp', entry)

    def test_get_chat_history_all(self):
        """Test getting all chat history."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'video_id': 'vid1'},
                {'question': 'Q2', 'answer': 'A2', 'video_id': 'vid2'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            history = manager.get_chat_history()

            self.assertEqual(history, test_history)

    def test_get_chat_history_filtered(self):
        """Test getting filtered chat history by video ID."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'video_id': 'vid1'},
                {'question': 'Q2', 'answer': 'A2', 'video_id': 'vid2'},
                {'question': 'Q3', 'answer': 'A3', 'video_id': 'vid1'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            history = manager.get_chat_history('vid1')

            # Only the two 'vid1' entries should survive the filter.
            self.assertEqual(len(history), 2)
            self.assertEqual(history[0]['video_id'], 'vid1')
            self.assertEqual(history[1]['video_id'], 'vid1')

    def test_clear_chat_history_all(self):
        """Test clearing all chat history."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'video_id': 'vid1'},
                {'question': 'Q2', 'answer': 'A2', 'video_id': 'vid2'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            manager.clear_chat_history()

            self.assertEqual(self.mock_st.session_state['chat_history'], [])

    def test_clear_chat_history_filtered(self):
        """Test clearing chat history for specific video."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'video_id': 'vid1'},
                {'question': 'Q2', 'answer': 'A2', 'video_id': 'vid2'},
                {'question': 'Q3', 'answer': 'A3', 'video_id': 'vid1'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            manager.clear_chat_history('vid1')

            # Entries for other videos must be left untouched.
            remaining_history = self.mock_st.session_state['chat_history']
            self.assertEqual(len(remaining_history), 1)
            self.assertEqual(remaining_history[0]['video_id'], 'vid2')

    def test_save_processed_video(self):
        """Test saving processed video information."""
        with patch('src.utils.session_manager.st', self.mock_st):
            self.mock_st.session_state = {
                'processed_videos': {},
                'conversation_id': 'test_conv_123'
            }

            manager = SessionManager()

            video_url = "https://youtube.com/watch?v=test123"
            video_id = "test123"
            metadata = {"title": "Test Video", "author": "Test Author"}
            transcript = "This is a test transcript."
            qa_chain = MagicMock()
            vectorstore = MagicMock()

            manager.save_processed_video(
                video_url, video_id, metadata, transcript, qa_chain, vectorstore
            )

            # Check processed_videos
            self.assertIn(video_id, self.mock_st.session_state['processed_videos'])
            saved_video = self.mock_st.session_state['processed_videos'][video_id]

            self.assertEqual(saved_video['url'], video_url)
            self.assertEqual(saved_video['metadata'], metadata)
            self.assertEqual(saved_video['transcript'], transcript)
            self.assertEqual(saved_video['conversation_id'], 'test_conv_123')
            self.assertIn('processed_at', saved_video)

            # Check current session state: saving also makes the video active.
            self.assertEqual(self.mock_st.session_state['current_video'], video_id)
            self.assertEqual(self.mock_st.session_state['qa_chain'], qa_chain)
            self.assertEqual(self.mock_st.session_state['vectorstore'], vectorstore)
            self.assertEqual(self.mock_st.session_state['video_metadata'], metadata)

    def test_get_processed_videos(self):
        """Test getting processed videos."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_videos = {
                'vid1': {'title': 'Video 1'},
                'vid2': {'title': 'Video 2'}
            }
            self.mock_st.session_state = {'processed_videos': test_videos}

            manager = SessionManager()
            videos = manager.get_processed_videos()

            self.assertEqual(videos, test_videos)

    def test_switch_to_video_success(self):
        """Test successful video switching."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_videos = {
                'vid1': {'title': 'Video 1'},
                'vid2': {'title': 'Video 2'}
            }
            self.mock_st.session_state = {'processed_videos': test_videos}

            manager = SessionManager()
            result = manager.switch_to_video('vid1')

            self.assertTrue(result)
            self.assertEqual(self.mock_st.session_state['current_video'], 'vid1')

    def test_switch_to_video_failure(self):
        """Test video switching failure."""
        with patch('src.utils.session_manager.st', self.mock_st):
            self.mock_st.session_state = {'processed_videos': {}}

            manager = SessionManager()
            # Switching to an unknown video must fail without raising.
            result = manager.switch_to_video('nonexistent_vid')

            self.assertFalse(result)

    def test_export_chat_history_json(self):
        """Test exporting chat history as JSON."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'timestamp': '2024-01-01T12:00:00'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            result = manager.export_chat_history('json')

            # Only checks that the payload is a string containing the Q/A;
            # the exact JSON layout is not pinned down here.
            self.assertIsInstance(result, str)
            self.assertIn('Q1', result)
            self.assertIn('A1', result)

    def test_export_chat_history_txt(self):
        """Test exporting chat history as text."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'answer': 'A1', 'timestamp': '2024-01-01T12:00:00'}
            ]
            self.mock_st.session_state = {'chat_history': test_history}

            manager = SessionManager()
            result = manager.export_chat_history('txt')

            self.assertIsInstance(result, str)
            self.assertIn('Question: Q1', result)
            self.assertIn('Answer: A1', result)

    def test_get_session_stats(self):
        """Test getting session statistics."""
        with patch('src.utils.session_manager.st', self.mock_st):
            test_history = [
                {'question': 'Q1', 'timestamp': '2024-01-01T12:00:00'},
                {'question': 'Q2', 'timestamp': '2024-01-01T13:00:00'}
            ]
            test_videos = {'vid1': {}, 'vid2': {}}

            self.mock_st.session_state = {
                'chat_history': test_history,
                'processed_videos': test_videos,
                'current_video': 'vid1',
                'conversation_id': 'test_conv_123'
            }

            manager = SessionManager()
            stats = manager.get_session_stats()

            self.assertEqual(stats['total_questions'], 2)
            self.assertEqual(stats['processed_videos'], 2)
            self.assertEqual(stats['current_video'], 'vid1')
            self.assertEqual(stats['conversation_id'], 'test_conv_123')
            self.assertIn('session_start', stats)
271
if __name__ == '__main__':
    # Allow running this test module directly (python test_session_manager.py).
    unittest.main()
src/tests/test_text_processor.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Tests for text processor functionality.
"""

import unittest
from unittest.mock import patch, MagicMock
import sys
import os

# Make the repository root importable so `from src.utils...` resolves.
# This file lives in src/tests/, so the root is TWO levels up; the previous
# path ('..', 'src') pointed at the nonexistent src/src/ directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

from src.utils.text_processor import TextProcessor
14
+
15
class TestTextProcessor(unittest.TestCase):
    """Test cases for TestProcessor's document/vector-store/QA pipeline.

    FAISS, RetrievalQA and the documents themselves are mocked, so these
    tests exercise TextProcessor's orchestration and error handling only —
    no embeddings or LLM calls are made.
    """

    def setUp(self):
        """Set up test fixtures."""
        # NOTE(review): TextProcessor is constructed with a fake API key —
        # assumes its __init__ does not validate the key against OpenAI.
        self.api_key = "test_api_key"
        self.processor = TextProcessor(self.api_key)

    def test_initialization(self):
        """Test TextProcessor initialization."""
        self.assertEqual(self.processor.openai_api_key, self.api_key)
        self.assertIsNotNone(self.processor.embeddings)
        self.assertIsNotNone(self.processor.llm)

    def test_create_documents_from_text(self):
        """Test document creation from text."""
        text = "This is a test transcript. It has multiple sentences."
        metadata = {"video_id": "test123", "title": "Test Video"}

        documents = self.processor.create_documents_from_text(text, metadata)

        self.assertIsInstance(documents, list)
        self.assertGreater(len(documents), 0)

        # Check first document: metadata must be propagated to each chunk.
        first_doc = documents[0]
        self.assertIn("test transcript", first_doc.page_content.lower())
        self.assertEqual(first_doc.metadata["video_id"], "test123")
        self.assertEqual(first_doc.metadata["title"], "Test Video")

    def test_create_documents_from_text_no_metadata(self):
        """Test document creation without metadata."""
        text = "Simple test text."

        documents = self.processor.create_documents_from_text(text)

        self.assertIsInstance(documents, list)
        self.assertGreater(len(documents), 0)
        self.assertEqual(documents[0].metadata, {})

    @patch('src.utils.text_processor.FAISS')
    def test_create_vector_store_success(self, mock_faiss):
        """Test successful vector store creation."""
        mock_vectorstore = MagicMock()
        mock_faiss.from_documents.return_value = mock_vectorstore

        documents = [MagicMock()]
        result = self.processor.create_vector_store(documents)

        self.assertEqual(result, mock_vectorstore)
        mock_faiss.from_documents.assert_called_once_with(documents, self.processor.embeddings)

    def test_create_vector_store_empty_documents(self):
        """Test vector store creation with empty documents."""
        # Empty input short-circuits to None rather than calling FAISS.
        result = self.processor.create_vector_store([])
        self.assertIsNone(result)

    @patch('src.utils.text_processor.FAISS')
    def test_create_vector_store_failure(self, mock_faiss):
        """Test vector store creation failure."""
        mock_faiss.from_documents.side_effect = Exception("Test error")

        documents = [MagicMock()]
        # Failures are swallowed and reported as None, not raised.
        result = self.processor.create_vector_store(documents)

        self.assertIsNone(result)

    @patch('src.utils.text_processor.RetrievalQA')
    def test_create_qa_chain_success(self, mock_retrieval_qa):
        """Test successful QA chain creation."""
        mock_qa_chain = MagicMock()
        mock_retrieval_qa.from_chain_type.return_value = mock_qa_chain

        mock_vectorstore = MagicMock()
        mock_retriever = MagicMock()
        mock_vectorstore.as_retriever.return_value = mock_retriever

        result = self.processor.create_qa_chain(mock_vectorstore)

        self.assertEqual(result, mock_qa_chain)
        mock_vectorstore.as_retriever.assert_called_once()
        mock_retrieval_qa.from_chain_type.assert_called_once()

    @patch('src.utils.text_processor.RetrievalQA')
    def test_create_qa_chain_failure(self, mock_retrieval_qa):
        """Test QA chain creation failure."""
        mock_retrieval_qa.from_chain_type.side_effect = Exception("Test error")

        mock_vectorstore = MagicMock()
        result = self.processor.create_qa_chain(mock_vectorstore)

        self.assertIsNone(result)

    def test_ask_question_success(self):
        """Test successful question asking."""
        # The QA chain is called as a callable with {"query": ...} and
        # returns a dict with 'result' and 'source_documents'.
        mock_qa_chain = MagicMock()
        mock_qa_chain.return_value = {
            'result': 'Test answer',
            'source_documents': [MagicMock()]
        }

        question = "What is this about?"
        result = self.processor.ask_question(mock_qa_chain, question)

        self.assertTrue(result['success'])
        self.assertEqual(result['answer'], 'Test answer')
        self.assertIsNotNone(result['source_documents'])
        self.assertIsNone(result['error'])

        mock_qa_chain.assert_called_once_with({"query": question})

    def test_ask_question_failure(self):
        """Test question asking failure."""
        mock_qa_chain = MagicMock()
        mock_qa_chain.side_effect = Exception("Test error")

        question = "What is this about?"
        result = self.processor.ask_question(mock_qa_chain, question)

        # On failure the result dict flips to unsuccessful with the error set.
        self.assertFalse(result['success'])
        self.assertIsNone(result['answer'])
        self.assertEqual(result['source_documents'], [])
        self.assertIsNotNone(result['error'])

    @patch.object(TextProcessor, 'create_qa_chain')
    @patch.object(TextProcessor, 'create_vector_store')
    @patch.object(TextProcessor, 'create_documents_from_text')
    def test_process_transcript_success(self, mock_create_docs, mock_create_vs, mock_create_qa):
        """Test successful transcript processing."""
        # Setup mocks for all three pipeline stages.
        mock_documents = [MagicMock()]
        mock_vectorstore = MagicMock()
        mock_qa_chain = MagicMock()

        mock_create_docs.return_value = mock_documents
        mock_create_vs.return_value = mock_vectorstore
        mock_create_qa.return_value = mock_qa_chain

        transcript_text = "Test transcript text"
        metadata = {"video_id": "test123"}

        result = self.processor.process_transcript(transcript_text, metadata)

        self.assertTrue(result['success'])
        self.assertEqual(result['qa_chain'], mock_qa_chain)
        self.assertEqual(result['vectorstore'], mock_vectorstore)
        self.assertEqual(result['documents'], mock_documents)
        self.assertIsNone(result['error'])

        # The pipeline stages must be chained in order with the right inputs.
        mock_create_docs.assert_called_once_with(transcript_text, metadata)
        mock_create_vs.assert_called_once_with(mock_documents)
        mock_create_qa.assert_called_once_with(mock_vectorstore)

    @patch.object(TextProcessor, 'create_documents_from_text')
    def test_process_transcript_document_creation_failure(self, mock_create_docs):
        """Test transcript processing with document creation failure."""
        mock_create_docs.return_value = []

        transcript_text = "Test transcript text"
        result = self.processor.process_transcript(transcript_text)

        self.assertFalse(result['success'])
        self.assertIsNone(result['qa_chain'])
        self.assertIsNone(result['vectorstore'])
        self.assertIsNone(result['documents'])
        self.assertEqual(result['error'], "Failed to create documents from transcript")

    @patch.object(TextProcessor, 'create_vector_store')
    @patch.object(TextProcessor, 'create_documents_from_text')
    def test_process_transcript_vectorstore_creation_failure(self, mock_create_docs, mock_create_vs):
        """Test transcript processing with vector store creation failure."""
        mock_create_docs.return_value = [MagicMock()]
        mock_create_vs.return_value = None

        transcript_text = "Test transcript text"
        result = self.processor.process_transcript(transcript_text)

        # Partial progress (documents) is still reported alongside the error.
        self.assertFalse(result['success'])
        self.assertIsNone(result['qa_chain'])
        self.assertIsNone(result['vectorstore'])
        self.assertIsNotNone(result['documents'])
        self.assertEqual(result['error'], "Failed to create vector store")

    @patch.object(TextProcessor, 'create_qa_chain')
    @patch.object(TextProcessor, 'create_vector_store')
    @patch.object(TextProcessor, 'create_documents_from_text')
    def test_process_transcript_qa_chain_creation_failure(self, mock_create_docs, mock_create_vs, mock_create_qa):
        """Test transcript processing with QA chain creation failure."""
        mock_create_docs.return_value = [MagicMock()]
        mock_create_vs.return_value = MagicMock()
        mock_create_qa.return_value = None

        transcript_text = "Test transcript text"
        result = self.processor.process_transcript(transcript_text)

        self.assertFalse(result['success'])
        self.assertIsNone(result['qa_chain'])
        self.assertIsNotNone(result['vectorstore'])
        self.assertIsNotNone(result['documents'])
        self.assertEqual(result['error'], "Failed to create QA chain")
215
+
216
if __name__ == '__main__':
    # Allow running this test module directly (python test_text_processor.py).
    unittest.main()
src/tests/test_youtube_handler.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Tests for YouTube handler functionality.
"""

import unittest
from unittest.mock import patch, MagicMock
import sys
import os

# Make the project root importable so the absolute import below
# (`from src.utils...`) resolves. This file lives in src/tests/, so the
# root is two levels up; the original '..', 'src' join pointed at a
# non-existent src/src directory and the import would fail.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

from src.utils.youtube_handler import YouTubeHandler
14
+
15
class TestYouTubeHandler(unittest.TestCase):
    """Test cases for the YouTubeHandler class.

    All network-facing behavior is exercised through a mocked ``YouTube``
    object, so these tests never contact the real service.
    """

    def setUp(self):
        """Create a fresh handler for each test."""
        self.handler = YouTubeHandler()

    def test_validate_youtube_url_valid(self):
        """URLs on recognized YouTube hosts must validate."""
        valid_urls = [
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://youtu.be/dQw4w9WgXcQ",
            "http://youtube.com/watch?v=dQw4w9WgXcQ",
            "https://m.youtube.com/watch?v=dQw4w9WgXcQ",
        ]
        for url in valid_urls:
            with self.subTest(url=url):
                self.assertTrue(self.handler.validate_youtube_url(url))

    def test_validate_youtube_url_invalid(self):
        """Non-YouTube and malformed URLs must be rejected.

        NOTE(review): the original test also listed ``None`` but guarded
        it out with ``if url is not None``, so that subTest never ran.
        The dead entry is removed here rather than asserting unverified
        None-handling behavior — confirm how the handler treats None
        before adding such a case back.
        """
        invalid_urls = [
            "https://www.google.com",
            "not_a_url",
            "https://vimeo.com/123456",
            "",
        ]
        for url in invalid_urls:
            with self.subTest(url=url):
                self.assertFalse(self.handler.validate_youtube_url(url))

    @patch('src.utils.youtube_handler.YouTube')
    def test_extract_video_id_success(self, mock_youtube):
        """A valid watch URL yields the embedded video id."""
        mock_yt = MagicMock()
        mock_yt.video_id = "dQw4w9WgXcQ"
        mock_youtube.return_value = mock_yt

        video_id = self.handler.extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
        self.assertEqual(video_id, "dQw4w9WgXcQ")

    @patch('src.utils.youtube_handler.YouTube')
    def test_extract_video_id_failure(self, mock_youtube):
        """Extraction errors are swallowed and reported as None."""
        mock_youtube.side_effect = Exception("Invalid URL")

        self.assertIsNone(self.handler.extract_video_id("invalid_url"))

    @patch('src.utils.youtube_handler.YouTube')
    def test_get_video_metadata_success(self, mock_youtube):
        """Metadata fields are copied from the YouTube object."""
        mock_yt = MagicMock()
        mock_yt.title = "Test Video"
        mock_yt.author = "Test Author"
        mock_yt.length = 300
        mock_yt.views = 1000
        mock_yt.video_id = "dQw4w9WgXcQ"
        mock_youtube.return_value = mock_yt

        metadata = self.handler.get_video_metadata("https://www.youtube.com/watch?v=dQw4w9WgXcQ")

        self.assertEqual(metadata['title'], "Test Video")
        self.assertEqual(metadata['author'], "Test Author")
        self.assertEqual(metadata['length'], 300)
        self.assertEqual(metadata['views'], 1000)
        self.assertEqual(metadata['video_id'], "dQw4w9WgXcQ")

    @patch('src.utils.youtube_handler.YouTube')
    def test_get_video_metadata_failure(self, mock_youtube):
        """Retrieval failures degrade to an empty metadata dict."""
        mock_youtube.side_effect = Exception("Network error")

        metadata = self.handler.get_video_metadata("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
        self.assertEqual(metadata, {})

    def test_save_transcript_to_file(self):
        """Saved transcript files round-trip their content exactly.

        Writes into a temporary directory instead of the original
        hard-coded filename in the current working directory, which
        could collide under parallel test runs and left stray files
        in the checkout when the assertion failed before cleanup.
        """
        import tempfile  # local import: only this test needs it

        test_text = "This is a test transcript."
        with tempfile.TemporaryDirectory() as tmp_dir:
            test_file = os.path.join(tmp_dir, "test_transcript.txt")

            self.assertTrue(self.handler.save_transcript_to_file(test_text, test_file))

            # Verify the file was created and contains the exact content.
            with open(test_file, 'r', encoding='utf-8') as f:
                self.assertEqual(f.read(), test_text)
114
# Allow running this test module directly (e.g. `python test_youtube_handler.py`).
if __name__ == '__main__':
    unittest.main()