Spaces:

midlajvalappil
/

AI-Powered-YouTube-Transcript-Tutor

Sleeping

App Files Files Community

midlajvalappil commited on Jul 17, 2025

Commit

7086bdb

verified ·

1 Parent(s): 1697da9

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +597 -35

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,602 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+"""
+AI-Powered YouTube Transcript Tutor - Main Streamlit Application
+Enhanced version with modern UI, error handling, and extended functionality.
+"""
+import os
+import sys
 import streamlit as st
+from datetime import datetime
+import time
+# Add src to path for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+# Import custom modules
+from src.utils.youtube_handler import YouTubeHandler
+from src.utils.text_processor import TextProcessor
+from src.utils.session_manager import SessionManager
+from src.utils.export_utils import ExportUtils
+from src.utils.logger import setup_logging
+from config.settings import settings
+# Load environment variables
+from dotenv import load_dotenv
+load_dotenv()
+# Setup logging
+logger = setup_logging(
+    log_level=settings.get('logging.level', 'INFO'),
+    log_file=settings.get('logging.file')
+)
+class YouTubeChatbotApp:
+    """Main application class for YouTube Transcript Chatbot."""
+    def __init__(self):
+        """Initialize the application."""
+        self.setup_page_config()
+        self.load_custom_css()
+        self.initialize_components()
+    def setup_page_config(self):
+        """Configure Streamlit page settings."""
+        app_config = settings.get_app_config()
+        st.set_page_config(
+            page_title=app_config.get('title', 'YouTube Transcript Tutor'),
+            page_icon="🎓",
+            layout="wide",
+            initial_sidebar_state="expanded"
+        )
+    def load_custom_css(self):
+        """Load custom CSS styling with dark theme."""
+        try:
+            with open('static/style.css', 'r') as f:
+                css_content = f.read()
+                st.markdown(f'<style>{css_content}</style>', unsafe_allow_html=True)
+                logger.info("Custom dark theme CSS loaded successfully")
+        except FileNotFoundError:
+            logger.warning("Custom CSS file not found, using fallback dark theme")
+            # Fallback CSS for dark theme
+            fallback_css = """
+            <style>
+            .stApp {
+                background-color: #1a1a1a !important;
+                color: #e9ecef !important;
+            }
+            .stMarkdown, .stText {
+                color: #e9ecef !important;
+            }
+            .stButton > button {
+                background-color: #667eea !important;
+                color: white !important;
+            }
+            body, html {
+                background-color: #1a1a1a !important;
+                color: #e9ecef !important;
+            }
+            div[data-testid="stSidebar"] {
+                background-color: #2d3748 !important;
+            }
+            </style>
+            """
+            st.markdown(fallback_css, unsafe_allow_html=True)
+    def initialize_components(self):
+        """Initialize application components."""
+        # Check for OpenAI API key
+        self.openai_api_key = settings.get_openai_api_key()
+        if not self.openai_api_key:
+            st.error("⚠️ OpenAI API key not found. Please set OPENAI_API_KEY environment variable.")
+            st.stop()
+        # Initialize components
+        self.youtube_handler = YouTubeHandler()
+        self.text_processor = TextProcessor(self.openai_api_key)
+        self.session_manager = SessionManager()
+        self.export_utils = ExportUtils()
+    def render_header(self):
+        """Render application header."""
+        app_config = settings.get_app_config()
+        st.markdown(f"""
+        <div class="app-header">
+            <h1>🎓 {app_config.get('title', 'AI-Powered YouTube Transcript Tutor')}</h1>
+            <p>{app_config.get('description', 'Ask questions from YouTube lecture transcripts using AI')}</p>
+        </div>
+        """, unsafe_allow_html=True)
+    def render_sidebar(self):
+        """Render sidebar with navigation and controls."""
+        with st.sidebar:
+            st.markdown("### 📋 Navigation")
+            # Session statistics
+            stats = self.session_manager.get_session_stats()
+            st.markdown(f"""
+            <div class="sidebar-content">
+                <h4>📊 Session Stats</h4>
+                <div class="metadata-item">
+                    <span class="metadata-label">Questions Asked:</span>
+                    <span class="metadata-value">{stats['total_questions']}</span>
+                </div>
+                <div class="metadata-item">
+                    <span class="metadata-label">Videos Processed:</span>
+                    <span class="metadata-value">{stats['processed_videos']}</span>
+                </div>
+            </div>
+            """, unsafe_allow_html=True)
+            # Processed videos
+            processed_videos = self.session_manager.get_processed_videos()
+            if processed_videos:
+                st.markdown("### 📹 Processed Videos")
+                for video_id, video_info in processed_videos.items():
+                    title = video_info['metadata'].get('title', 'Unknown Title')[:50] + "..."
+                    if st.button(f"📺 {title}", key=f"video_{video_id}"):
+                        self.session_manager.switch_to_video(video_id)
+                        st.rerun()
+            # Export options
+            if st.session_state.chat_history:
+                st.markdown("### 📤 Export Options")
+                export_format = st.selectbox(
+                    "Export Format",
+                    ["PDF", "Text", "JSON"],
+                    key="export_format"
+                )
+                if st.button("📥 Export Chat History"):
+                    self.export_chat_history(export_format.lower())
+            # Settings
+            st.markdown("### ⚙️ Settings")
+            # Language selection
+            processing_config = settings.get_processing_config()
+            supported_languages = processing_config.get('supported_languages', ['en'])
+            default_language = processing_config.get('default_language', 'en')
+            selected_language = st.selectbox(
+                "Transcript Language",
+                supported_languages,
+                index=supported_languages.index(default_language) if default_language in supported_languages else 0,
+                key="transcript_language"
+            )
+            # Clear history button
+            if st.button("🗑️ Clear Chat History", type="secondary"):
+                self.session_manager.clear_chat_history()
+                st.success("Chat history cleared!")
+                st.rerun()
+            # Working video examples
+            st.markdown("### 🎯 Example Videos")
+            st.markdown("Try these videos that usually work:")
+            example_videos = {
+                "🧮 Neural Networks": "https://www.youtube.com/watch?v=aircAruvnKk",
+                "📚 Khan Academy": "https://www.youtube.com/watch?v=WUvTyaaNkzM",
+                "🎓 TED-Ed": "https://www.youtube.com/watch?v=kBdfcR-8hEY"
+            }
+            for title, url in example_videos.items():
+                if st.button(title, key=f"example_{title}"):
+                    st.session_state.video_url = url
+                    st.rerun()
+            # Troubleshooting section
+            st.markdown("### 🔧 Troubleshooting")
+            with st.expander("Common Issues & Solutions"):
+                st.markdown("""
+                **"Could not retrieve transcript":**
+                - Video may be region-restricted
+                - Try videos from educational channels
+                - Ensure video has captions enabled
+                **"No transcript available":**
+                - Video doesn't have captions
+                - Try auto-generated captions videos
+                - Look for educational content
+                **"Video unavailable":**
+                - Video may be private/deleted
+                - Check the URL is correct
+                - Try a different video
+                """)
+    def render_video_input_section(self):
+        """Render video input and processing section."""
+        st.markdown("### 🎬 Video Processing")
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            video_url = st.text_input(
+                "Enter YouTube Video URL",
+                placeholder="https://www.youtube.com/watch?v=...",
+                help="Paste a YouTube video URL to extract and process its transcript"
+            )
+        with col2:
+            st.markdown("<br>", unsafe_allow_html=True)  # Add spacing
+            process_button = st.button("🚀 Process Video", type="primary")
+        if process_button and video_url:
+            self.process_video(video_url)
+        elif process_button and not video_url:
+            st.warning("⚠️ Please enter a valid YouTube URL.")
+    def process_video(self, video_url: str):
+        """
+        Process YouTube video and create QA chain.
+        Args:
+            video_url (str): YouTube video URL
+        """
+        # Validate URL
+        if not self.youtube_handler.validate_youtube_url(video_url):
+            st.error("❌ Invalid YouTube URL format. Please check the URL and try again.")
+            return
+        # Create progress indicators
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        try:
+            # Step 1: Extract transcript
+            status_text.text("🔍 Extracting video transcript...")
+            progress_bar.progress(25)
+            language = st.session_state.get('transcript_language', 'en')
+            transcript_result = self.youtube_handler.get_youtube_transcript(video_url, language)
+            if not transcript_result['success']:
+                error_msg = transcript_result['error']
+                st.error(f"❌ {error_msg}")
+                # Provide specific suggestions based on error type
+                if "ip blocked" in error_msg.lower() or "cloud provider" in error_msg.lower():
+                    st.warning("🚫 **YouTube has temporarily blocked your IP address**")
+                    st.info("💡 **How to fix this:**")
+                    st.markdown("""
+                    **Immediate solutions:**
+                    - ⏰ **Wait 10-15 minutes** before trying again
+                    - 🌐 **Try a different network** (mobile hotspot, different WiFi)
+                    - 🔄 **Restart your router** to get a new IP address
+                    **Why this happens:**
+                    - Too many requests to YouTube in a short time
+                    - Using cloud services (AWS, Google Cloud, etc.)
+                    - YouTube's anti-bot protection
+                    **Prevention:**
+                    - Wait between video processing attempts
+                    - Don't process multiple videos rapidly
+                    """)
+                    # Show a countdown timer suggestion
+                    st.info("⏱️ **Recommended:** Wait 15 minutes, then try one of the example videos below.")
+                elif "rate limited" in error_msg.lower() or "too many requests" in error_msg.lower():
+                    st.warning("⚡ **Rate Limited: Too many requests**")
+                    st.info("💡 **Solution:** Wait 5-10 minutes before trying again.")
+                elif "region" in error_msg.lower():
+                    st.info("💡 **Suggestions to fix this issue:**")
+                    st.markdown("""
+                    - Try a different video that's available in your region
+                    - Look for videos from creators in your country
+                    - Try educational channels like Khan Academy, Coursera, or TED-Ed
+                    - Some videos may work better than others depending on regional settings
+                    """)
+                elif "private" in error_msg.lower():
+                    st.info("💡 **This video is private.** Try a public video instead.")
+                elif "disabled" in error_msg.lower():
+                    st.info("💡 **Captions are disabled for this video.** Try finding a video with captions enabled.")
+                elif "unavailable" in error_msg.lower():
+                    st.info("💡 **This video is unavailable.** It may have been deleted or made private.")
+                else:
+                    st.info("💡 **Try these alternatives:**")
+                    st.markdown("""
+                    - Make sure the video is public and has captions
+                    - Try a different YouTube video
+                    - Look for educational content which usually has transcripts
+                    - Check if the video URL is correct
+                    """)
+                # Show some example working videos
+                st.markdown("### 🎯 **Try these example videos that usually work:**")
+                example_videos = [
+                    "https://www.youtube.com/watch?v=aircAruvnKk",  # 3Blue1Brown
+                    "https://www.youtube.com/watch?v=WUvTyaaNkzM",  # Khan Academy
+                    "https://www.youtube.com/watch?v=kBdfcR-8hEY",  # TED-Ed
+                ]
+                for i, example_url in enumerate(example_videos, 1):
+                    if st.button(f"📺 Try Example Video {i}", key=f"example_{i}"):
+                        st.session_state.video_url = example_url
+                        st.rerun()
+                return
+            # Step 2: Display video metadata
+            progress_bar.progress(50)
+            status_text.text("📊 Processing video metadata...")
+            metadata = transcript_result['metadata']
+            if metadata:
+                self.display_video_metadata(metadata)
+            # Step 3: Process transcript
+            progress_bar.progress(75)
+            status_text.text("🧠 Creating AI knowledge base...")
+            processing_result = self.text_processor.process_transcript(
+                transcript_result['transcript'],
+                metadata
+            )
+            if not processing_result['success']:
+                st.error(f"❌ {processing_result['error']}")
+                return
+            # Step 4: Save to session
+            progress_bar.progress(100)
+            status_text.text("✅ Video processed successfully!")
+            video_id = metadata.get('video_id', 'unknown')
+            self.session_manager.save_processed_video(
+                video_url,
+                video_id,
+                metadata,
+                transcript_result['transcript'],
+                processing_result['qa_chain'],
+                processing_result['vectorstore']
+            )
+            # Success message
+            st.success("🎉 Video processed successfully! You can now ask questions about the content.")
+            # Show transcript download option
+            if st.button("📥 Download Transcript"):
+                self.download_transcript(transcript_result['transcript'], metadata)
+        except Exception as e:
+            logger.error(f"Error processing video: {e}")
+            st.error(f"❌ An unexpected error occurred: {str(e)}")
+        finally:
+            progress_bar.empty()
+            status_text.empty()
+    def display_video_metadata(self, metadata: dict):
+        """
+        Display video metadata in a formatted card.
+        Args:
+            metadata (dict): Video metadata
+        """
+        st.markdown(f"""
+        <div class="video-metadata" style="background-color: #2d3748 !important; border: 1px solid #4a5568 !important; color: #e9ecef !important;">
+            <h4 style="color: #e9ecef !important;">📹 Video Information</h4>
+            <div class="metadata-item">
+                <span class="metadata-label" style="color: #a0aec0 !important;">Title:</span>
+                <span class="metadata-value" style="color: #e9ecef !important;">{metadata.get('title', 'N/A')}</span>
+            </div>
+            <div class="metadata-item">
+                <span class="metadata-label" style="color: #a0aec0 !important;">Author:</span>
+                <span class="metadata-value" style="color: #e9ecef !important;">{metadata.get('author', 'N/A')}</span>
+            </div>
+            <div class="metadata-item">
+                <span class="metadata-label" style="color: #a0aec0 !important;">Duration:</span>
+                <span class="metadata-value" style="color: #e9ecef !important;">{self.format_duration(metadata.get('length', 0))}</span>
+            </div>
+            <div class="metadata-item">
+                <span class="metadata-label" style="color: #a0aec0 !important;">Views:</span>
+                <span class="metadata-value" style="color: #e9ecef !important;">{metadata.get('views', 'N/A'):,}</span>
+            </div>
+        </div>
+        """, unsafe_allow_html=True)
+    def format_duration(self, seconds: int) -> str:
+        """Format duration from seconds to HH:MM:SS."""
+        if not seconds:
+            return "N/A"
+        hours = seconds // 3600
+        minutes = (seconds % 3600) // 60
+        seconds = seconds % 60
+        if hours > 0:
+            return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+        else:
+            return f"{minutes:02d}:{seconds:02d}"
+    def render_qa_section(self):
+        """Render question and answer section."""
+        if 'qa_chain' not in st.session_state or st.session_state.qa_chain is None:
+            st.info("👆 Please process a YouTube video first to start asking questions.")
+            return
+        st.markdown("### 💬 Ask Questions")
+        # Question input
+        col1, col2 = st.columns([4, 1])
+        with col1:
+            user_question = st.text_input(
+                "Your Question",
+                placeholder="Ask anything about the video content...",
+                key="user_question"
+            )
+        with col2:
+            st.markdown("<br>", unsafe_allow_html=True)
+            ask_button = st.button("🤔 Ask", type="primary")
+        if ask_button and user_question:
+            self.process_question(user_question)
+        elif ask_button and not user_question:
+            st.warning("⚠️ Please enter a question.")
+    def process_question(self, question: str):
+        """
+        Process user question and generate answer.
+        Args:
+            question (str): User question
+        """
+        with st.spinner("🤔 Thinking..."):
+            try:
+                result = self.text_processor.ask_question(st.session_state.qa_chain, question)
+                if result['success']:
+                    # Display answer with dark theme
+                    st.markdown("### 💡 Answer")
+                    st.markdown(f"""
+                    <div class="info-card" style="background: #2d3748 !important; border: 1px solid #4a5568 !important; color: #e9ecef !important;">
+                        <p style="color: #e9ecef !important; margin: 0 !important; line-height: 1.6 !important;">{result['answer']}</p>
+                    </div>
+                    """, unsafe_allow_html=True)
+                    # Add to chat history
+                    video_id = st.session_state.get('current_video')
+                    self.session_manager.add_to_chat_history(
+                        question,
+                        result['answer'],
+                        video_id,
+                        result.get('source_documents', [])
+                    )
+                    # Show source documents if available
+                    if result.get('source_documents'):
+                        with st.expander("📚 Source References"):
+                            for i, doc in enumerate(result['source_documents'], 1):
+                                st.markdown(f"**Reference {i}:**")
+                                st.text(doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content)
+                else:
+                    st.error(f"❌ {result['error']}")
+            except Exception as e:
+                logger.error(f"Error processing question: {e}")
+                st.error(f"❌ An error occurred while processing your question: {str(e)}")
+    def render_chat_history(self):
+        """Render chat history section."""
+        chat_history = self.session_manager.get_chat_history()
+        if not chat_history:
+            return
+        st.markdown("### 📜 Chat History")
+        # Limit displayed history
+        ui_config = settings.get_ui_config()
+        max_display = ui_config.get('max_chat_history_display', 50)
+        recent_history = chat_history[-max_display:] if len(chat_history) > max_display else chat_history
+        for entry in reversed(recent_history):
+            with st.expander(f"Q: {entry['question'][:50]}..." if len(entry['question']) > 50 else f"Q: {entry['question']}"):
+                st.markdown(f"**Question:** {entry['question']}")
+                st.markdown(f"**Answer:** {entry['answer']}")
+                st.markdown(f"**Time:** {datetime.fromisoformat(entry['timestamp']).strftime('%Y-%m-%d %H:%M:%S')}")
+    def export_chat_history(self, format: str):
+        """
+        Export chat history in specified format.
+        Args:
+            format (str): Export format (pdf, txt, json)
+        """
+        try:
+            chat_history = self.session_manager.get_chat_history()
+            video_metadata = st.session_state.get('video_metadata', {})
+            if format == 'pdf':
+                pdf_data = self.export_utils.export_to_pdf(chat_history, video_metadata)
+                if pdf_data:
+                    st.download_button(
+                        label="📥 Download PDF",
+                        data=pdf_data,
+                        file_name=f"chat_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
+                        mime="application/pdf"
+                    )
+            elif format == 'txt':
+                text_data = self.export_utils.export_to_text(chat_history, video_metadata)
+                if text_data:
+                    st.download_button(
+                        label="📥 Download Text",
+                        data=text_data,
+                        file_name=f"chat_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+                        mime="text/plain"
+                    )
+            elif format == 'json':
+                json_data = self.export_utils.export_to_json(chat_history, video_metadata)
+                if json_data:
+                    st.download_button(
+                        label="📥 Download JSON",
+                        data=json_data,
+                        file_name=f"chat_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
+                        mime="application/json"
+                    )
+        except Exception as e:
+            logger.error(f"Error exporting chat history: {e}")
+            st.error(f"❌ Error exporting chat history: {str(e)}")
+    def download_transcript(self, transcript_text: str, metadata: dict):
+        """
+        Provide transcript download functionality.
+        Args:
+            transcript_text (str): Transcript text
+            metadata (dict): Video metadata
+        """
+        try:
+            transcript_export = self.export_utils.export_transcript(transcript_text, metadata, 'txt')
+            st.download_button(
+                label="📥 Download Transcript",
+                data=transcript_export,
+                file_name=f"transcript_{metadata.get('video_id', 'unknown')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+                mime="text/plain"
+            )
+        except Exception as e:
+            logger.error(f"Error preparing transcript download: {e}")
+            st.error(f"❌ Error preparing transcript download: {str(e)}")
+    def run(self):
+        """Run the main application."""
+        try:
+            self.render_header()
+            self.render_sidebar()
+            # Main content area
+            self.render_video_input_section()
+            st.markdown("---")
+            self.render_qa_section()
+            st.markdown("---")
+            self.render_chat_history()
+        except Exception as e:
+            logger.error(f"Application error: {e}")
+            st.error(f"❌ An application error occurred: {str(e)}")
+def main():
+    """Main function to run the application."""
+    app = YouTubeChatbotApp()
+    app.run()
+if __name__ == "__main__":
+    main()