File size: 10,578 Bytes
ebe7149
790b0cd
ebe7149
790b0cd
0225e5c
 
 
790b0cd
ebe7149
790b0cd
 
0225e5c
 
 
ebe7149
750dd7d
ebe7149
 
0225e5c
ebe7149
 
 
 
 
0225e5c
ebe7149
0225e5c
 
 
ebe7149
0225e5c
 
 
 
 
ebe7149
0225e5c
 
3d28edf
 
0225e5c
 
 
 
 
 
 
 
 
 
ebe7149
0225e5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebe7149
0225e5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebe7149
0225e5c
ebe7149
 
 
 
 
 
 
0225e5c
 
 
 
ebe7149
 
 
 
0225e5c
 
ebe7149
 
 
0225e5c
 
 
 
 
ebe7149
0225e5c
 
ebe7149
0225e5c
 
ebe7149
0225e5c
 
 
 
ebe7149
 
0225e5c
 
 
 
 
 
ebe7149
 
 
 
 
0225e5c
 
 
 
 
 
ebe7149
 
0225e5c
 
 
 
 
 
 
 
 
 
 
ebe7149
 
 
 
 
 
0225e5c
 
 
 
 
 
 
 
 
 
 
 
 
ebe7149
0225e5c
ebe7149
0225e5c
ebe7149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0225e5c
ebe7149
0225e5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebe7149
0225e5c
 
 
 
 
ebe7149
0225e5c
 
ebe7149
 
0225e5c
 
 
 
 
ebe7149
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# -*- coding: utf-8 -*-
"""
ChatYT Streamlit App (LCEL Chain Version)

This Streamlit app enables you to:
* Summarise YouTube videos
* Ask questions about the topics discussed in the video

It uses LangChain Expression Language (LCEL) with Google's Gemini APIs.
"""

import streamlit as st
import yt_dlp
import os
# Corrected import: Document is now in langchain_core.documents
from langchain_core.documents import Document
# Corrected import: RecursiveCharacterTextSplitter is in its own package
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
# Corrected import: ChatPromptTemplate is now in langchain_core.prompts
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import google.generativeai as genai
import time

# --- App Configuration ---
# Page settings are collected in one mapping and unpacked into
# st.set_page_config (must be the first Streamlit call in the script).
_PAGE_CONFIG = {
    "page_title": "ChatYT (LangChain)",
    "page_icon": "๐Ÿ“บ",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)

st.title("๐Ÿ“บ ChatYT: Chat with any YouTube Video")
st.caption("Summarize and ask questions about any YouTube video using LangChain and Google Gemini.")

# --- API Key Handling ---
# Prefer the environment variable; only render the sidebar prompt when it is
# missing (the `or` short-circuits, so the widget is not shown otherwise).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or st.sidebar.text_input(
    "Enter your Gemini API Key:", type="password"
)

# Without a key there is nothing useful to do — stop the script run here.
if not GEMINI_API_KEY:
    st.error("Please provide your Gemini API Key in the sidebar to continue.")
    st.stop()

# Configure the base genai client (still needed for the file-upload API).
try:
    genai.configure(api_key=GEMINI_API_KEY)
except Exception as exc:
    st.error(f"Failed to configure Gemini API: {exc}")
    st.stop()


# --- Core Functions (Caching to improve performance) ---

@st.cache_data(show_spinner="Downloading audio...")
def download_audio(link, file_name='audio.mp3'):
    """
    Download the audio track of a YouTube video.

    Args:
        link: YouTube URL to download.
        file_name: Output path for the audio file.

    Returns:
        A (file_name, video_title) tuple.
    """
    ydl_opts = {
        'extract_audio': True,
        'format': 'worstaudio',   # smallest stream — transcription only
        'overwrites': True,
        'outtmpl': file_name,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as downloader:
        info = downloader.extract_info(link, download=True)
    return file_name, info['title']

@st.cache_data(show_spinner="Compressing audio...")
def compress_audio(input_file, output_file="compressed.mp3"):
    """
    Compress an audio file to 16 kHz mono for faster API uploads.

    Args:
        input_file: Path of the source audio file.
        output_file: Path the compressed MP3 is written to.

    Returns:
        The path of the compressed file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If ffmpeg is not installed.
    """
    import subprocess  # local import: only this helper shells out

    # Pass an argument list instead of os.system(f"ffmpeg ... {input_file} ...")
    # so file names containing spaces or shell metacharacters cannot break (or
    # inject into) the command line. check=True surfaces ffmpeg failures
    # instead of silently returning a missing/stale output file.
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_file, "-ar", "16000", "-ac", "1", output_file],
        check=True,
    )
    return output_file

@st.cache_data(show_spinner="Transcribing video...")
def speech_to_text(audio_file):
    """
    Transcribe an audio file using the Gemini API.

    (This function uses the base genai library for file upload.)

    Args:
        audio_file: Path to a local MP3 file.

    Returns:
        The transcription text, or a string starting with "Error:" on failure
        (callers check for that prefix rather than catching exceptions).
    """
    uploaded = None
    try:
        model = genai.GenerativeModel("gemini-2.5-flash")
        uploaded = genai.upload_file(path=audio_file, mime_type="audio/mpeg")

        prompt = "Please transcribe this audio file. Provide only the text transcription."
        response = model.generate_content([prompt, uploaded])

        if response.candidates and response.candidates[0].content.parts:
            return response.candidates[0].content.parts[0].text
        return f"Error: Could not transcribe audio. Response: {response}"
    except Exception as e:
        st.error(f"An error occurred during transcription: {e}")
        return f"Error: {e}"
    finally:
        # Single cleanup path: always remove the uploaded file from the Gemini
        # file store, whether transcription succeeded or raised. This replaces
        # the previous duplicated delete + `'...' in locals()` check.
        if uploaded is not None:
            try:
                genai.delete_file(uploaded.name)
            except Exception as e_del:
                st.warning(f"Error cleaning up file: {e_del}")

@st.cache_data(show_spinner="Summarizing text...")
def summarize_text_api(text):
    """
    Summarize *text* with a small LCEL chain (prompt | model | parser).

    Args:
        text: The transcript to summarize.

    Returns:
        The summary string, or a string starting with "Error:" on failure.
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0.3,
        google_api_key=GEMINI_API_KEY,
    )

    template = """Please provide a concise, high-level summary of the following text:
    ---
    {text}
    ---
    Provide only the summary."""

    # Compose the chain: fill the prompt, call the model, keep only the text.
    chain = ChatPromptTemplate.from_template(template) | model | StrOutputParser()

    try:
        return chain.invoke({"text": text})
    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
        return f"Error: {e}"

@st.cache_data(show_spinner="Generating embeddings...")
def generate_embeddings_db(text):
    """
    Chunk *text*, embed the chunks, and load them into ChromaDB.

    Args:
        text: Full transcript to index.

    Returns:
        The Chroma database object, or None if embedding fails.
    """
    source_doc = Document(page_content=text, metadata={"source": "youtube"})
    # RecursiveCharacterTextSplitter handles the chunking; overlap keeps
    # context across chunk boundaries.
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).split_documents([source_doc])

    try:
        embedder = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=GEMINI_API_KEY,
        )
        return Chroma.from_documents(chunks, embedder)
    except Exception as e:
        st.error(f"An error occurred during embedding generation: {e}")
        return None

def format_docs(docs):
    """Join retrieved documents into one context string for the prompt.

    Returns a placeholder message when no documents were retrieved.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n---\n\n".join(contents) if contents else "No relevant context found."

# --- Streamlit UI Components ---

# Initialize session state variables
# Seed each key with its default only on the first script run, so reruns keep
# whatever state the user has built up.
_STATE_DEFAULTS = {
    "summary": "",
    "rag_chain": None,
    "video_title": "",
    "chat_history": [],
}
for _key, _default in _STATE_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

url = st.text_input("Enter YouTube URL:", key="youtube_url")

# Full processing pipeline, run when the user clicks the button:
# download -> compress -> transcribe -> summarize -> embed -> build RAG chain.
if st.button("Process Video", key="process_video"):
    if url:
        with st.spinner("Processing video... This may take a few minutes."):
            try:
                # Reset state from any previously processed video before
                # starting a new run.
                st.session_state.summary = ""
                st.session_state.rag_chain = None
                st.session_state.video_title = ""
                st.session_state.chat_history = []

                # 1. Download
                # NOTE(review): download_audio is cached with st.cache_data but
                # its output file is deleted below — on a cache hit for the
                # same URL this returns a path to a file that no longer
                # exists. Worth confirming/fixing.
                audio_file, video_title = download_audio(url)
                st.session_state.video_title = video_title
                
                # 2. Compress
                compressed_audio = compress_audio(audio_file)
                
                # 3. Transcribe
                text = speech_to_text(compressed_audio)
                # NOTE(review): substring check — a legitimate transcript
                # containing "Error:" would also trigger this branch.
                if "Error:" in text:
                    st.error(f"Failed to transcribe: {text}")
                    st.stop()
                
                # 4. Summarize (using the new chain function)
                summary = summarize_text_api(text)
                st.session_state.summary = summary
                
                # 5. Embed and create DB
                db = generate_embeddings_db(text)
                
                if db:
                    # 6. Create RAG Chain and store it in session state
                    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash",
                                               temperature=0.3,
                                               google_api_key=GEMINI_API_KEY)
                    
                    # Retrieve the 3 most similar chunks for each question.
                    retriever = db.as_retriever(search_kwargs={"k": 3})
                    
                    PROMPT_TEMPLATE = """Answer the following questions based only on the following context:
                    {context}
                    ---
                    Answer the question based on the above context:
                    {question}
                    """
                    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
                    
                    # This is the RAG chain: the question passes through
                    # unchanged while the retriever supplies the context.
                    rag_chain = (
                        {"context": retriever | format_docs, "question": RunnablePassthrough()}
                        | prompt
                        | llm
                        | StrOutputParser()
                    )
                    
                    # Stored in session state so the chat UI below can invoke
                    # it on later reruns.
                    st.session_state.rag_chain = rag_chain
                    st.success("Video processed and Q&A chat is ready!")
                else:
                    st.error("Failed to create vector database.")
                
                # Clean up local files (best-effort; a warning is enough).
                try:
                    os.remove(audio_file)
                    os.remove(compressed_audio)
                except OSError as e:
                    st.warning(f"Could not clean up audio files: {e}")
                    
            except Exception as e:
                st.error(f"An error occurred during video processing: {e}")
    else:
        st.warning("Please enter a YouTube URL.")

# --- Display Summary and Chat Interface ---

# Rendered on every rerun once a video has been processed (summary is set).
if st.session_state.summary:
    st.subheader(f"Summary for: *{st.session_state.video_title}*")
    st.markdown(st.session_state.summary)
    
    st.subheader("Ask Questions")
    
    # Replay the full chat history so the conversation survives reruns.
    for author, message in st.session_state.chat_history:
        with st.chat_message(author):
            st.markdown(message)
            
    # Chat input (walrus: only truthy when the user submitted a message).
    if prompt := st.chat_input("Ask a question about the video..."):
        if st.session_state.rag_chain:
            # Add user message to history
            st.session_state.chat_history.append(("user", prompt))
            with st.chat_message("user"):
                st.markdown(prompt)
                
            # Generate and display bot response by invoking the chain
            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    # The RAG chain takes the raw question string; retrieval
                    # and prompting happen inside the chain.
                    answer = st.session_state.rag_chain.invoke(prompt)
                    st.markdown(answer)
                    
            # Add bot message to history
            st.session_state.chat_history.append(("assistant", answer))
        else:
            # Possible if embedding/DB creation failed but a summary was made.
            st.error("The Q&A chain is not ready. Please process a video first.")