# -*- coding: utf-8 -*-
"""
ChatYT Streamlit App (LCEL Chain Version)
This Streamlit app enables you to:
* Summarise YouTube videos
* Ask questions about the topics discussed in the video
It uses LangChain Expression Language (LCEL) with Google's Gemini APIs.
"""
import streamlit as st
import yt_dlp
import os
# Corrected import: Document is now in langchain_core.documents
from langchain_core.documents import Document
# Corrected import: RecursiveCharacterTextSplitter is in its own package
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
# Corrected import: ChatPromptTemplate is now in langchain_core.prompts
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import google.generativeai as genai
import time
# --- App Configuration ---
# NOTE(review): st.set_page_config must be the first Streamlit command in the
# script run, before any other st.* call — keep it at the top.
st.set_page_config(
    page_title="ChatYT (LangChain)",
    page_icon="๐บ",  # NOTE(review): this string looks mojibake-garbled — presumably a TV emoji; confirm
    layout="wide",
)
st.title("๐บ ChatYT: Chat with any YouTube Video")
st.caption("Summarize and ask questions about any YouTube video using LangChain and Google Gemini.")

# --- API Key Handling ---
# Prefer the environment variable; fall back to a masked sidebar input.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    GEMINI_API_KEY = st.sidebar.text_input(
        "Enter your Gemini API Key:", type="password"
    )
if not GEMINI_API_KEY:
    # No key from either source: halt this script run until one is supplied.
    st.error("Please provide your Gemini API Key in the sidebar to continue.")
    st.stop()

# Configure the genai library (still needed for file upload)
try:
    genai.configure(api_key=GEMINI_API_KEY)
except Exception as e:
    st.error(f"Failed to configure Gemini API: {e}")
    st.stop()
# --- Core Functions (Caching to improve performance) ---
@st.cache_data(show_spinner="Downloading audio...")
def download_audio(link, file_name='audio.mp3'):
    """
    Download the audio track of a YouTube video with yt-dlp.

    Returns a ``(file_name, video_title)`` tuple; the audio is written to
    *file_name* in the working directory (existing file is overwritten).
    """
    ydl_options = {
        'extract_audio': True,
        'format': 'worstaudio',  # smallest stream — speeds up download and upload
        'overwrites': True,
        'outtmpl': file_name,
    }
    with yt_dlp.YoutubeDL(ydl_options) as downloader:
        metadata = downloader.extract_info(link, download=True)
    return file_name, metadata['title']
@st.cache_data(show_spinner="Compressing audio...")
def compress_audio(input_file, output_file="compressed.mp3"):
    """
    Compress the audio file (16 kHz mono) for faster API uploads.

    Parameters:
        input_file: path of the audio file to compress.
        output_file: path the compressed MP3 is written to.

    Returns the output file path (even if ffmpeg failed, matching the
    original best-effort behavior; a warning is shown on failure).
    """
    import subprocess  # local import keeps this fix self-contained

    # Use an argument list with shell=False (the default for a list) so that
    # file names containing spaces or shell metacharacters can neither break
    # the command nor inject into a shell — the original os.system() f-string
    # was vulnerable to both.
    result = subprocess.run(
        ["ffmpeg", "-y", "-i", input_file, "-ar", "16000", "-ac", "1", output_file],
        capture_output=True,
    )
    if result.returncode != 0:
        # Surface the failure instead of silently ignoring the exit code.
        st.warning(f"ffmpeg exited with code {result.returncode}")
    return output_file
@st.cache_data(show_spinner="Transcribing video...")
def speech_to_text(audio_file):
    """
    Transcribe audio using the Gemini API.

    (This function uses the base genai library for file upload.)

    Parameters:
        audio_file: path to a local MP3 file.

    Returns the transcription text on success, or a string starting with
    "Error:" on failure (callers check for that prefix).
    """
    audio_file_uploaded = None
    try:
        model = genai.GenerativeModel("gemini-2.5-flash")
        audio_file_uploaded = genai.upload_file(path=audio_file, mime_type="audio/mpeg")
        prompt = "Please transcribe this audio file. Provide only the text transcription."
        response = model.generate_content([prompt, audio_file_uploaded])
        if response.candidates and response.candidates[0].content.parts:
            return response.candidates[0].content.parts[0].text
        return f"Error: Could not transcribe audio. Response: {response}"
    except Exception as e:
        st.error(f"An error occurred during transcription: {e}")
        return f"Error: {e}"
    finally:
        # Always delete the uploaded file from the Gemini file store, on both
        # success and failure paths (the original duplicated this cleanup in
        # two places; a finally block guarantees it runs exactly once).
        if audio_file_uploaded is not None:
            try:
                genai.delete_file(audio_file_uploaded.name)
            except Exception as e_del:
                st.warning(f"Error cleaning up file: {e_del}")
@st.cache_data(show_spinner="Summarizing text...")
def summarize_text_api(text):
    """
    Summarize *text* with an LCEL chain (prompt | model | parser).

    Returns the summary string, or a string starting with "Error:" if the
    chain invocation fails.
    """
    # Model used for summarization.
    model = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0.3,
        google_api_key=GEMINI_API_KEY,
    )
    # Prompt wrapping the transcript.
    template = """Please provide a concise, high-level summary of the following text:
---
{text}
---
Provide only the summary."""
    # Compose prompt -> model -> string output into a single runnable.
    chain = ChatPromptTemplate.from_template(template) | model | StrOutputParser()
    try:
        return chain.invoke({"text": text})
    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
        return f"Error: {e}"
@st.cache_resource(show_spinner="Generating embeddings...")
def generate_embeddings_db(text):
    """
    Splits text, generates embeddings via API, and stores in ChromaDB.
    Returns the Chroma database object, or None on failure.

    Uses st.cache_resource rather than st.cache_data: cache_data serializes
    (pickles) return values, but the Chroma vector store is a live,
    unpicklable client object — Streamlit's docs direct such resources
    (DB connections, ML models) to cache_resource.
    """
    doc = Document(page_content=text, metadata={"source": "youtube"})
    # Overlapping chunks so context isn't lost at chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents([doc])
    try:
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",
                                                  google_api_key=GEMINI_API_KEY)
        db = Chroma.from_documents(chunks, embeddings)
        return db
    except Exception as e:
        st.error(f"An error occurred during embedding generation: {e}")
        return None
def format_docs(docs):
    """Join retrieved documents into one context string for the prompt.

    Each document's ``page_content`` is concatenated with a visual
    separator; an empty (or missing) result set yields a fallback message.
    """
    if docs:
        contents = [doc.page_content for doc in docs]
        return "\n\n---\n\n".join(contents)
    return "No relevant context found."
# --- Streamlit UI Components ---
# Seed session-state keys with their defaults on first run; subsequent
# reruns leave existing values untouched.
_SESSION_DEFAULTS = {
    "summary": "",
    "rag_chain": None,
    "video_title": "",
    "chat_history": [],
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
url = st.text_input("Enter YouTube URL:", key="youtube_url")

# Main pipeline: download -> compress -> transcribe -> summarize -> embed -> RAG chain.
if st.button("Process Video", key="process_video"):
    if url:
        with st.spinner("Processing video... This may take a few minutes."):
            try:
                # Reset state from any previously processed video.
                st.session_state.summary = ""
                st.session_state.rag_chain = None
                st.session_state.video_title = ""
                st.session_state.chat_history = []
                # 1. Download
                audio_file, video_title = download_audio(url)
                st.session_state.video_title = video_title
                # 2. Compress
                compressed_audio = compress_audio(audio_file)
                # 3. Transcribe
                text = speech_to_text(compressed_audio)
                # NOTE(review): speech_to_text signals failure via an "Error:"
                # substring in the returned text rather than an exception.
                if "Error:" in text:
                    st.error(f"Failed to transcribe: {text}")
                    st.stop()
                # 4. Summarize (using the new chain function)
                summary = summarize_text_api(text)
                st.session_state.summary = summary
                # 5. Embed and create DB
                db = generate_embeddings_db(text)
                if db:
                    # 6. Create RAG Chain and store it in session state
                    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash",
                                                 temperature=0.3,
                                                 google_api_key=GEMINI_API_KEY)
                    # Retrieve the 3 most similar chunks per question.
                    retriever = db.as_retriever(search_kwargs={"k": 3})
                    PROMPT_TEMPLATE = """Answer the following questions based only on the following context:
{context}
---
Answer the question based on the above context:
{question}
"""
                    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
                    # This is the RAG chain: the question flows both into the
                    # retriever (for context) and straight through to the prompt.
                    rag_chain = (
                        {"context": retriever | format_docs, "question": RunnablePassthrough()}
                        | prompt
                        | llm
                        | StrOutputParser()
                    )
                    # Stored in session state so the chat UI can invoke it on reruns.
                    st.session_state.rag_chain = rag_chain
                    st.success("Video processed and Q&A chat is ready!")
                else:
                    st.error("Failed to create vector database.")
                # Clean up local files (best-effort; warn but don't fail).
                try:
                    os.remove(audio_file)
                    os.remove(compressed_audio)
                except OSError as e:
                    st.warning(f"Could not clean up audio files: {e}")
            except Exception as e:
                st.error(f"An error occurred during video processing: {e}")
    else:
        st.warning("Please enter a YouTube URL.")
# --- Display Summary and Chat Interface ---
# Only rendered once a video has been processed (summary is non-empty).
if st.session_state.summary:
    st.subheader(f"Summary for: *{st.session_state.video_title}*")
    st.markdown(st.session_state.summary)

    st.subheader("Ask Questions")
    # Display chat history (replayed on every Streamlit rerun).
    for author, message in st.session_state.chat_history:
        with st.chat_message(author):
            st.markdown(message)
    # Chat input (walrus: prompt is truthy only when the user submitted text).
    if prompt := st.chat_input("Ask a question about the video..."):
        if st.session_state.rag_chain:
            # Add user message to history
            st.session_state.chat_history.append(("user", prompt))
            with st.chat_message("user"):
                st.markdown(prompt)
            # Generate and display bot response by invoking the chain
            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    # Here we just invoke the chain with the prompt!
                    answer = st.session_state.rag_chain.invoke(prompt)
                    st.markdown(answer)
            # Add bot message to history
            st.session_state.chat_history.append(("assistant", answer))
        else:
            st.error("The Q&A chain is not ready. Please process a video first.")