Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -135,6 +135,20 @@ if "llm" not in st.session_state:
|
|
| 135 |
max_tokens=2048
|
| 136 |
)
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Sidebar for URL and YouTube input
|
| 139 |
with st.sidebar:
|
| 140 |
st.header("Enter Web URL")
|
|
@@ -220,9 +234,11 @@ def fetch_youtube_captions_api(video_id, api_key):
|
|
| 220 |
|
| 221 |
# Note: Downloading captions requires OAuth 2.0 authentication
|
| 222 |
st.warning(
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
return None
|
| 228 |
|
|
@@ -268,7 +284,7 @@ def extract_subtitles_with_ytdlp(video_url):
|
|
| 268 |
return None
|
| 269 |
|
| 270 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 271 |
-
ydl.params['logger'] =
|
| 272 |
info = ydl.extract_info(video_url, download=False)
|
| 273 |
available_subs = info.get('subtitles', {})
|
| 274 |
auto_subs = info.get('automatic_captions', {})
|
|
@@ -319,12 +335,18 @@ def process_content(text, embeddings, source):
|
|
| 319 |
docs = text_splitter.create_documents([text], metadatas=[{"source": source}])
|
| 320 |
if docs:
|
| 321 |
st.text(f"Document metadata: {docs[0].metadata}")
|
|
|
|
|
|
|
|
|
|
| 322 |
vectorstore = FAISS.from_documents(docs, embeddings)
|
| 323 |
st.text(f"Vector store created with {len(docs)} documents.")
|
| 324 |
return vectorstore
|
| 325 |
|
| 326 |
# Function to create QA chain
|
| 327 |
def create_qa_chain(vectorstore, llm):
|
|
|
|
|
|
|
|
|
|
| 328 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
|
| 329 |
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
|
| 330 |
llm=llm,
|
|
@@ -447,7 +469,11 @@ if ask_clicked and query:
|
|
| 447 |
try:
|
| 448 |
if "qa_chain" not in st.session_state or st.session_state.qa_chain is None:
|
| 449 |
st.session_state.qa_chain = create_qa_chain(st.session_state.vectorstore, st.session_state.llm)
|
|
|
|
|
|
|
|
|
|
| 450 |
|
|
|
|
| 451 |
result = st.session_state.qa_chain({"question": query}, return_only_outputs=True)
|
| 452 |
|
| 453 |
if not result.get("answer"):
|
|
|
|
| 135 |
max_tokens=2048
|
| 136 |
)
|
| 137 |
|
| 138 |
+
# Custom logger for yt-dlp to redirect logs to Streamlit
class StreamlitLogger:
    """Logger object for yt-dlp that renders its messages in the Streamlit UI.

    An instance is meant to be supplied as yt-dlp's ``logger`` option. Each
    method forwards the message it receives to the matching Streamlit
    display element, tagged with its severity.
    """

    # Prefix yt-dlp puts on genuine debug output routed through ``debug()``.
    _DEBUG_PREFIX = "[debug] "

    def debug(self, msg):
        """Show a debug message, or hand an informational one to ``info``.

        For youtube-dl compatibility, yt-dlp passes both debug output and
        ordinary informational output to ``debug()``; only real debug
        output carries the ``[debug] `` prefix. Unprefixed messages are
        therefore reported as INFO instead of being mislabelled as DEBUG.
        """
        if msg.startswith(self._DEBUG_PREFIX):
            st.text(f"[yt-dlp DEBUG] {msg}")
        else:
            self.info(msg)

    def info(self, msg):
        """Show an informational message in a Streamlit info box."""
        st.info(f"[yt-dlp INFO] {msg}")

    def warning(self, msg):
        """Show a warning message in a Streamlit warning box."""
        st.warning(f"[yt-dlp WARNING] {msg}")

    def error(self, msg):
        """Show an error message in a Streamlit error box."""
        st.error(f"[yt-dlp ERROR] {msg}")
|
| 151 |
+
|
| 152 |
# Sidebar for URL and YouTube input
|
| 153 |
with st.sidebar:
|
| 154 |
st.header("Enter Web URL")
|
|
|
|
| 234 |
|
| 235 |
# Note: Downloading captions requires OAuth 2.0 authentication
|
| 236 |
st.warning(
|
| 237 |
+
"English captions are available for this video but cannot be fetched with an API key alone. "
|
| 238 |
+
"Downloading captions requires OAuth 2.0 authentication, which is not supported in Hugging Face Spaces without user interaction. "
|
| 239 |
+
"To fetch captions:\n"
|
| 240 |
+
"- Test locally with OAuth 2.0 setup (see instructions in the documentation).\n"
|
| 241 |
+
"- Or try a video with transcripts available (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)."
|
| 242 |
)
|
| 243 |
return None
|
| 244 |
|
|
|
|
| 284 |
return None
|
| 285 |
|
| 286 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 287 |
+
ydl.params['logger'] = StreamlitLogger()
|
| 288 |
info = ydl.extract_info(video_url, download=False)
|
| 289 |
available_subs = info.get('subtitles', {})
|
| 290 |
auto_subs = info.get('automatic_captions', {})
|
|
|
|
| 335 |
docs = text_splitter.create_documents([text], metadatas=[{"source": source}])
|
| 336 |
if docs:
|
| 337 |
st.text(f"Document metadata: {docs[0].metadata}")
|
| 338 |
+
else:
|
| 339 |
+
st.error("No documents created from the content.")
|
| 340 |
+
return None
|
| 341 |
vectorstore = FAISS.from_documents(docs, embeddings)
|
| 342 |
st.text(f"Vector store created with {len(docs)} documents.")
|
| 343 |
return vectorstore
|
| 344 |
|
| 345 |
# Function to create QA chain
|
| 346 |
def create_qa_chain(vectorstore, llm):
|
| 347 |
+
if vectorstore is None:
|
| 348 |
+
st.error("Vector store is not initialized. Cannot create QA chain.")
|
| 349 |
+
return None
|
| 350 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
|
| 351 |
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
|
| 352 |
llm=llm,
|
|
|
|
| 469 |
try:
|
| 470 |
if "qa_chain" not in st.session_state or st.session_state.qa_chain is None:
|
| 471 |
st.session_state.qa_chain = create_qa_chain(st.session_state.vectorstore, st.session_state.llm)
|
| 472 |
+
if st.session_state.qa_chain is None:
|
| 473 |
+
st.error("Failed to create QA chain.")
|
| 474 |
+
st.stop()
|
| 475 |
|
| 476 |
+
st.text(f"Querying with question: {query}")
|
| 477 |
result = st.session_state.qa_chain({"question": query}, return_only_outputs=True)
|
| 478 |
|
| 479 |
if not result.get("answer"):
|