Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,12 +13,16 @@ from bs4 import SoupStrainer
|
|
| 13 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 14 |
import yt_dlp
|
| 15 |
import re
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Load environment variables (optional)
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
# Groq API key, read from the environment (Hugging Face Spaces secret or a local
# .env picked up by load_dotenv()) so the credential never lives in source control.
# NOTE(review): the key that was previously committed here is public and must be
# revoked and rotated in the Groq console.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Custom CSS
|
| 24 |
st.markdown("""
|
|
@@ -123,7 +127,7 @@ if "llm" not in st.session_state:
|
|
| 123 |
st.session_state.llm = ChatGroq(
|
| 124 |
api_key=GROQ_API_KEY,
|
| 125 |
model="llama3-70b-8192",
|
| 126 |
-
max_tokens=512 #
|
| 127 |
)
|
| 128 |
|
| 129 |
# Sidebar for URL and YouTube input
|
|
@@ -203,6 +207,45 @@ def fetch_youtube_transcript(video_id):
|
|
| 203 |
st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
|
| 204 |
return None
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
# Function to extract subtitles using yt-dlp with cookies
|
| 207 |
def extract_subtitles_with_ytdlp(video_url):
|
| 208 |
ydl_opts = {
|
|
@@ -360,6 +403,10 @@ if process_youtube_clicked:
|
|
| 360 |
st.text("Fetching Closed Captions...Started...✅✅✅")
|
| 361 |
transcript_text = extract_subtitles_with_ytdlp(youtube_url)
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
if not transcript_text:
|
| 364 |
st.error(
|
| 365 |
"No transcripts or closed captions available. "
|
|
@@ -369,6 +416,7 @@ if process_youtube_clicked:
|
|
| 369 |
"Solutions:\n"
|
| 370 |
"- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
|
| 371 |
"- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
|
|
|
|
| 372 |
"- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
|
| 373 |
"- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
|
| 374 |
)
|
|
|
|
| 13 |
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
| 14 |
import yt_dlp
|
| 15 |
import re
|
| 16 |
+
from googleapiclient.discovery import build
|
| 17 |
+
from googleapiclient.errors import HttpError
|
| 18 |
|
| 19 |
# Load environment variables (optional)
|
| 20 |
load_dotenv()
|
| 21 |
|
| 22 |
# Groq API key, read from the environment (Hugging Face Spaces secret or a local
# .env picked up by load_dotenv()) so the credential never lives in source control.
# NOTE(review): the key that was previously committed here is public and must be
# revoked and rotated in the Groq console.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 24 |
+
# YouTube API key (to be set in Hugging Face Spaces secrets)
|
| 25 |
+
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
|
| 26 |
|
| 27 |
# Custom CSS
|
| 28 |
st.markdown("""
|
|
|
|
| 127 |
st.session_state.llm = ChatGroq(
|
| 128 |
api_key=GROQ_API_KEY,
|
| 129 |
model="llama3-70b-8192",
|
| 130 |
+
max_tokens=512 # Keep reduced to minimize resource usage
|
| 131 |
)
|
| 132 |
|
| 133 |
# Sidebar for URL and YouTube input
|
|
|
|
| 207 |
st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
|
| 208 |
return None
|
| 209 |
|
| 210 |
+
# Function to fetch captions using YouTube Data API (limited to listing with API key)
|
| 211 |
+
def fetch_youtube_captions_api(video_id, api_key):
    """Check through the YouTube Data API whether a video has an English caption track.

    The function only *lists* caption tracks; it never downloads caption text.
    Every code path therefore returns None, and the outcome is reported to the
    user through Streamlit warning/error messages.

    Args:
        video_id: ID of the YouTube video whose caption tracks are listed.
        api_key: YouTube Data API developer key. When falsy, the lookup is
            skipped with a warning.

    Returns:
        None in all cases (no key, API error, no English track, or English
        track found but not downloadable with an API key).
    """
    if not api_key:
        st.warning("YOUTUBE_API_KEY not set. Skipping YouTube Data API fallback.")
        return None

    # Keep the try body limited to the calls that talk to the API.
    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
        captions = youtube.captions().list(
            part='snippet',
            videoId=video_id
        ).execute()
    except HttpError as e:
        st.error(f"Error fetching captions with YouTube Data API: {str(e)}")
        return None

    # Accept plain "en" and any regional variant ("en-US", "en-GB", "en-CA", ...),
    # compared case-insensitively. Items with a missing snippet/language/id are
    # skipped instead of raising KeyError.
    caption_id = None
    for item in captions.get('items', []):
        language = ((item.get('snippet') or {}).get('language') or '').lower()
        if language == 'en' or language.startswith('en-'):
            caption_id = item.get('id')
            if caption_id:
                break

    if not caption_id:
        st.warning("No English captions found via YouTube Data API.")
        return None

    # Note: Downloading captions requires OAuth 2.0 authentication, so the track
    # found above can only be reported, not fetched.
    st.warning(
        "English captions are available for this video but cannot be fetched with an API key alone. "
        "Downloading captions requires OAuth 2.0 authentication, which is not supported in Hugging Face Spaces without user interaction. "
        "To fetch captions:\n"
        "- Test locally with OAuth 2.0 setup (see https://developers.google.com/youtube/v3/guides/auth/installed-apps for instructions).\n"
        "- Or try a video with transcripts available (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)."
    )
    return None
|
| 248 |
+
|
| 249 |
# Function to extract subtitles using yt-dlp with cookies
|
| 250 |
def extract_subtitles_with_ytdlp(video_url):
|
| 251 |
ydl_opts = {
|
|
|
|
| 403 |
st.text("Fetching Closed Captions...Started...✅✅✅")
|
| 404 |
transcript_text = extract_subtitles_with_ytdlp(youtube_url)
|
| 405 |
|
| 406 |
+
if not transcript_text and YOUTUBE_API_KEY:
|
| 407 |
+
st.text("Fetching Captions via YouTube Data API...Started...✅✅✅")
|
| 408 |
+
transcript_text = fetch_youtube_captions_api(video_id, YOUTUBE_API_KEY)
|
| 409 |
+
|
| 410 |
if not transcript_text:
|
| 411 |
st.error(
|
| 412 |
"No transcripts or closed captions available. "
|
|
|
|
| 416 |
"Solutions:\n"
|
| 417 |
"- Ensure captions are enabled for the video by checking the video settings on YouTube (gear icon > Subtitles/CC > Enable if available).\n"
|
| 418 |
"- Regenerate and upload a fresh cookies.txt file (see instructions above).\n"
|
| 419 |
+
"- Ensure YOUTUBE_API_KEY is set in Spaces secrets (Settings > Secrets > Add YOUTUBE_API_KEY).\n"
|
| 420 |
"- Try a different video (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, which has transcripts available).\n"
|
| 421 |
"- Test locally to rule out Hugging Face Spaces IP restrictions by running: pip install -r requirements.txt && streamlit run app.py"
|
| 422 |
)
|