Spaces:
Sleeping
Sleeping
Enhance Dockerfile for improved network reliability and add cookies directory; update Streamlit app for better YouTube authentication handling and user guidance
Browse files
- Dockerfile +4 -0
- src/streamlit_app.py +83 -22
Dockerfile
CHANGED
|
@@ -19,6 +19,7 @@ RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) &&
|
|
| 19 |
# Set pip to have more retries and timeout
|
| 20 |
ENV PIP_DEFAULT_TIMEOUT=100
|
| 21 |
ENV PIP_RETRIES=3
|
|
|
|
| 22 |
|
| 23 |
# Copy requirements and install Python dependencies
|
| 24 |
COPY requirements.txt ./
|
|
@@ -26,6 +27,9 @@ RUN pip install --upgrade pip && \
|
|
| 26 |
pip install --no-cache-dir -r requirements.txt || \
|
| 27 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
# Copy source code
|
| 30 |
COPY src/ ./src/
|
| 31 |
|
|
|
|
| 19 |
# Set pip to have more retries and timeout
|
| 20 |
ENV PIP_DEFAULT_TIMEOUT=100
|
| 21 |
ENV PIP_RETRIES=3
|
| 22 |
+
ENV PYTHONUNBUFFERED=1
|
| 23 |
|
| 24 |
# Copy requirements and install Python dependencies
|
| 25 |
COPY requirements.txt ./
|
|
|
|
| 27 |
pip install --no-cache-dir -r requirements.txt || \
|
| 28 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
| 29 |
|
| 30 |
+
# Create cookies directory for user uploads
|
| 31 |
+
RUN mkdir -p /app/cookies
|
| 32 |
+
|
| 33 |
# Copy source code
|
| 34 |
COPY src/ ./src/
|
| 35 |
|
src/streamlit_app.py
CHANGED
|
@@ -56,23 +56,40 @@ def download_video(url, video_path="video.mp4", cookies_file=None):
|
|
| 56 |
"""Download a video from a URL"""
|
| 57 |
ydl_opts = {
|
| 58 |
"outtmpl": video_path,
|
|
|
|
|
|
|
|
|
|
| 59 |
}
|
| 60 |
|
| 61 |
-
#
|
|
|
|
| 62 |
if cookies_file and os.path.exists(cookies_file):
|
| 63 |
ydl_opts["cookiefile"] = cookies_file
|
| 64 |
-
else:
|
| 65 |
-
# Fall back to browser cookies
|
| 66 |
-
ydl_opts["cookiesfrombrowser"] = ("chrome",)
|
| 67 |
|
| 68 |
try:
|
|
|
|
|
|
|
|
|
|
| 69 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 70 |
ydl.download([url])
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
except Exception as e:
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
return False
|
| 77 |
|
| 78 |
def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
|
|
@@ -271,6 +288,20 @@ st.set_page_config(
|
|
| 271 |
|
| 272 |
st.title("🎤 English Accent Detection Tool")
|
| 273 |
st.markdown("""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
This app analyzes a speaker's English accent from a video or audio source.
|
| 275 |
It provides:
|
| 276 |
- Classification of the accent (British, American, etc.)
|
|
@@ -282,31 +313,47 @@ It provides:
|
|
| 282 |
tab1, tab2 = st.tabs(["Video URL", "Upload Audio"])
|
| 283 |
|
| 284 |
with tab1:
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
# Add file uploader for cookies.txt
|
| 288 |
cookies_file = None
|
| 289 |
-
uploaded_cookies = st.file_uploader("
|
|
|
|
|
|
|
| 290 |
|
| 291 |
if uploaded_cookies is not None:
|
| 292 |
# Save the uploaded cookies file to a temporary file
|
| 293 |
cookies_file = f"cookies_{int(time.time())}.txt"
|
| 294 |
with open(cookies_file, "wb") as f:
|
| 295 |
f.write(uploaded_cookies.getbuffer())
|
|
|
|
| 296 |
|
| 297 |
with st.expander("Having trouble with YouTube videos?"):
|
| 298 |
st.markdown("""
|
| 299 |
-
|
|
|
|
|
|
|
| 300 |
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
| 310 |
""")
|
| 311 |
|
| 312 |
if st.button("Analyze Video"):
|
|
@@ -374,13 +421,27 @@ with tab1:
|
|
| 374 |
st.error(f"Error during analysis: {str(e)}")
|
| 375 |
|
| 376 |
with tab2:
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
|
| 379 |
if uploaded_file is not None:
|
|
|
|
|
|
|
| 380 |
st.audio(uploaded_file)
|
| 381 |
|
| 382 |
-
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
try:
|
| 385 |
results = process_uploaded_audio(uploaded_file)
|
| 386 |
|
|
|
|
| 56 |
"""Download a video from a URL"""
|
| 57 |
ydl_opts = {
|
| 58 |
"outtmpl": video_path,
|
| 59 |
+
"quiet": False,
|
| 60 |
+
"no_warnings": False,
|
| 61 |
+
"verbose": True # More detailed output for debugging
|
| 62 |
}
|
| 63 |
|
| 64 |
+
# Only use cookies if explicitly provided via file upload
|
| 65 |
+
# Don't try to access browser cookies in Docker container
|
| 66 |
if cookies_file and os.path.exists(cookies_file):
|
| 67 |
ydl_opts["cookiefile"] = cookies_file
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
try:
|
| 70 |
+
# Detect YouTube URLs so the error handler below can show YouTube-specific guidance
|
| 71 |
+
is_youtube = "youtube" in url.lower() or "youtu.be" in url.lower()
|
| 72 |
+
|
| 73 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 74 |
ydl.download([url])
|
| 75 |
+
|
| 76 |
+
if os.path.exists(video_path):
|
| 77 |
+
return True
|
| 78 |
+
else:
|
| 79 |
+
st.error(f"Video downloaded but file not found: {video_path}")
|
| 80 |
+
return False
|
| 81 |
except Exception as e:
|
| 82 |
+
error_msg = str(e)
|
| 83 |
+
st.error(f"Download error: {error_msg}")
|
| 84 |
+
|
| 85 |
+
# Provide specific guidance based on error type
|
| 86 |
+
if is_youtube and ("bot" in error_msg.lower() or "sign in" in error_msg.lower()):
|
| 87 |
+
st.warning("YouTube requires authentication. Please upload a cookies.txt file or try a direct video link.")
|
| 88 |
+
elif "not find" in error_msg.lower() and "cookies" in error_msg.lower():
|
| 89 |
+
st.warning("Browser cookies could not be accessed. Please upload a cookies.txt file.")
|
| 90 |
+
elif "network" in error_msg.lower() or "timeout" in error_msg.lower():
|
| 91 |
+
st.warning("Network error. Please check your internet connection and try again.")
|
| 92 |
+
|
| 93 |
return False
|
| 94 |
|
| 95 |
def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
|
|
|
|
| 288 |
|
| 289 |
st.title("🎤 English Accent Detection Tool")
|
| 290 |
st.markdown("""
|
| 291 |
+
This application analyzes a speaker's English accent from video URLs or audio uploads,
|
| 292 |
+
providing detailed insights for hiring evaluation purposes.
|
| 293 |
+
""")
|
| 294 |
+
|
| 295 |
+
# Add container for tips
|
| 296 |
+
with st.container():
|
| 297 |
+
st.info("""
|
| 298 |
+
💡 **Tips for best results:**
|
| 299 |
+
- Use **Loom** or **Vimeo** videos (more reliable than YouTube)
|
| 300 |
+
- For YouTube videos, you may need to provide cookies
|
| 301 |
+
- Audio clips of 15-30 seconds work best
|
| 302 |
+
- Clear speech with minimal background noise is ideal
|
| 303 |
+
""")
|
| 304 |
+
st.markdown("""
|
| 305 |
This app analyzes a speaker's English accent from a video or audio source.
|
| 306 |
It provides:
|
| 307 |
- Classification of the accent (British, American, etc.)
|
|
|
|
| 313 |
tab1, tab2 = st.tabs(["Video URL", "Upload Audio"])
|
| 314 |
|
| 315 |
with tab1:
|
| 316 |
+
st.markdown("### 🎬 Analyze video from URL")
|
| 317 |
+
url = st.text_input("Enter a public video URL",
|
| 318 |
+
placeholder="https://www.loom.com/..., https://vimeo.com/..., or direct MP4 link")
|
| 319 |
+
|
| 320 |
+
# Recommend alternative sources
|
| 321 |
+
st.caption("⚠️ **Note**: YouTube videos often require authentication. For best results, use Loom, Vimeo or direct video links.")
|
| 322 |
|
| 323 |
# Add file uploader for cookies.txt
|
| 324 |
cookies_file = None
|
| 325 |
+
uploaded_cookies = st.file_uploader("Upload cookies.txt file for YouTube (if needed)",
|
| 326 |
+
type="txt",
|
| 327 |
+
help="Only needed for YouTube videos that require authentication")
|
| 328 |
|
| 329 |
if uploaded_cookies is not None:
|
| 330 |
# Save the uploaded cookies file to a temporary file
|
| 331 |
cookies_file = f"cookies_{int(time.time())}.txt"
|
| 332 |
with open(cookies_file, "wb") as f:
|
| 333 |
f.write(uploaded_cookies.getbuffer())
|
| 334 |
+
st.success("Cookies file uploaded successfully!")
|
| 335 |
|
| 336 |
with st.expander("Having trouble with YouTube videos?"):
|
| 337 |
st.markdown("""
|
| 338 |
+
### YouTube Authentication Issues
|
| 339 |
+
|
| 340 |
+
YouTube's anti-bot measures often block automated video downloads. To solve this:
|
| 341 |
|
| 342 |
+
#### Option 1: Use Alternative Video Sources (Recommended)
|
| 343 |
+
These typically work without authentication issues:
|
| 344 |
+
- [Loom](https://www.loom.com/) - Great for screen recordings
|
| 345 |
+
- [Vimeo](https://vimeo.com/) - High-quality video hosting
|
| 346 |
+
- [Streamable](https://streamable.com/) - Simple video sharing
|
| 347 |
+
- Any direct MP4 link
|
| 348 |
|
| 349 |
+
#### Option 2: Upload Cookies for YouTube
|
| 350 |
+
1. Install a browser extension like [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)
|
| 351 |
+
2. Login to YouTube in your browser
|
| 352 |
+
3. Use the extension to export cookies to a .txt file
|
| 353 |
+
4. Upload the cookies.txt file using the uploader above
|
| 354 |
+
|
| 355 |
+
#### Option 3: Use Audio Upload Instead
|
| 356 |
+
The 'Upload Audio' tab allows direct analysis of audio files without URL issues.
|
| 357 |
""")
|
| 358 |
|
| 359 |
if st.button("Analyze Video"):
|
|
|
|
| 421 |
st.error(f"Error during analysis: {str(e)}")
|
| 422 |
|
| 423 |
with tab2:
|
| 424 |
+
st.markdown("### 🎵 Upload Audio File")
|
| 425 |
+
st.caption("**Recommended option!** Direct audio upload is more reliable than video URLs.")
|
| 426 |
+
|
| 427 |
+
uploaded_file = st.file_uploader("Upload an audio file",
|
| 428 |
+
type=["wav", "mp3", "m4a", "ogg", "flac"],
|
| 429 |
+
help="Support for WAV, MP3, M4A, OGG and FLAC formats")
|
| 430 |
|
| 431 |
if uploaded_file is not None:
|
| 432 |
+
# Show a preview of the audio
|
| 433 |
+
st.markdown("#### Audio Preview:")
|
| 434 |
st.audio(uploaded_file)
|
| 435 |
|
| 436 |
+
st.markdown("#### Ready for Analysis")
|
| 437 |
+
col1, col2 = st.columns([1, 3])
|
| 438 |
+
with col1:
|
| 439 |
+
analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
|
| 440 |
+
with col2:
|
| 441 |
+
st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
|
| 442 |
+
|
| 443 |
+
if analyze_button:
|
| 444 |
+
with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
|
| 445 |
try:
|
| 446 |
results = process_uploaded_audio(uploaded_file)
|
| 447 |
|