Spaces:
Runtime error
Runtime error
Claude committed on
feat: Add ZeroGPU, SOTA model fallbacks, and UX improvements
Browse files- Add ZeroGPU support (@spaces.GPU decorator) for faster processing
- Add model fallback chain: Qwen2.5-72B -> Llama-3.1-70B
- Auto-validate YouTube URLs with helpful error messages
- Auto-enable frame analysis (removed manual checkbox)
- Better error handling with specific messages
- Comprehensive YAML frontmatter with tags, models, scopes
- Add spaces package to dependencies
- Update tests for new URL validation (36 tests pass)
- README.md +42 -18
- app.py +144 -66
- pyproject.toml +1 -0
- requirements.txt +1 -0
- tests/test_app.py +55 -7
- uv.lock +30 -24
README.md
CHANGED
|
@@ -5,37 +5,61 @@ colorFrom: blue
|
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "6.2.0"
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
hf_oauth: true
|
|
|
|
|
|
|
|
|
|
| 12 |
hf_oauth_expiration_minutes: 480
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
---
|
| 14 |
|
| 15 |
# Video Analyzer
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
## Features
|
| 20 |
|
| 21 |
-
- **YouTube Video Download**:
|
| 22 |
-
- **Speech-to-Text**:
|
| 23 |
-
- **Visual Analysis**:
|
| 24 |
-
- **Knowledge Base**:
|
| 25 |
-
- **RAG Chatbot**: Ask questions about your
|
| 26 |
|
| 27 |
## How to Use
|
| 28 |
|
| 29 |
-
1. **Sign in** with your HuggingFace account
|
| 30 |
-
2. **Analyze
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
- Click "Analyze Video" to process
|
| 34 |
-
3. **Chat with Videos** tab:
|
| 35 |
-
- Ask questions about videos you've analyzed
|
| 36 |
-
- The AI will search the knowledge base and provide answers
|
| 37 |
|
| 38 |
-
##
|
| 39 |
|
| 40 |
-
-
|
| 41 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: "6.2.0"
|
| 8 |
+
python_version: "3.11"
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
+
license: mit
|
| 12 |
+
suggested_hardware: zero-a10g
|
| 13 |
+
suggested_storage: small
|
| 14 |
hf_oauth: true
|
| 15 |
+
hf_oauth_scopes:
|
| 16 |
+
- read-repos
|
| 17 |
+
- inference-api
|
| 18 |
hf_oauth_expiration_minutes: 480
|
| 19 |
+
tags:
|
| 20 |
+
- video
|
| 21 |
+
- youtube
|
| 22 |
+
- transcription
|
| 23 |
+
- whisper
|
| 24 |
+
- rag
|
| 25 |
+
- chatbot
|
| 26 |
+
models:
|
| 27 |
+
- openai/whisper-base
|
| 28 |
+
- Salesforce/blip-image-captioning-base
|
| 29 |
+
- Qwen/Qwen2.5-72B-Instruct
|
| 30 |
+
short_description: Download, transcribe, and chat with YouTube videos using AI
|
| 31 |
---
|
| 32 |
|
| 33 |
# Video Analyzer
|
| 34 |
|
| 35 |
+
Download, transcribe, and chat with YouTube videos using AI.
|
| 36 |
|
| 37 |
## Features
|
| 38 |
|
| 39 |
+
- **YouTube Video Download**: Supports videos, playlists, and shorts
|
| 40 |
+
- **Speech-to-Text**: Automatic transcription using OpenAI Whisper
|
| 41 |
+
- **Visual Analysis**: Key frame extraction and captioning with BLIP
|
| 42 |
+
- **Knowledge Base**: Vector storage with ChromaDB for semantic search
|
| 43 |
+
- **RAG Chatbot**: Ask questions about your videos using Qwen2.5-72B
|
| 44 |
|
| 45 |
## How to Use
|
| 46 |
|
| 47 |
+
1. **Sign in** with your HuggingFace account
|
| 48 |
+
2. **Paste** a YouTube URL in the Analyze tab
|
| 49 |
+
3. **Wait** for processing (transcription + frame analysis)
|
| 50 |
+
4. **Chat** about the video content in the Chat tab
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
## Tech Stack
|
| 53 |
|
| 54 |
+
- **Gradio**: Web UI framework
|
| 55 |
+
- **Whisper**: Speech recognition
|
| 56 |
+
- **BLIP**: Image captioning
|
| 57 |
+
- **ChromaDB**: Vector database
|
| 58 |
+
- **Sentence Transformers**: Text embeddings
|
| 59 |
+
- **HuggingFace Inference API**: SOTA language model
|
| 60 |
+
|
| 61 |
+
## Limitations
|
| 62 |
+
|
| 63 |
+
- Works best with videos under 10 minutes
|
| 64 |
+
- Requires HuggingFace login for authentication
|
| 65 |
+
- Knowledge base is session-based (resets on Space restart)
|
app.py
CHANGED
|
@@ -16,6 +16,13 @@ from PIL import Image
|
|
| 16 |
from sentence_transformers import SentenceTransformer
|
| 17 |
from transformers import BlipForConditionalGeneration, BlipProcessor, pipeline
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Initialize ChromaDB client (persistent storage)
|
| 20 |
chroma_client = chromadb.Client()
|
| 21 |
collection = chroma_client.get_or_create_collection(
|
|
@@ -73,8 +80,11 @@ def get_vision_model():
|
|
| 73 |
return processor, model
|
| 74 |
|
| 75 |
|
| 76 |
-
#
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
def download_video(url: str, output_dir: str) -> list[dict]:
|
|
@@ -247,25 +257,52 @@ def search_knowledge(query: str, n_results: int = 5) -> list[dict]:
|
|
| 247 |
return matches
|
| 248 |
|
| 249 |
|
| 250 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
url: str,
|
| 252 |
-
analyze_frames: bool,
|
| 253 |
num_frames: int,
|
| 254 |
profile: gr.OAuthProfile | None,
|
| 255 |
progress: gr.Progress = gr.Progress(),
|
| 256 |
) -> str:
|
|
|
|
| 257 |
if profile is None:
|
| 258 |
-
return "Please log in
|
| 259 |
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
| 262 |
|
| 263 |
try:
|
| 264 |
progress(0, desc="Loading models...")
|
| 265 |
whisper_model = get_whisper_model()
|
| 266 |
-
|
| 267 |
-
if analyze_frames:
|
| 268 |
-
vision_processor, vision_model = get_vision_model()
|
| 269 |
|
| 270 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 271 |
progress(0.1, desc="Downloading video...")
|
|
@@ -302,17 +339,16 @@ def process_youtube(
|
|
| 302 |
video_result.append("### Transcript")
|
| 303 |
video_result.append(transcript)
|
| 304 |
|
| 305 |
-
# Analyze frames
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
frames = extract_frames(video_path, num_frames)
|
| 309 |
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
|
| 317 |
# Store in vector DB
|
| 318 |
progress(base_progress + 0.8 * (1/total), desc=f"Storing in knowledge base: {item['title']}")
|
|
@@ -322,10 +358,39 @@ def process_youtube(
|
|
| 322 |
results.append("\n\n".join(video_result))
|
| 323 |
|
| 324 |
progress(1.0, desc="Done!")
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
|
| 327 |
except Exception as e:
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
|
| 331 |
def chat_with_videos(
|
|
@@ -361,41 +426,59 @@ def chat_with_videos(
|
|
| 361 |
|
| 362 |
context = "\n\n".join(context_parts)
|
| 363 |
|
| 364 |
-
# Generate response using
|
| 365 |
-
|
| 366 |
-
client = InferenceClient(token=oauth_token.token)
|
| 367 |
|
| 368 |
-
|
| 369 |
You have access to transcripts and visual descriptions from analyzed videos.
|
| 370 |
Answer based only on the provided context. If the context doesn't contain enough information, say so.
|
| 371 |
Be concise but thorough."""
|
| 372 |
|
| 373 |
-
|
| 374 |
|
| 375 |
Video Content:
|
| 376 |
{context}
|
| 377 |
|
| 378 |
Question: {message}"""
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
|
| 400 |
|
| 401 |
def get_knowledge_stats() -> str:
|
|
@@ -473,34 +556,29 @@ def create_demo() -> gr.Blocks:
|
|
| 473 |
lines=1,
|
| 474 |
)
|
| 475 |
|
| 476 |
-
gr.Markdown("### Analysis Options")
|
| 477 |
with gr.Row():
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
|
|
|
| 482 |
)
|
| 483 |
num_frames = gr.Slider(
|
| 484 |
-
label="
|
| 485 |
-
minimum=
|
| 486 |
maximum=10,
|
| 487 |
value=5,
|
| 488 |
step=1,
|
|
|
|
| 489 |
)
|
| 490 |
|
| 491 |
-
submit_btn = gr.Button(
|
| 492 |
-
"Analyze Video",
|
| 493 |
-
variant="primary",
|
| 494 |
-
size="lg",
|
| 495 |
-
)
|
| 496 |
-
|
| 497 |
with gr.Column(scale=1):
|
| 498 |
-
gr.Markdown("###
|
| 499 |
gr.Markdown(
|
| 500 |
-
"
|
| 501 |
-
"
|
| 502 |
-
"
|
| 503 |
-
"
|
| 504 |
)
|
| 505 |
gr.Markdown("### Knowledge Base")
|
| 506 |
kb_status_analyze = gr.Markdown()
|
|
@@ -512,13 +590,13 @@ def create_demo() -> gr.Blocks:
|
|
| 512 |
|
| 513 |
gr.Markdown("### Results")
|
| 514 |
output = gr.Markdown(
|
| 515 |
-
value="*
|
| 516 |
)
|
| 517 |
|
| 518 |
# Wire up analyze tab
|
| 519 |
submit_btn.click(
|
| 520 |
fn=process_youtube,
|
| 521 |
-
inputs=[url_input,
|
| 522 |
outputs=[output],
|
| 523 |
).then(
|
| 524 |
fn=get_knowledge_stats,
|
|
|
|
| 16 |
from sentence_transformers import SentenceTransformer
|
| 17 |
from transformers import BlipForConditionalGeneration, BlipProcessor, pipeline
|
| 18 |
|
| 19 |
+
# Try to import spaces for ZeroGPU support
|
| 20 |
+
try:
|
| 21 |
+
import spaces
|
| 22 |
+
ZEROGPU_AVAILABLE = True
|
| 23 |
+
except ImportError:
|
| 24 |
+
ZEROGPU_AVAILABLE = False
|
| 25 |
+
|
| 26 |
# Initialize ChromaDB client (persistent storage)
|
| 27 |
chroma_client = chromadb.Client()
|
| 28 |
collection = chroma_client.get_or_create_collection(
|
|
|
|
| 80 |
return processor, model
|
| 81 |
|
| 82 |
|
| 83 |
+
# Chat models - tested and working with HF Inference API
|
| 84 |
+
CHAT_MODELS = [
|
| 85 |
+
"Qwen/Qwen2.5-72B-Instruct", # Primary - works with token
|
| 86 |
+
"meta-llama/Llama-3.1-70B-Instruct", # Fallback
|
| 87 |
+
]
|
| 88 |
|
| 89 |
|
| 90 |
def download_video(url: str, output_dir: str) -> list[dict]:
|
|
|
|
| 257 |
return matches
|
| 258 |
|
| 259 |
|
| 260 |
+
def is_valid_youtube_url(url: str) -> tuple[bool, str]:
    """Validate a YouTube URL and normalize it to include a scheme.

    Args:
        url: Raw text entered by the user. Surrounding whitespace is ignored
            and ``None`` is treated like an empty string.

    Returns:
        ``(True, normalized_url)`` when the URL's host is YouTube and its path
        is a watch, playlist, shorts, embed or ``/v/`` link (or a ``youtu.be``
        short link). ``https://`` is prepended when the input has no scheme.
        ``(False, message)`` otherwise, where ``message`` is a user-facing
        explanation of what to enter.
    """
    # Function-scope import keeps the block self-contained.
    from urllib.parse import urlsplit

    url = (url or "").strip()
    if not url:
        return False, "Please enter a YouTube URL."

    lowered = url.lower()

    # Compare the scheme case-insensitively: "HTTPS://..." already has one and
    # must not be turned into "https://HTTPS://...".
    if not lowered.startswith(("http://", "https://")):
        url = "https://" + url

    try:
        parts = urlsplit(url)
        host = (parts.hostname or "").lower()
        path = parts.path.lower()
    except ValueError:
        # Malformed netloc (e.g. unbalanced IPv6 brackets): treat as invalid.
        host, path = "", ""

    # Check the real host rather than searching the whole string, so that
    # "youtube.com/watch" appearing in another site's query string or path is
    # not mistaken for a YouTube link.
    if host == "youtube.com" or host.endswith(".youtube.com"):
        is_supported = path.startswith(
            ("/watch", "/playlist", "/shorts", "/embed", "/v/")
        )
    elif host in ("youtu.be", "www.youtu.be"):
        # Short links carry the video id as the path; require a non-empty one.
        is_supported = bool(path.strip("/"))
    else:
        is_supported = False

    if not is_supported:
        # Mentions YouTube but is not a usable link: give a more specific hint.
        if "youtu" in lowered:
            return False, "Invalid YouTube URL format. Please use a full video or playlist URL."
        return False, "Please enter a valid YouTube URL (e.g., https://youtube.com/watch?v=...)"

    return True, url
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def _process_youtube_impl(
|
| 288 |
url: str,
|
|
|
|
| 289 |
num_frames: int,
|
| 290 |
profile: gr.OAuthProfile | None,
|
| 291 |
progress: gr.Progress = gr.Progress(),
|
| 292 |
) -> str:
|
| 293 |
+
"""Internal implementation of video processing."""
|
| 294 |
if profile is None:
|
| 295 |
+
return "**Please log in first** using the button at the top right."
|
| 296 |
|
| 297 |
+
is_valid, result = is_valid_youtube_url(url)
|
| 298 |
+
if not is_valid:
|
| 299 |
+
return result
|
| 300 |
+
url = result # Use normalized URL
|
| 301 |
|
| 302 |
try:
|
| 303 |
progress(0, desc="Loading models...")
|
| 304 |
whisper_model = get_whisper_model()
|
| 305 |
+
vision_processor, vision_model = get_vision_model()
|
|
|
|
|
|
|
| 306 |
|
| 307 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 308 |
progress(0.1, desc="Downloading video...")
|
|
|
|
| 339 |
video_result.append("### Transcript")
|
| 340 |
video_result.append(transcript)
|
| 341 |
|
| 342 |
+
# Analyze frames (always enabled for better context)
|
| 343 |
+
progress(base_progress + 0.6 * (1/total), desc=f"Analyzing frames: {item['title']}")
|
| 344 |
+
frames = extract_frames(video_path, num_frames)
|
|
|
|
| 345 |
|
| 346 |
+
if frames:
|
| 347 |
+
video_result.append("\n### Visual Context")
|
| 348 |
+
for j, frame in enumerate(frames):
|
| 349 |
+
caption = describe_frame(frame, vision_processor, vision_model)
|
| 350 |
+
visual_contexts.append(caption)
|
| 351 |
+
video_result.append(f"**Frame {j+1}:** {caption}")
|
| 352 |
|
| 353 |
# Store in vector DB
|
| 354 |
progress(base_progress + 0.8 * (1/total), desc=f"Storing in knowledge base: {item['title']}")
|
|
|
|
| 358 |
results.append("\n\n".join(video_result))
|
| 359 |
|
| 360 |
progress(1.0, desc="Done!")
|
| 361 |
+
if results:
|
| 362 |
+
summary = "\n\n---\n\n".join(results)
|
| 363 |
+
summary += "\n\n---\n\n**Analysis complete!** Switch to the Chat tab to ask questions about this video."
|
| 364 |
+
return summary
|
| 365 |
+
return "No content found to analyze."
|
| 366 |
|
| 367 |
except Exception as e:
|
| 368 |
+
error_msg = str(e)
|
| 369 |
+
if "unavailable" in error_msg.lower():
|
| 370 |
+
return "Video unavailable. It may be private, age-restricted, or removed."
|
| 371 |
+
if "copyright" in error_msg.lower():
|
| 372 |
+
return "Video blocked due to copyright restrictions."
|
| 373 |
+
return f"Error analyzing video: {error_msg}"
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
# Apply ZeroGPU decorator if available
|
| 377 |
+
def process_youtube(
    url: str,
    num_frames: int,
    profile: gr.OAuthProfile | None,
    progress: gr.Progress = gr.Progress(),
) -> str:
    """Public entry point for video analysis; delegates to ``_process_youtube_impl``."""
    return _process_youtube_impl(url, num_frames, profile, progress)


# Apply ZeroGPU decorator if available
if ZEROGPU_AVAILABLE:
    # 5 minute timeout for video processing
    process_youtube = spaces.GPU(duration=300)(process_youtube)
|
| 394 |
|
| 395 |
|
| 396 |
def chat_with_videos(
|
|
|
|
| 426 |
|
| 427 |
context = "\n\n".join(context_parts)
|
| 428 |
|
| 429 |
+
# Generate response using HF Inference API with fallback models
|
| 430 |
+
client = InferenceClient(token=oauth_token.token)
|
|
|
|
| 431 |
|
| 432 |
+
system_prompt = """You are a helpful assistant that answers questions about video content.
|
| 433 |
You have access to transcripts and visual descriptions from analyzed videos.
|
| 434 |
Answer based only on the provided context. If the context doesn't contain enough information, say so.
|
| 435 |
Be concise but thorough."""
|
| 436 |
|
| 437 |
+
user_prompt = f"""Based on the following video content, answer the question.
|
| 438 |
|
| 439 |
Video Content:
|
| 440 |
{context}
|
| 441 |
|
| 442 |
Question: {message}"""
|
| 443 |
|
| 444 |
+
messages = [
|
| 445 |
+
{"role": "system", "content": system_prompt},
|
| 446 |
+
{"role": "user", "content": user_prompt},
|
| 447 |
+
]
|
| 448 |
+
|
| 449 |
+
last_error = None
|
| 450 |
+
used_model = None
|
| 451 |
+
|
| 452 |
+
for model in CHAT_MODELS:
|
| 453 |
+
try:
|
| 454 |
+
response = client.chat.completions.create(
|
| 455 |
+
model=model,
|
| 456 |
+
messages=messages,
|
| 457 |
+
max_tokens=1024,
|
| 458 |
+
)
|
| 459 |
+
answer = response.choices[0].message.content
|
| 460 |
+
used_model = model.split("/")[-1] # Get model name without org
|
| 461 |
+
break
|
| 462 |
+
except Exception as e:
|
| 463 |
+
last_error = e
|
| 464 |
+
continue
|
| 465 |
+
else:
|
| 466 |
+
# All models failed
|
| 467 |
+
error_msg = str(last_error) if last_error else "Unknown error"
|
| 468 |
+
if "401" in error_msg or "unauthorized" in error_msg.lower():
|
| 469 |
+
return "Authentication error. Please try logging out and back in."
|
| 470 |
+
if "429" in error_msg or "rate" in error_msg.lower():
|
| 471 |
+
return "Rate limit exceeded. Please wait a moment and try again."
|
| 472 |
+
if "503" in error_msg or "unavailable" in error_msg.lower():
|
| 473 |
+
return "Model service temporarily unavailable. Please try again later."
|
| 474 |
+
return f"Could not generate response. Error: {error_msg}"
|
| 475 |
+
|
| 476 |
+
# Add sources and model info
|
| 477 |
+
sources = list(set(m["title"] for m in matches))
|
| 478 |
+
answer += f"\n\n*Sources: {', '.join(sources)}*"
|
| 479 |
+
answer += f"\n*Model: {used_model}*"
|
| 480 |
+
|
| 481 |
+
return answer
|
| 482 |
|
| 483 |
|
| 484 |
def get_knowledge_stats() -> str:
|
|
|
|
| 556 |
lines=1,
|
| 557 |
)
|
| 558 |
|
|
|
|
| 559 |
with gr.Row():
|
| 560 |
+
submit_btn = gr.Button(
|
| 561 |
+
"Analyze Video",
|
| 562 |
+
variant="primary",
|
| 563 |
+
size="lg",
|
| 564 |
+
scale=3,
|
| 565 |
)
|
| 566 |
num_frames = gr.Slider(
|
| 567 |
+
label="Frames to analyze",
|
| 568 |
+
minimum=3,
|
| 569 |
maximum=10,
|
| 570 |
value=5,
|
| 571 |
step=1,
|
| 572 |
+
scale=1,
|
| 573 |
)
|
| 574 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
with gr.Column(scale=1):
|
| 576 |
+
gr.Markdown("### What happens")
|
| 577 |
gr.Markdown(
|
| 578 |
+
"1. Download video\n"
|
| 579 |
+
"2. Transcribe audio (Whisper)\n"
|
| 580 |
+
"3. Analyze key frames (BLIP)\n"
|
| 581 |
+
"4. Store in knowledge base"
|
| 582 |
)
|
| 583 |
gr.Markdown("### Knowledge Base")
|
| 584 |
kb_status_analyze = gr.Markdown()
|
|
|
|
| 590 |
|
| 591 |
gr.Markdown("### Results")
|
| 592 |
output = gr.Markdown(
|
| 593 |
+
value="*Paste a YouTube URL and click Analyze Video*",
|
| 594 |
)
|
| 595 |
|
| 596 |
# Wire up analyze tab
|
| 597 |
submit_btn.click(
|
| 598 |
fn=process_youtube,
|
| 599 |
+
inputs=[url_input, num_frames],
|
| 600 |
outputs=[output],
|
| 601 |
).then(
|
| 602 |
fn=get_knowledge_stats,
|
pyproject.toml
CHANGED
|
@@ -16,6 +16,7 @@ dependencies = [
|
|
| 16 |
"Pillow>=10.0.0",
|
| 17 |
"chromadb>=0.4.0",
|
| 18 |
"sentence-transformers>=2.2.0",
|
|
|
|
| 19 |
]
|
| 20 |
|
| 21 |
[project.optional-dependencies]
|
|
|
|
| 16 |
"Pillow>=10.0.0",
|
| 17 |
"chromadb>=0.4.0",
|
| 18 |
"sentence-transformers>=2.2.0",
|
| 19 |
+
"spaces>=0.19.0",
|
| 20 |
]
|
| 21 |
|
| 22 |
[project.optional-dependencies]
|
requirements.txt
CHANGED
|
@@ -9,3 +9,4 @@ opencv-python-headless>=4.8.0
|
|
| 9 |
Pillow>=10.0.0
|
| 10 |
chromadb>=0.4.0
|
| 11 |
sentence-transformers>=2.2.0
|
|
|
|
|
|
| 9 |
Pillow>=10.0.0
|
| 10 |
chromadb>=0.4.0
|
| 11 |
sentence-transformers>=2.2.0
|
| 12 |
+
spaces>=0.19.0
|
tests/test_app.py
CHANGED
|
@@ -334,6 +334,54 @@ class TestChatWithVideos:
|
|
| 334 |
assert "no videos have been analyzed" in result.lower()
|
| 335 |
|
| 336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
class TestProcessYoutube:
|
| 338 |
"""Tests for the process_youtube function."""
|
| 339 |
|
|
@@ -342,7 +390,7 @@ class TestProcessYoutube:
|
|
| 342 |
from app import process_youtube
|
| 343 |
|
| 344 |
mock_progress = MagicMock()
|
| 345 |
-
result = process_youtube("https://youtube.com/test",
|
| 346 |
assert "log in" in result.lower()
|
| 347 |
|
| 348 |
def test_empty_url_returns_prompt(self):
|
|
@@ -350,13 +398,13 @@ class TestProcessYoutube:
|
|
| 350 |
from app import process_youtube
|
| 351 |
|
| 352 |
mock_progress = MagicMock()
|
| 353 |
-
result = process_youtube("",
|
| 354 |
-
assert "enter
|
| 355 |
|
| 356 |
-
def
|
| 357 |
-
"""Test returns
|
| 358 |
from app import process_youtube
|
| 359 |
|
| 360 |
mock_progress = MagicMock()
|
| 361 |
-
result = process_youtube("
|
| 362 |
-
assert "
|
|
|
|
| 334 |
assert "no videos have been analyzed" in result.lower()
|
| 335 |
|
| 336 |
|
| 337 |
+
class TestUrlValidation:
    """Accept/reject and normalization checks for ``is_valid_youtube_url``."""

    def test_valid_watch_url(self):
        """A standard ``/watch`` URL is accepted."""
        from app import is_valid_youtube_url

        outcome = is_valid_youtube_url("https://youtube.com/watch?v=abc123")
        assert outcome[0] is True

    def test_valid_short_url(self):
        """A ``youtu.be`` short link is accepted."""
        from app import is_valid_youtube_url

        outcome = is_valid_youtube_url("https://youtu.be/abc123")
        assert outcome[0] is True

    def test_valid_playlist_url(self):
        """A playlist URL is accepted."""
        from app import is_valid_youtube_url

        outcome = is_valid_youtube_url("https://youtube.com/playlist?list=abc")
        assert outcome[0] is True

    def test_empty_url(self):
        """An empty string is rejected with a prompt to enter a URL."""
        from app import is_valid_youtube_url

        accepted, message = is_valid_youtube_url("")
        assert accepted is False
        assert "enter" in message.lower()

    def test_invalid_url(self):
        """A URL on a non-YouTube site is rejected."""
        from app import is_valid_youtube_url

        outcome = is_valid_youtube_url("https://example.com/video")
        assert outcome[0] is False

    def test_url_without_protocol(self):
        """A scheme-less URL is accepted and returned with ``https://``."""
        from app import is_valid_youtube_url

        accepted, normalized = is_valid_youtube_url("youtube.com/watch?v=abc123")
        assert accepted is True
        assert normalized.startswith("https://")
|
| 383 |
+
|
| 384 |
+
|
| 385 |
class TestProcessYoutube:
|
| 386 |
"""Tests for the process_youtube function."""
|
| 387 |
|
|
|
|
| 390 |
from app import process_youtube
|
| 391 |
|
| 392 |
mock_progress = MagicMock()
|
| 393 |
+
result = process_youtube("https://youtube.com/watch?v=test", 5, None, mock_progress)
|
| 394 |
assert "log in" in result.lower()
|
| 395 |
|
| 396 |
def test_empty_url_returns_prompt(self):
|
|
|
|
| 398 |
from app import process_youtube
|
| 399 |
|
| 400 |
mock_progress = MagicMock()
|
| 401 |
+
result = process_youtube("", 5, MagicMock(), mock_progress)
|
| 402 |
+
assert "enter" in result.lower()
|
| 403 |
|
| 404 |
+
def test_invalid_url_returns_error(self):
|
| 405 |
+
"""Test returns error for invalid URL."""
|
| 406 |
from app import process_youtube
|
| 407 |
|
| 408 |
mock_progress = MagicMock()
|
| 409 |
+
result = process_youtube("not-a-url", 5, MagicMock(), mock_progress)
|
| 410 |
+
assert "valid youtube url" in result.lower()
|
uv.lock
CHANGED
|
@@ -2004,30 +2004,16 @@ wheels = [
|
|
| 2004 |
|
| 2005 |
[[package]]
|
| 2006 |
name = "psutil"
|
| 2007 |
-
version = "
|
| 2008 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2009 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 2010 |
-
wheels = [
|
| 2011 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2012 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2013 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2014 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2015 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2016 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 2017 |
-
{ url = "https://files.pythonhosted.org/packages/44/86/98da45dff471b93ef5ce5bcaefa00e3038295a7880a77cf74018243d37fb/psutil-7.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2f2f53fd114e7946dfba3afb98c9b7c7f376009447360ca15bfb73f2066f84c7", size = 129692, upload-time = "2025-12-23T20:26:40.623Z" },
|
| 2018 |
-
{ url = "https://files.pythonhosted.org/packages/50/ee/10eae91ba4ad071c92db3c178ba861f30406342de9f0ddbe6d51fd741236/psutil-7.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e65c41d7e60068f60ce43b31a3a7fc90deb0dfd34ffc824a2574c2e5279b377e", size = 130110, upload-time = "2025-12-23T20:26:42.569Z" },
|
| 2019 |
-
{ url = "https://files.pythonhosted.org/packages/87/3a/2b2897443d56fedbbc34ac68a0dc7d55faa05d555372a2f989109052f86d/psutil-7.2.0-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cc66d21366850a4261412ce994ae9976bba9852dafb4f2fa60db68ed17ff5281", size = 181487, upload-time = "2025-12-23T20:26:44.633Z" },
|
| 2020 |
-
{ url = "https://files.pythonhosted.org/packages/11/66/44308428f7333db42c5ea7390c52af1b38f59b80b80c437291f58b5dfdad/psutil-7.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e025d67b42b8f22b096d5d20f5171de0e0fefb2f0ce983a13c5a1b5ed9872706", size = 184320, upload-time = "2025-12-23T20:26:46.83Z" },
|
| 2021 |
-
{ url = "https://files.pythonhosted.org/packages/18/28/d2feadc7f18e501c5ce687c377db7dca924585418fd694272b8e488ea99f/psutil-7.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:45f6b91f7ad63414d6454fd609e5e3556d0e1038d5d9c75a1368513bdf763f57", size = 140372, upload-time = "2025-12-23T20:26:49.334Z" },
|
| 2022 |
-
{ url = "https://files.pythonhosted.org/packages/b2/1d/48381f5fd0425aa054c4ee3de24f50de3d6c347019f3aec75f357377d447/psutil-7.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:87b18a19574139d60a546e88b5f5b9cbad598e26cdc790d204ab95d7024f03ee", size = 135400, upload-time = "2025-12-23T20:26:51.585Z" },
|
| 2023 |
-
{ url = "https://files.pythonhosted.org/packages/40/c5/a49160bf3e165b7b93a60579a353cf5d939d7f878fe5fd369110f1d18043/psutil-7.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:977a2fcd132d15cb05b32b2d85b98d087cad039b0ce435731670ba74da9e6133", size = 128116, upload-time = "2025-12-23T20:26:53.516Z" },
|
| 2024 |
-
{ url = "https://files.pythonhosted.org/packages/10/a1/c75feb480f60cd768fb6ed00ac362a16a33e5076ec8475a22d8162fb2659/psutil-7.2.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:24151011c21fadd94214d7139d7c6c54569290d7e553989bdf0eab73b13beb8c", size = 128925, upload-time = "2025-12-23T20:26:55.573Z" },
|
| 2025 |
-
{ url = "https://files.pythonhosted.org/packages/12/ff/e93136587c00a543f4bc768b157fac2c47cd77b180d4f4e5c6efb6ea53a2/psutil-7.2.0-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91f211ba9279e7c61d9d8f84b713cfc38fa161cb0597d5cb3f1ca742f6848254", size = 154666, upload-time = "2025-12-23T20:26:57.312Z" },
|
| 2026 |
-
{ url = "https://files.pythonhosted.org/packages/b8/dd/4c2de9c3827c892599d277a69d2224136800870a8a88a80981de905de28d/psutil-7.2.0-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f37415188b7ea98faf90fed51131181646c59098b077550246e2e092e127418b", size = 156109, upload-time = "2025-12-23T20:26:58.851Z" },
|
| 2027 |
-
{ url = "https://files.pythonhosted.org/packages/81/3f/090943c682d3629968dd0b04826ddcbc760ee1379021dbe316e2ddfcd01b/psutil-7.2.0-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0d12c7ce6ed1128cd81fd54606afa054ac7dbb9773469ebb58cf2f171c49f2ac", size = 148081, upload-time = "2025-12-23T20:27:01.318Z" },
|
| 2028 |
-
{ url = "https://files.pythonhosted.org/packages/c4/88/c39648ebb8ec182d0364af53cdefe6eddb5f3872ba718b5855a8ff65d6d4/psutil-7.2.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ca0faef7976530940dcd39bc5382d0d0d5eb023b186a4901ca341bd8d8684151", size = 147376, upload-time = "2025-12-23T20:27:03.347Z" },
|
| 2029 |
-
{ url = "https://files.pythonhosted.org/packages/01/a2/5b39e08bd9b27476bc7cce7e21c71a481ad60b81ffac49baf02687a50d7f/psutil-7.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:abdb74137ca232d20250e9ad471f58d500e7743bc8253ba0bfbf26e570c0e437", size = 136910, upload-time = "2025-12-23T20:27:05.289Z" },
|
| 2030 |
-
{ url = "https://files.pythonhosted.org/packages/59/54/53839db1258c1eaeb4ded57ff202144ebc75b23facc05a74fd98d338b0c6/psutil-7.2.0-cp37-abi3-win_arm64.whl", hash = "sha256:284e71038b3139e7ab3834b63b3eb5aa5565fcd61a681ec746ef9a0a8c457fd2", size = 133807, upload-time = "2025-12-23T20:27:06.825Z" },
|
| 2031 |
]
|
| 2032 |
|
| 2033 |
[[package]]
|
|
@@ -3014,6 +3000,24 @@ wheels = [
|
|
| 3014 |
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
|
| 3015 |
]
|
| 3016 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3017 |
[[package]]
|
| 3018 |
name = "starlette"
|
| 3019 |
version = "0.50.0"
|
|
@@ -3380,6 +3384,7 @@ dependencies = [
|
|
| 3380 |
{ name = "opencv-python-headless" },
|
| 3381 |
{ name = "pillow" },
|
| 3382 |
{ name = "sentence-transformers" },
|
|
|
|
| 3383 |
{ name = "torch" },
|
| 3384 |
{ name = "transformers" },
|
| 3385 |
{ name = "yt-dlp" },
|
|
@@ -3407,6 +3412,7 @@ requires-dist = [
|
|
| 3407 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" },
|
| 3408 |
{ name = "pytest-playwright", marker = "extra == 'dev'", specifier = ">=0.4.0" },
|
| 3409 |
{ name = "sentence-transformers", specifier = ">=2.2.0" },
|
|
|
|
| 3410 |
{ name = "torch", specifier = ">=2.0.0" },
|
| 3411 |
{ name = "transformers", specifier = ">=4.36.0" },
|
| 3412 |
{ name = "yt-dlp", specifier = ">=2024.1.0" },
|
|
|
|
| 2004 |
|
| 2005 |
[[package]]
|
| 2006 |
name = "psutil"
|
| 2007 |
+
version = "5.9.8"
|
| 2008 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2009 |
+
sdist = { url = "https://files.pythonhosted.org/packages/90/c7/6dc0a455d111f68ee43f27793971cf03fe29b6ef972042549db29eec39a2/psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c", size = 503247, upload-time = "2024-01-19T20:47:09.517Z" }
|
| 2010 |
+
wheels = [
|
| 2011 |
+
{ url = "https://files.pythonhosted.org/packages/e7/e3/07ae864a636d70a8a6f58da27cb1179192f1140d5d1da10886ade9405797/psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81", size = 248702, upload-time = "2024-01-19T20:47:36.303Z" },
|
| 2012 |
+
{ url = "https://files.pythonhosted.org/packages/b3/bd/28c5f553667116b2598b9cc55908ec435cb7f77a34f2bff3e3ca765b0f78/psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421", size = 285242, upload-time = "2024-01-19T20:47:39.65Z" },
|
| 2013 |
+
{ url = "https://files.pythonhosted.org/packages/c5/4f/0e22aaa246f96d6ac87fe5ebb9c5a693fbe8877f537a1022527c47ca43c5/psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4", size = 288191, upload-time = "2024-01-19T20:47:43.078Z" },
|
| 2014 |
+
{ url = "https://files.pythonhosted.org/packages/6e/f5/2aa3a4acdc1e5940b59d421742356f133185667dd190b166dbcfcf5d7b43/psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0", size = 251252, upload-time = "2024-01-19T20:47:52.88Z" },
|
| 2015 |
+
{ url = "https://files.pythonhosted.org/packages/93/52/3e39d26feae7df0aa0fd510b14012c3678b36ed068f7d78b8d8784d61f0e/psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf", size = 255090, upload-time = "2024-01-19T20:47:56.019Z" },
|
| 2016 |
+
{ url = "https://files.pythonhosted.org/packages/05/33/2d74d588408caedd065c2497bdb5ef83ce6082db01289a1e1147f6639802/psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8", size = 249898, upload-time = "2024-01-19T20:47:59.238Z" },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2017 |
]
|
| 2018 |
|
| 2019 |
[[package]]
|
|
|
|
| 3000 |
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
|
| 3001 |
]
|
| 3002 |
|
| 3003 |
+
[[package]]
|
| 3004 |
+
name = "spaces"
|
| 3005 |
+
version = "0.45.0"
|
| 3006 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3007 |
+
dependencies = [
|
| 3008 |
+
{ name = "gradio" },
|
| 3009 |
+
{ name = "httpx" },
|
| 3010 |
+
{ name = "packaging" },
|
| 3011 |
+
{ name = "psutil" },
|
| 3012 |
+
{ name = "pydantic" },
|
| 3013 |
+
{ name = "requests" },
|
| 3014 |
+
{ name = "typing-extensions" },
|
| 3015 |
+
]
|
| 3016 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ae/58/d6a47a6ceb19585fd8d37e1c6d3d463791475760cac44ac33c6aaed17fe0/spaces-0.45.0.tar.gz", hash = "sha256:be411a0425b9f068150c3a4886783b0c1b3e6c15427e5aec306678483423a069", size = 85694, upload-time = "2025-12-19T20:43:18.765Z" }
|
| 3017 |
+
wheels = [
|
| 3018 |
+
{ url = "https://files.pythonhosted.org/packages/b7/38/19d478be4926d6d09a7168679b738243a9b9ab2ea4720e98f0edc4a9f396/spaces-0.45.0-py3-none-any.whl", hash = "sha256:d3f7e1032d345244dadf389e59f528fb22e5a57efda9d4d24242664c6a82e872", size = 105702, upload-time = "2025-12-19T20:43:17.635Z" },
|
| 3019 |
+
]
|
| 3020 |
+
|
| 3021 |
[[package]]
|
| 3022 |
name = "starlette"
|
| 3023 |
version = "0.50.0"
|
|
|
|
| 3384 |
{ name = "opencv-python-headless" },
|
| 3385 |
{ name = "pillow" },
|
| 3386 |
{ name = "sentence-transformers" },
|
| 3387 |
+
{ name = "spaces" },
|
| 3388 |
{ name = "torch" },
|
| 3389 |
{ name = "transformers" },
|
| 3390 |
{ name = "yt-dlp" },
|
|
|
|
| 3412 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" },
|
| 3413 |
{ name = "pytest-playwright", marker = "extra == 'dev'", specifier = ">=0.4.0" },
|
| 3414 |
{ name = "sentence-transformers", specifier = ">=2.2.0" },
|
| 3415 |
+
{ name = "spaces", specifier = ">=0.19.0" },
|
| 3416 |
{ name = "torch", specifier = ">=2.0.0" },
|
| 3417 |
{ name = "transformers", specifier = ">=4.36.0" },
|
| 3418 |
{ name = "yt-dlp", specifier = ">=2024.1.0" },
|