File size: 2,074 Bytes
d619c43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import re

def format_timestamp(seconds):
    """Render a duration in seconds as ``MM:SS`` or ``HH:MM:SS``.

    The hour field is emitted only when the computed hour count is greater
    than zero. Every field is zero-padded to at least two digits, and each
    component is converted with ``int()`` before formatting.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    # Minutes and seconds are always present; hours are prepended on demand.
    fields = [int(within_hour // 60), int(seconds % 60)]

    hour_field = int(whole_hours)
    if hour_field > 0:
        fields.insert(0, hour_field)

    return ":".join(f"{field:02d}" for field in fields)

def extract_video_id(video_id_or_url):
    """
    Extracts video ID from a string that can be either an ID or full YouTube URL.

    Surrounding whitespace is ignored while matching.

    Supported formats:
    - Simple ID (e.g., dQw4w9WgXcQ)
    - https://www.youtube.com/watch?v=dQw4w9WgXcQ
    - https://www.youtube.com/watch?feature=share&v=dQw4w9WgXcQ
      (the ``v`` parameter may appear anywhere in the query string)
    - https://youtu.be/dQw4w9WgXcQ
    - https://youtube.com/shorts/dQw4w9WgXcQ
    - https://www.youtube.com/embed/dQw4w9WgXcQ
    - https://youtube.com/live/dQw4w9WgXcQ
    - Any of the above embedded in a longer string (e.g. a redirect URL)

    Returns:
    - The 11-character video ID if one is found
    - An empty string if the input is empty or None
    - The original input, unchanged, if no ID can be found
    """
    print(f"Processing input value: {video_id_or_url}")

    # If input string is empty or None, return empty string
    if not video_id_or_url:
        print("Empty video ID")
        return ""

    # Match against a trimmed copy so pasted values with stray spaces or a
    # trailing newline still resolve; the caller's original value is kept
    # intact for the failure path.
    candidate = video_id_or_url.strip()

    # Check for simple ID. fullmatch (instead of ^...$) refuses a trailing
    # newline that `$` would otherwise tolerate.
    if re.fullmatch(r'[a-zA-Z0-9_-]{11}', candidate):
        print(f"Found simple ID: {candidate}")
        return candidate

    # Narrow the search to the first YouTube URL embedded in the input.
    # The URL runs up to the next whitespace; it is NOT cut at '&', because
    # the `v` parameter may follow other query parameters.
    inner_url_match = re.search(
        r'https?://(?:www\.)?(?:youtube\.com|youtu\.be)\S*', candidate
    )
    if inner_url_match:
        candidate = inner_url_match.group(0)
        print(f"Found nested URL: {candidate}")

    # Recognised URL shapes. For /watch links, optionally skip over earlier
    # query parameters (up to an '&') before the `v=` parameter.
    match = re.search(
        r'(?:youtube\.com/watch\?(?:[^\s#]*?&)?v='
        r'|youtu\.be/'
        r'|youtube\.com/shorts/'
        r'|youtube\.com/embed/'
        r'|youtube\.com/live/)'
        r'([a-zA-Z0-9_-]{11})',
        candidate,
    )
    if match:
        video_id = match.group(1)
        print(f"Extracted ID from URL: {video_id}")
        return video_id

    # If failed to extract ID, return the caller's original string (not the
    # narrowed nested URL), as the contract above promises.
    print(f"Failed to extract ID, returning original value: {video_id_or_url}")
    return video_id_or_url