Spaces:

aseelflihan
/

syncmaster5

Sleeping

File size: 9,605 Bytes

88fbdc0

import os
import mimetypes
import tempfile
from pathlib import Path
from typing import Optional, List, Dict
import librosa
import numpy as np

def format_timestamp(seconds: float) -> str:
    """
    Format a duration in seconds as an MM:SS.mmm timestamp.

    Minutes are not capped at 59, so an hour is rendered as "60:00.000".

    Args:
        seconds: Time in seconds. Negative values are rendered as a
            leading "-" followed by the formatted magnitude.

    Returns:
        Formatted timestamp string
    """
    # Decide the sign from the value as it will be displayed, so a tiny
    # negative that rounds to zero does not come out as "-00:00.000".
    sign = "-" if round(seconds, 3) < 0 else ""

    minutes, remaining_seconds = divmod(abs(seconds), 60)
    minutes = int(minutes)

    # The seconds field is rounded to milliseconds when formatted. Without
    # this carry, 59.9996 would print as "00:60.000" instead of "01:00.000".
    if round(remaining_seconds, 3) >= 60:
        minutes += 1
        remaining_seconds = 0.0

    return f"{sign}{minutes:02d}:{remaining_seconds:06.3f}"

def validate_audio_file(file_path: str) -> bool:
    """
    Validate if the file is a supported audio format

    The checks run from cheapest to most expensive: the path must be an
    existing regular file, its extension must be in the supported list,
    its guessed MIME type (when one can be guessed) must be audio, and
    finally librosa must be able to decode the first second of it.

    Args:
        file_path: Path to the audio file

    Returns:
        True if valid, False otherwise. Any error raised while checking
        is treated as "not valid" rather than propagated.
    """
    supported_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}

    try:
        # isfile (not exists) so that a directory named "x.mp3" is rejected
        # before any decoding is attempted.
        if not os.path.isfile(file_path):
            return False

        if Path(file_path).suffix.lower() not in supported_extensions:
            return False

        # An unknown MIME type (None) is not a failure; only a known
        # non-audio type is.
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type and not mime_type.startswith('audio/'):
            return False

        # Decode just one second to confirm the content is real audio.
        librosa.load(file_path, duration=1.0)
        return True

    except Exception:
        # Deliberately broad: this is a yes/no validator. Unlike a bare
        # "except:", this lets KeyboardInterrupt and SystemExit propagate.
        return False

def get_audio_info(file_path: str) -> Dict:
    """
    Get information about the audio file

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary with the keys 'duration' (seconds), 'sample_rate',
        'channels', 'file_size' (bytes) and 'format' (lower-cased file
        extension). If the file cannot be read, the same keys are
        returned with zero/'unknown' placeholder values plus an 'error'
        key holding the exception message.
    """
    try:
        # sr=None keeps the file's native sample rate and mono=False keeps
        # its channels. With librosa's defaults (sr=22050, mono=True) the
        # reported rate and channel count would never describe the file.
        y, sr = librosa.load(file_path, sr=None, mono=False)

        # Mono audio comes back 1-D; multi-channel audio comes back as
        # (channels, samples), so the sample count is always the last axis.
        channels = 1 if y.ndim == 1 else y.shape[0]
        duration = y.shape[-1] / sr

        return {
            'duration': duration,
            'sample_rate': sr,
            'channels': channels,
            'file_size': os.path.getsize(file_path),
            'format': Path(file_path).suffix.lower()
        }

    except Exception as e:
        # Best-effort helper: report the failure in-band so callers can
        # still display something.
        return {
            'error': str(e),
            'duration': 0,
            'sample_rate': 0,
            'channels': 0,
            'file_size': 0,
            'format': 'unknown'
        }

def clean_text(text: str) -> str:
    """
    Normalize whitespace and strip common transcription markers.

    Runs of whitespace are collapsed to single spaces, the bracketed and
    parenthesized markers for music, applause and laughter are removed
    (exact, case-sensitive match), and whitespace is collapsed again to
    close the gaps the removals leave behind.

    Args:
        text: Input text

    Returns:
        Cleaned text, or an empty string when the input is empty/falsy
    """
    if not text:
        return ""

    # Removed in this order, one pass each.
    artifacts = (
        '[Music]',
        '[Applause]',
        '[Laughter]',
        '(Music)',
        '(Applause)',
        '(Laughter)',
    )

    cleaned = ' '.join(text.split())
    for marker in artifacts:
        cleaned = cleaned.replace(marker, '')

    # Removing a marker can leave two adjacent spaces; collapse them.
    collapsed = ' '.join(cleaned.split())
    return collapsed.strip()

def split_text_into_chunks(text: str, max_chars_per_chunk: int = 100) -> List[str]:
    """
    Split text into chunks suitable for video display

    Words are packed greedily: a word joins the current chunk as long as
    the chunk, joined with single spaces, stays within the limit. Words
    are never broken, so a single word longer than the limit becomes a
    chunk of its own that exceeds the limit.

    Args:
        text: Input text
        max_chars_per_chunk: Maximum characters per chunk

    Returns:
        List of text chunks
    """
    if not text:
        return []

    chunks = []
    current_chunk = []
    current_length = 0  # Length of ' '.join(current_chunk)

    for word in text.split():
        # A separating space is only needed when the chunk already has a
        # word. Charging it to the first word too made the first chunk one
        # character tighter than every later chunk.
        separator = 1 if current_chunk else 0
        projected_length = current_length + separator + len(word)

        if current_chunk and projected_length > max_chars_per_chunk:
            # Flush the current chunk and start a new one with this word
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = projected_length

    # Add final chunk
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

def convert_color_hex_to_rgb(hex_color: str) -> tuple:
    """
    Convert hex color to RGB tuple

    Accepts six-digit colors ('#FF0000' or 'FF0000') and the three-digit
    shorthand ('#F00', expanded to 'FF0000'). Anything else falls back to
    white.

    Args:
        hex_color: Hex color string (e.g., '#FF0000')

    Returns:
        RGB tuple (r, g, b), or (255, 255, 255) when the input is not a
        valid hex color
    """
    default_rgb = (255, 255, 255)  # Default to white
    hex_digits = "0123456789abcdefABCDEF"

    if not isinstance(hex_color, str):
        return default_rgb

    digits = hex_color.lstrip('#')

    # Expand shorthand such as 'F0A' to 'FF00AA'.
    if len(digits) == 3:
        digits = ''.join(ch * 2 for ch in digits)

    # Check characters explicitly: int(x, 16) alone also accepts signs and
    # surrounding whitespace, so '-1-1-1' would parse to (-1, -1, -1).
    if len(digits) != 6 or any(ch not in hex_digits for ch in digits):
        return default_rgb

    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

def convert_rgb_to_hex(r: int, g: int, b: int) -> str:
    """
    Convert RGB values to hex color string

    Each channel is truncated to an integer and clamped to 0-255, so the
    result is always '#' followed by exactly six lowercase hex digits.

    Args:
        r, g, b: RGB color values (0-255)

    Returns:
        Hex color string
    """
    # Without clamping, 300 formats as '12c' and -5 as '-5', producing a
    # string that is not a valid color.
    channels = (max(0, min(255, int(value))) for value in (r, g, b))
    return "#" + "".join(f"{channel:02x}" for channel in channels)

def estimate_video_file_size(duration: float, resolution: tuple = (1280, 720),
                           bitrate_kbps: int = 2000) -> int:
    """
    Estimate a video's file size from its duration and bitrate.

    The estimate is simply bitrate times duration, converted from bits to
    bytes and truncated to an integer.

    Args:
        duration: Video duration in seconds
        resolution: Video resolution tuple (width, height); accepted but
            not used in the calculation
        bitrate_kbps: Video bitrate in kbps

    Returns:
        Estimated file size in bytes
    """
    bits_per_second = bitrate_kbps * 1000
    total_bits = bits_per_second * duration

    # Eight bits per byte.
    return int(total_bits / 8)

def create_safe_filename(filename: str) -> str:
    """
    Create a safe filename by removing/replacing invalid characters

    Reserved punctuation (< > : " / \\ | ? *) and ASCII control characters
    are replaced with underscores, runs of underscores/whitespace are
    collapsed to a single underscore, and leading/trailing underscores
    are trimmed.

    Args:
        filename: Original filename

    Returns:
        Safe filename; "output" when nothing usable remains or the result
        would be the special directory name "." or ".."
    """
    import re

    # Replace reserved punctuation and control characters (including NUL,
    # which is not valid in a filename and is not matched by \s below).
    safe_filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    # Collapse runs of underscores and whitespace
    safe_filename = re.sub(r'[_\s]+', '_', safe_filename)

    # Trim leading/trailing underscores
    safe_filename = safe_filename.strip('_')

    # Ensure the result is neither empty nor a directory reference
    if not safe_filename or safe_filename in ('.', '..'):
        safe_filename = "output"

    return safe_filename

def format_file_size(size_bytes: int) -> str:
    """
    Format file size in human-readable format

    Uses 1024-based units from B up to PB. Values are rounded to at most
    two decimals (e.g. "1.5 KB", "500.0 B"). Sizes beyond the largest
    unit are expressed in that unit.

    Args:
        size_bytes: File size in bytes

    Returns:
        Formatted file size string

    Raises:
        ValueError: If size_bytes is negative
    """
    if size_bytes == 0:
        return "0 B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")

    size_names = ("B", "KB", "MB", "GB", "TB", "PB")
    last_unit = len(size_names) - 1

    # Step through the units by division rather than log(): this avoids
    # floating-point error near exact powers of 1024 and cannot index past
    # the unit table for very large sizes.
    value = float(size_bytes)
    unit_index = 0
    while value >= 1024 and unit_index < last_unit:
        value /= 1024
        unit_index += 1

    value = round(value, 2)

    # Rounding can push e.g. 1023.999 up to 1024.0; show that as 1.0 of
    # the next unit instead.
    if value >= 1024 and unit_index < last_unit:
        value = round(value / 1024, 2)
        unit_index += 1

    return f"{value} {size_names[unit_index]}"

def validate_word_timestamps(word_timestamps: List[Dict]) -> List[Dict]:
    """
    Validate and clean word timestamps data

    An entry is dropped when it is not a dict, when its 'word' is missing,
    not a string or blank, or when its 'start'/'end' cannot be converted
    to a finite number. For the remaining entries a negative start is
    raised to 0 and an end that does not come after the start is set to
    start + 0.1 seconds. Times are rounded to milliseconds.

    Args:
        word_timestamps: List of word timestamp dictionaries

    Returns:
        Cleaned and validated word timestamps, each a dict with exactly
        the keys 'word', 'start' and 'end'
    """
    import math

    validated_timestamps = []

    # "or []" lets a missing (None) list be treated as empty.
    for word_data in word_timestamps or []:
        if not isinstance(word_data, dict):
            continue

        # A None or non-string word would crash on .strip(); treat it as
        # an invalid entry like any other.
        word = word_data.get('word', '')
        if not isinstance(word, str):
            continue
        word = word.strip()

        # Skip empty words
        if not word:
            continue

        # Ensure numeric timestamps
        try:
            start = float(word_data.get('start', 0))
            end = float(word_data.get('end', 0))
        except (ValueError, TypeError):
            continue

        # NaN/inf convert to float without error but fail every ordering
        # check below, so reject them explicitly.
        if not (math.isfinite(start) and math.isfinite(end)):
            continue

        # Ensure logical timestamp order
        if start < 0:
            start = 0.0
        if end <= start:
            end = start + 0.1  # Minimum duration

        validated_timestamps.append({
            'word': word,
            'start': round(start, 3),
            'end': round(end, 3)
        })

    return validated_timestamps

def _collapse_word_group(group: List[Dict]) -> Dict:
    """Collapse a run of word entries into one entry covering the whole run."""
    if len(group) == 1:
        # A lone word is passed through unchanged (same dict object).
        return group[0]

    return {
        'word': ' '.join(w['word'] for w in group),
        'start': group[0]['start'],
        # The last word is not necessarily the one that ends latest when
        # words overlap, so take the maximum.
        'end': max(w['end'] for w in group)
    }


def merge_overlapping_timestamps(word_timestamps: List[Dict], 
                               overlap_threshold: float = 0.05) -> List[Dict]:
    """
    Merge overlapping or very close word timestamps

    Consecutive entries are grouped while each entry starts no later than
    overlap_threshold seconds after the latest end seen so far in the
    group. Each group becomes one entry whose word is the space-joined
    words, spanning from the group's first start to its latest end.

    Args:
        word_timestamps: List of word timestamp dictionaries with 'word',
            'start' and 'end' keys; entries are processed in the given
            order (presumably sorted by start time; not re-sorted here)
        overlap_threshold: Threshold for merging close timestamps (seconds)

    Returns:
        List with merged timestamps
    """
    if not word_timestamps:
        return []

    merged_timestamps = []
    current_group = [word_timestamps[0]]
    # Track the latest end in the group rather than the last word's end:
    # a short word nested inside a long one must not shrink the group's
    # extent, either for the gap test or for the merged entry.
    group_end = word_timestamps[0]['end']

    for word_data in word_timestamps[1:]:
        if word_data['start'] - group_end <= overlap_threshold:
            current_group.append(word_data)
            group_end = max(group_end, word_data['end'])
        else:
            merged_timestamps.append(_collapse_word_group(current_group))
            current_group = [word_data]
            group_end = word_data['end']

    # Handle final group
    merged_timestamps.append(_collapse_word_group(current_group))

    return merged_timestamps