Spaces:
Sleeping
Sleeping
| import os | |
| import mimetypes | |
| import tempfile | |
| from pathlib import Path | |
| from typing import Optional, List, Dict | |
| import librosa | |
| import numpy as np | |
def format_timestamp(seconds: float) -> str:
    """
    Format seconds into MM:SS.mmm format

    The value is rounded to whole milliseconds *before* it is split into
    minutes and seconds, so a value such as 59.9996 renders as "01:00.000"
    instead of the invalid "00:60.000". Negative values are rendered as a
    leading "-" followed by the formatted magnitude.

    Args:
        seconds: Time in seconds

    Returns:
        Formatted timestamp string
    """
    # Work in integer milliseconds so rounding can never yield "60.000" seconds.
    total_ms = int(round(abs(seconds) * 1000))
    minutes, ms_in_minute = divmod(total_ms, 60_000)
    whole_seconds, millis = divmod(ms_in_minute, 1000)
    # Avoid printing "-00:00.000" for tiny negative values that round to zero.
    sign = "-" if seconds < 0 and total_ms else ""
    return f"{sign}{minutes:02d}:{whole_seconds:02d}.{millis:03d}"
def validate_audio_file(file_path: str) -> bool:
    """
    Validate if the file is a supported audio format

    The checks run from cheapest to most expensive: the path must be an
    existing regular file, its extension must be in the supported list, its
    guessed MIME type (when one can be guessed) must be an audio type, and
    finally librosa must be able to decode the first second of it.

    Args:
        file_path: Path to the audio file

    Returns:
        True if valid, False otherwise
    """
    supported_extensions = ('.mp3', '.wav', '.m4a', '.flac', '.ogg')
    try:
        if not os.path.isfile(file_path):
            return False

        # Check file extension
        if Path(file_path).suffix.lower() not in supported_extensions:
            return False

        # Check MIME type (an unknown type is not treated as a failure)
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type and not mime_type.startswith('audio/'):
            return False

        # Load just 1 second to verify that the file actually decodes
        librosa.load(file_path, duration=1.0)
        return True
    except Exception:
        # Any failure means "not a valid audio file". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def get_audio_info(file_path: str) -> Dict:
    """
    Get information about the audio file

    The file is loaded with its native sampling rate and channel layout
    (``sr=None, mono=False``). With librosa's defaults the audio would be
    resampled to 22050 Hz and mixed down to mono, so the reported sample
    rate and channel count would not describe the file itself.

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary with the keys 'duration' (seconds), 'sample_rate',
        'channels', 'file_size' (bytes) and 'format' (lower-cased file
        extension). If anything fails, the same keys are returned with
        zero/'unknown' values plus an 'error' key holding the message.
    """
    try:
        # Load audio file without resampling or down-mixing
        samples, sample_rate = librosa.load(file_path, sr=None, mono=False)

        # Mono audio is 1-D; multi-channel audio is (channels, n_samples)
        channels = 1 if samples.ndim == 1 else samples.shape[0]
        # The last axis is always the sample axis, whatever the channel count
        duration = samples.shape[-1] / sample_rate

        return {
            'duration': duration,
            'sample_rate': sample_rate,
            'channels': channels,
            'file_size': os.path.getsize(file_path),
            'format': Path(file_path).suffix.lower()
        }
    except Exception as e:
        return {
            'error': str(e),
            'duration': 0,
            'sample_rate': 0,
            'channels': 0,
            'file_size': 0,
            'format': 'unknown'
        }
def clean_text(text: str) -> str:
    """
    Normalize whitespace and strip transcription markers from text

    Runs of whitespace are collapsed to single spaces, the bracketed and
    parenthesised Music/Applause/Laughter markers are removed, and the
    whitespace is collapsed once more to close the gaps left behind.

    Args:
        text: Input text

    Returns:
        Cleaned text ("" when the input is empty or None)
    """
    if not text:
        return ""

    # Markers are removed in this fixed order
    artifacts = (
        '[Music]',
        '[Applause]',
        '[Laughter]',
        '(Music)',
        '(Applause)',
        '(Laughter)',
    )

    cleaned = ' '.join(text.split())
    for marker in artifacts:
        cleaned = cleaned.replace(marker, '')

    # Removing markers can leave double spaces behind; collapse them again
    cleaned = ' '.join(cleaned.split())
    return cleaned.strip()
def split_text_into_chunks(text: str, max_chars_per_chunk: int = 100) -> List[str]:
    """
    Split text into chunks suitable for video display

    Words are packed greedily: a word joins the current chunk as long as the
    joined chunk (words separated by single spaces) stays within
    ``max_chars_per_chunk``. Words are never split, so a single word longer
    than the limit becomes a chunk of its own.

    Args:
        text: Input text
        max_chars_per_chunk: Maximum characters per chunk

    Returns:
        List of text chunks
    """
    if not text:
        return []

    chunks = []
    current_chunk = []
    current_length = 0  # always equals len(' '.join(current_chunk))

    for word in text.split():
        # A separating space is only needed when the chunk already holds a
        # word. Charging it unconditionally made the first chunk one
        # character shorter than every later chunk.
        separator = 1 if current_chunk else 0
        candidate_length = current_length + separator + len(word)

        if current_chunk and candidate_length > max_chars_per_chunk:
            # Add current chunk and start new one
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = candidate_length

    # Add final chunk
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks
def convert_color_hex_to_rgb(hex_color: str) -> tuple:
    """
    Convert hex color to RGB tuple

    Accepts six-digit ('#FF0000') and three-digit shorthand ('#F00') forms,
    with or without the leading '#', and ignores surrounding whitespace.
    Anything else falls back to white.

    Args:
        hex_color: Hex color string (e.g., '#FF0000')

    Returns:
        RGB tuple (r, g, b); (255, 255, 255) when the input is not a valid
        hex color
    """
    default = (255, 255, 255)  # Default to white
    if not isinstance(hex_color, str):
        return default

    digits = hex_color.strip().lstrip('#')

    # Expand shorthand: 'f0a' -> 'ff00aa'
    if len(digits) == 3:
        digits = ''.join(ch * 2 for ch in digits)

    # Validate explicitly: int(x, 16) alone also accepts signs and
    # whitespace, so a string like '+f+f+f' would parse as a color.
    if len(digits) != 6 or any(ch not in '0123456789abcdefABCDEF' for ch in digits):
        return default

    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
def convert_rgb_to_hex(r: int, g: int, b: int) -> str:
    """
    Convert RGB values to hex color string

    Each channel is rounded to the nearest integer and clamped to 0-255, so
    the result is always a well-formed '#rrggbb' string. Without clamping,
    a value above 255 or below 0 would produce a string that is too long or
    contains a '-' sign.

    Args:
        r, g, b: RGB color values (0-255)

    Returns:
        Hex color string (lower-case, e.g. '#ff0000')
    """
    def _channel(value) -> int:
        # Clamp after rounding so floats such as 255.4 stay in range
        return max(0, min(255, int(round(value))))

    return f"#{_channel(r):02x}{_channel(g):02x}{_channel(b):02x}"
def estimate_video_file_size(duration: float, resolution: tuple = (1280, 720),
                             bitrate_kbps: int = 2000) -> int:
    """
    Estimate a video's file size from its duration and bitrate

    The estimate is the total number of bits (bitrate x duration) divided
    by 8, truncated to an integer. ``resolution`` is accepted but does not
    take part in the calculation.

    Args:
        duration: Video duration in seconds
        resolution: Video resolution tuple (width, height)
        bitrate_kbps: Video bitrate in kbps

    Returns:
        Estimated file size in bytes
    """
    bits_per_second = bitrate_kbps * 1000
    total_bits = bits_per_second * duration
    # 8 bits per byte
    return int(total_bits / 8)
def create_safe_filename(filename: str) -> str:
    """
    Create a safe filename by removing/replacing invalid characters

    Reserved punctuation (< > : " / \\ | ? *) and ASCII control characters
    are replaced with underscores, runs of underscores/whitespace collapse
    to a single underscore, and leading/trailing underscores are trimmed.
    A result that is empty or consists only of dots (such as '.' or '..')
    is replaced by "output".

    Args:
        filename: Original filename

    Returns:
        Safe filename
    """
    import re

    # Replace invalid characters, including control characters such as NUL,
    # which are rejected by file-system APIs
    safe_filename = re.sub(r'[<>:"/\\|?*\x00-\x1f\x7f]', '_', filename)

    # Remove extra underscores and spaces
    safe_filename = re.sub(r'[_\s]+', '_', safe_filename)

    # Trim leading/trailing underscores
    safe_filename = safe_filename.strip('_')

    # Ensure filename is not empty and is not a directory reference ('.', '..')
    if not safe_filename.strip('.'):
        safe_filename = "output"

    return safe_filename
def format_file_size(size_bytes: int) -> str:
    """
    Format file size in human-readable format

    The size is divided by 1024 until it drops below 1024 or the largest
    known unit is reached, then rounded to two decimals. Very large sizes
    are expressed in the largest unit rather than failing, and the unit is
    chosen by exact division rather than floating-point logarithms, so
    exact powers of 1024 always land on the right unit.

    Args:
        size_bytes: File size in bytes

    Returns:
        Formatted file size string (e.g. "1.5 KB")

    Raises:
        ValueError: If size_bytes is negative
    """
    if size_bytes == 0:
        return "0 B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")

    size_names = ("B", "KB", "MB", "GB", "TB", "PB")
    last_unit = len(size_names) - 1

    value = float(size_bytes)
    unit_index = 0
    # Dividing by a power of two is exact in floating point
    while value >= 1024 and unit_index < last_unit:
        value /= 1024
        unit_index += 1

    return f"{round(value, 2)} {size_names[unit_index]}"
def validate_word_timestamps(word_timestamps: List[Dict]) -> List[Dict]:
    """
    Validate and clean word timestamps data

    Entries are dropped when they are not dictionaries, when 'word' is
    missing, empty or not a string, or when 'start'/'end' cannot be
    converted to a finite number. For the remaining entries a negative
    start is raised to 0, an end that is not after the start is moved to
    start + 0.1, and both times are rounded to milliseconds.

    Args:
        word_timestamps: List of word timestamp dictionaries

    Returns:
        Cleaned and validated word timestamps, each with exactly the keys
        'word', 'start' and 'end'
    """
    import math

    validated_timestamps = []

    for word_data in word_timestamps or []:
        # Ensure required fields exist
        if not isinstance(word_data, dict):
            continue

        word = word_data.get('word', '')
        # A None or otherwise non-string word cannot be stripped; skip it
        if not isinstance(word, str):
            continue
        word = word.strip()

        # Skip empty words
        if not word:
            continue

        # Ensure numeric timestamps
        try:
            start = float(word_data.get('start', 0))
            end = float(word_data.get('end', 0))
        except (ValueError, TypeError):
            continue

        # NaN/inf would pass every comparison below and leak into the output
        if not (math.isfinite(start) and math.isfinite(end)):
            continue

        # Ensure logical timestamp order
        if start < 0:
            start = 0.0
        if end <= start:
            end = start + 0.1  # Minimum duration

        validated_timestamps.append({
            'word': word,
            'start': round(start, 3),
            'end': round(end, 3)
        })

    return validated_timestamps
def _merge_word_group(group: List[Dict]) -> Dict:
    """Collapse a group of consecutive word entries into a single entry."""
    if len(group) == 1:
        # A lone word is passed through unchanged (same dict object)
        return group[0]
    return {
        'word': ' '.join(w['word'] for w in group),
        'start': group[0]['start'],
        # The latest end in the group, which is not necessarily the last
        # word's end when an earlier word fully contains a later one
        'end': max(w['end'] for w in group),
    }


def merge_overlapping_timestamps(word_timestamps: List[Dict],
                                 overlap_threshold: float = 0.05) -> List[Dict]:
    """
    Merge overlapping or very close word timestamps

    Entries are scanned in the given order. An entry joins the current group
    when its start is at most ``overlap_threshold`` seconds after the latest
    end seen in that group; otherwise the group is closed and a new one
    begins. A group of several words becomes one entry whose 'word' is the
    words joined by spaces, spanning from the first start to the latest end.

    Args:
        word_timestamps: List of word timestamp dictionaries, each with
            'word', 'start' and 'end' keys
        overlap_threshold: Threshold for merging close timestamps (seconds)

    Returns:
        List with merged timestamps

    Raises:
        KeyError: If an entry lacks 'start' or 'end', or if a merged entry
            lacks 'word'
    """
    if not word_timestamps:
        return []

    merged_timestamps = []
    current_group = [word_timestamps[0]]
    group_end = word_timestamps[0]['end']

    for word_data in word_timestamps[1:]:
        # Compare against the group's latest end so that a short word nested
        # inside a longer one cannot shrink the span or split the chain
        if word_data['start'] - group_end <= overlap_threshold:
            current_group.append(word_data)
            group_end = max(group_end, word_data['end'])
        else:
            # Merge current group and start new one
            merged_timestamps.append(_merge_word_group(current_group))
            current_group = [word_data]
            group_end = word_data['end']

    # Handle final group
    merged_timestamps.append(_merge_word_group(current_group))

    return merged_timestamps