Spaces:

aseelflihan
/

SyncMaster

Sleeping

App Files Files Community

SyncMaster / utils.py

aseelflihan

Upload 15 files

126577b verified 10 months ago

raw

history blame contribute delete

9.96 kB

	import os
	import mimetypes
	import tempfile
	from pathlib import Path
	from typing import Optional, List, Dict
	import librosa
	import numpy as np

	def format_timestamp(seconds: float) -> str:
	    """
	    Format seconds into MM:SS.mmm format

	    The value is rounded to whole milliseconds *before* it is split into
	    minutes and seconds, so a value such as 59.9996 rolls over to
	    "01:00.000" instead of producing the invalid "00:60.000".

	    Args:
	        seconds: Time in seconds. Negative values are rendered with a
	            leading "-" followed by the formatted magnitude.

	    Returns:
	        Formatted timestamp string
	    """
	    # Work in integer milliseconds to avoid float carry problems.
	    total_ms = int(round(abs(seconds) * 1000))

	    # Only show a sign when the rounded magnitude is non-zero ("-00:00.000"
	    # would be misleading for values like -0.0001).
	    sign = "-" if seconds < 0 and total_ms else ""

	    minutes, rest_ms = divmod(total_ms, 60_000)
	    whole_seconds, millis = divmod(rest_ms, 1000)
	    return f"{sign}{minutes:02d}:{whole_seconds:02d}.{millis:03d}"

	def validate_audio_file(file_path: str) -> bool:
	    """
	    Validate if the file is a supported audio format

	    The checks run from cheapest to most expensive: the path must be an
	    existing regular file, have a supported extension, not be mapped to a
	    non-audio MIME type, and finally be decodable by librosa.

	    Args:
	        file_path: Path to the audio file

	    Returns:
	        True if valid, False otherwise
	    """
	    supported_extensions = ('.mp3', '.wav', '.m4a', '.flac', '.ogg')

	    try:
	        # A directory that happens to be called "x.mp3" is not an audio file.
	        if not os.path.isfile(file_path):
	            return False

	        # Check file extension
	        if Path(file_path).suffix.lower() not in supported_extensions:
	            return False

	        # Check MIME type (an unknown type is not treated as a failure)
	        mime_type, _ = mimetypes.guess_type(file_path)
	        if mime_type and not mime_type.startswith('audio/'):
	            return False

	        # Decode just 1 second to verify the file really contains audio.
	        librosa.load(file_path, duration=1.0)
	        return True

	    except Exception:
	        # Any failure means "not valid". `Exception` (rather than a bare
	        # except) lets KeyboardInterrupt/SystemExit propagate.
	        return False

	def get_audio_info(file_path: str) -> Dict:
	    """
	    Get information about the audio file

	    The file is loaded with ``sr=None`` and ``mono=False`` so that the
	    reported sample rate and channel count describe the file itself.
	    With librosa's defaults the audio is resampled to 22050 Hz and mixed
	    down to mono, which would make those two fields constant.

	    Args:
	        file_path: Path to the audio file

	    Returns:
	        Dictionary with the keys 'duration' (seconds), 'sample_rate',
	        'channels', 'file_size' (bytes) and 'format' (lower-cased file
	        extension). If anything fails, the same keys are returned with
	        zero/'unknown' values plus an 'error' message.
	    """
	    try:
	        # Load audio file at its native rate, keeping all channels
	        samples, sample_rate = librosa.load(file_path, sr=None, mono=False)

	        # Mono audio is 1-D; multi-channel audio is (channels, n_samples).
	        channels = 1 if samples.ndim == 1 else samples.shape[0]

	        # The last axis is always the time axis.
	        duration = samples.shape[-1] / sample_rate

	        return {
	            'duration': duration,
	            'sample_rate': sample_rate,
	            'channels': channels,
	            'file_size': os.path.getsize(file_path),
	            'format': Path(file_path).suffix.lower()
	        }

	    except Exception as e:
	        return {
	            'error': str(e),
	            'duration': 0,
	            'sample_rate': 0,
	            'channels': 0,
	            'file_size': 0,
	            'format': 'unknown'
	        }

	def clean_text(text: str) -> str:
	    """
	    Clean and normalize text for better processing

	    Removes the transcription artifact tags Music, Applause and Laughter
	    when written in square brackets or parentheses, in any letter case
	    (e.g. "[Music]", "[MUSIC]", "(laughter)"), then collapses every run
	    of whitespace into a single space.

	    Args:
	        text: Input text

	    Returns:
	        Cleaned text ("" for empty or None input)
	    """
	    import re

	    if not text:
	        return ""

	    # Remove common transcription artifacts. Brackets must match, so
	    # "[Music)" is left alone.
	    artifact_pattern = (
	        r'\[(?:music|applause|laughter)\]'
	        r'|\((?:music|applause|laughter)\)'
	    )
	    text = re.sub(artifact_pattern, '', text, flags=re.IGNORECASE)

	    # split()/join trims both ends and collapses internal whitespace,
	    # including the gaps left behind by removed tags.
	    return ' '.join(text.split())

	def split_text_into_chunks(text: str, max_chars_per_chunk: int = 100) -> List[str]:
	    """
	    Split text into chunks suitable for video display

	    Words are packed greedily: a word is added to the current chunk as
	    long as the joined chunk (words plus single separating spaces) stays
	    within the limit. Words are never split, so a single word longer
	    than the limit becomes a chunk of its own.

	    Args:
	        text: Input text
	        max_chars_per_chunk: Maximum characters per chunk

	    Returns:
	        List of text chunks
	    """
	    if not text:
	        return []

	    chunks = []
	    current_words = []
	    current_length = 0  # exact length of ' '.join(current_words)

	    for word in text.split():
	        # A separating space is only needed when the chunk already has
	        # content; charging it to the first word would cut the chunk
	        # one character short of the limit.
	        separator = 1 if current_words else 0
	        projected_length = current_length + separator + len(word)

	        if current_words and projected_length > max_chars_per_chunk:
	            # Close the current chunk and start a new one with this word
	            chunks.append(' '.join(current_words))
	            current_words = [word]
	            current_length = len(word)
	        else:
	            current_words.append(word)
	            current_length = projected_length

	    # Add final chunk
	    if current_words:
	        chunks.append(' '.join(current_words))

	    return chunks

	def convert_color_hex_to_rgb(hex_color: str) -> tuple:
	    """
	    Convert hex color to RGB tuple

	    Accepts six-digit colors ('#FF0000' or 'FF0000') and the three-digit
	    shorthand ('#F00'), with or without the leading '#'. Anything else
	    falls back to white.

	    Args:
	        hex_color: Hex color string (e.g., '#FF0000')

	    Returns:
	        RGB tuple (r, g, b); (255, 255, 255) when the input is not a
	        valid hex color
	    """
	    default_white = (255, 255, 255)

	    if not isinstance(hex_color, str):
	        return default_white

	    digits = hex_color.strip().lstrip('#')

	    # Expand shorthand: 'f0a' -> 'ff00aa'
	    if len(digits) == 3:
	        digits = ''.join(ch * 2 for ch in digits)

	    # Validate explicitly: int(x, 16) alone would also accept signs and
	    # embedded whitespace such as '+f' or ' f'.
	    valid_digits = '0123456789abcdefABCDEF'
	    if len(digits) != 6 or any(ch not in valid_digits for ch in digits):
	        return default_white

	    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

	def convert_rgb_to_hex(r: int, g: int, b: int) -> str:
	    """
	    Convert RGB values to hex color string

	    Each component is rounded to the nearest integer and clamped to the
	    0-255 range, so the result is always a well-formed '#rrggbb' string
	    (out-of-range input would otherwise yield strings like '#100-1ff').

	    Args:
	        r, g, b: RGB color values (0-255)

	    Returns:
	        Lower-case hex color string
	    """
	    def _channel(value) -> int:
	        # Clamp into the valid byte range after rounding.
	        return max(0, min(255, int(round(value))))

	    return f"#{_channel(r):02x}{_channel(g):02x}{_channel(b):02x}"

	def estimate_video_file_size(duration: float, resolution: tuple = (1280, 720),
	                             bitrate_kbps: int = 2000) -> int:
	    """
	    Estimate the file size of a video based on duration and quality

	    The estimate is purely bitrate-based: bitrate * duration / 8.

	    Args:
	        duration: Video duration in seconds
	        resolution: Video resolution tuple (width, height). Accepted for
	            interface compatibility but not used in the calculation.
	        bitrate_kbps: Video bitrate in kbps

	    Returns:
	        Estimated file size in bytes (never negative)
	    """
	    # kbps -> bits per second, times seconds, divided by 8 bits per byte
	    estimated_size = (bitrate_kbps * 1000 * duration) / 8

	    # A negative duration or bitrate cannot describe a real file; report
	    # an empty file rather than a negative size.
	    return max(0, int(estimated_size))

	def create_safe_filename(filename: str) -> str:
	    """
	    Create a safe filename by removing/replacing invalid characters

	    Reserved characters (< > : " / \\ | ? *) and control characters
	    (including NUL) are replaced by '_', runs of underscores/whitespace
	    are collapsed, and leading underscores as well as trailing
	    underscores and dots are trimmed. Names that end up empty, such as
	    "", "___", "." or "..", become "output".

	    Args:
	        filename: Original filename

	    Returns:
	        Safe filename
	    """
	    import re

	    # Replace reserved and control characters
	    safe_filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

	    # Collapse runs of underscores and whitespace
	    safe_filename = re.sub(r'[_\s]+', '_', safe_filename)

	    # Trim leading underscores; trim trailing underscores and dots. A
	    # leading dot is kept so hidden-file style names survive, while the
	    # special names "." and ".." are reduced to the empty string.
	    safe_filename = safe_filename.lstrip('_').rstrip('._')

	    # Ensure filename is not empty
	    if not safe_filename:
	        safe_filename = "output"

	    return safe_filename

	def format_file_size(size_bytes: int) -> str:
	    """
	    Format file size in human-readable format

	    Uses binary multiples (1 KB = 1024 B). Sizes beyond the largest
	    known unit are expressed in that unit rather than failing, and
	    negative sizes keep their sign.

	    Args:
	        size_bytes: File size in bytes

	    Returns:
	        Formatted file size string, e.g. "1.5 KB" ("0 B" for zero)
	    """
	    if size_bytes == 0:
	        return "0 B"

	    size_names = ("B", "KB", "MB", "GB", "TB")
	    sign = "-" if size_bytes < 0 else ""
	    value = float(abs(size_bytes))

	    # Repeated division avoids floating-point logarithms (which can land
	    # just below an exact power of 1024) and cannot index past the last
	    # unit.
	    unit_index = 0
	    while value >= 1024 and unit_index < len(size_names) - 1:
	        value /= 1024
	        unit_index += 1

	    return f"{sign}{round(value, 2)} {size_names[unit_index]}"

	def validate_word_timestamps(word_timestamps: List[Dict]) -> List[Dict]:
	    """
	    Validate and clean word timestamps data

	    Entries are dropped when they are not dictionaries, when 'word' is
	    missing, not a string or blank, or when 'start'/'end' cannot be
	    converted to a finite number. Surviving entries get a stripped word,
	    a non-negative start, an end strictly after the start, and both
	    times rounded to milliseconds.

	    Args:
	        word_timestamps: List of word timestamp dictionaries

	    Returns:
	        Cleaned and validated word timestamps (new dictionaries with the
	        keys 'word', 'start' and 'end')
	    """
	    import math

	    validated_timestamps = []

	    for word_data in word_timestamps:
	        # Ensure the entry has the expected shape
	        if not isinstance(word_data, dict):
	            continue

	        # A None or non-string word would crash on .strip(); skip it.
	        raw_word = word_data.get('word', '')
	        if not isinstance(raw_word, str):
	            continue

	        # Skip empty words
	        word = raw_word.strip()
	        if not word:
	            continue

	        # Ensure numeric timestamps
	        try:
	            start = float(word_data.get('start', 0))
	            end = float(word_data.get('end', 0))
	        except (ValueError, TypeError):
	            continue

	        # NaN/inf defeat the ordering checks below, so reject them.
	        if not (math.isfinite(start) and math.isfinite(end)):
	            continue

	        # Ensure logical timestamp order
	        if start < 0:
	            start = 0.0
	        if end <= start:
	            end = start + 0.1  # Minimum duration

	        validated_timestamps.append({
	            'word': word,
	            'start': round(start, 3),
	            'end': round(end, 3)
	        })

	    return validated_timestamps

	def merge_overlapping_timestamps(word_timestamps: List[Dict],
	                                 overlap_threshold: float = 0.05) -> List[Dict]:
	    """
	    Merge overlapping or very close word timestamps

	    Consecutive entries are grouped while the next entry starts no later
	    than ``overlap_threshold`` seconds after the latest end time seen in
	    the current group. Using the latest end (not merely the previous
	    entry's end) keeps a short word nested inside a longer one from
	    truncating or splitting the group.

	    Args:
	        word_timestamps: List of word timestamp dictionaries, each with
	            'word', 'start' and 'end' keys, in playback order
	        overlap_threshold: Threshold for merging close timestamps (seconds)

	    Returns:
	        List with merged timestamps. Entries that were not merged are
	        returned as the original dictionaries; merged entries are new
	        dictionaries whose 'word' is the space-joined words.
	    """
	    if not word_timestamps:
	        return []

	    merged_timestamps = []
	    current_group = [word_timestamps[0]]
	    group_end = current_group[0]['end']

	    for word_data in word_timestamps[1:]:
	        if word_data['start'] - group_end <= overlap_threshold:
	            # Close enough to the running group: extend it
	            current_group.append(word_data)
	            group_end = max(group_end, word_data['end'])
	        else:
	            # Gap found: emit the finished group and start a new one
	            merged_timestamps.append(_merge_group(current_group, group_end))
	            current_group = [word_data]
	            group_end = word_data['end']

	    # Handle final group
	    merged_timestamps.append(_merge_group(current_group, group_end))

	    return merged_timestamps


	def _merge_group(group: List[Dict], group_end: float) -> Dict:
	    """Collapse a non-empty group of word entries into a single entry."""
	    if len(group) == 1:
	        # Nothing to merge; hand back the original entry untouched.
	        return group[0]

	    return {
	        'word': ' '.join(entry['word'] for entry in group),
	        'start': group[0]['start'],
	        'end': group_end
	    }