Spaces:

3Stark123
/

Infographics_Generator_1

Running

App Files Files Community

Infographics_Generator_1 / utils /helpers.py

3Stark123

Create utils/helpers.py

3f58cad verified 6 months ago

raw

history blame contribute delete

5.24 kB


	"""
	Helper utility functions
	"""
	import functools
	import hashlib
	import logging
	import os
	import re
	import time
	from datetime import datetime
	from typing import Any, Dict, List, Optional

	logger = logging.getLogger(__name__)

	def sanitize_text(text: str) -> str:
	    """Normalize free-form text before further processing.

	    Surrounding whitespace is dropped, every run of whitespace is collapsed
	    to a single space, and then every character that is not a word
	    character, whitespace, or basic punctuation (period, comma, exclamation
	    mark, question mark, semicolon, colon, hyphen, single or double quote)
	    is removed.

	    Args:
	        text: Raw input text. Falsy values such as ``None`` or an empty
	            string are accepted.

	    Returns:
	        The cleaned text, or an empty string when ``text`` is falsy.
	    """
	    if text:
	        collapsed = re.sub(r'\s+', ' ', text.strip())
	        # Whitelist filter: anything outside the allowed set is deleted.
	        return re.sub(r'[^\w\s\.\,\!\?\;\:\-\'\"]', '', collapsed)
	    return ""

	def validate_hex_color(color: str) -> bool:
	    """Check whether ``color`` is a ``#RGB`` or ``#RRGGBB`` hex color.

	    Args:
	        color: Candidate color string. Falsy values (``None``, ``""``) are
	            treated as invalid.

	    Returns:
	        ``True`` when ``color`` is a ``#`` followed by exactly three or
	        exactly six hexadecimal digits (case-insensitive), else ``False``.
	    """
	    if not color:
	        return False

	    # The alternation must use an unescaped ``|``; an escaped pipe would
	    # demand a literal "|" character in the input. ``fullmatch`` is used
	    # instead of ``^...$`` so that a trailing newline is not accepted.
	    pattern = r'#(?:[A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})'
	    return re.fullmatch(pattern, color) is not None

	def generate_unique_id(content: str = "") -> str:
	    """Build a short identifier from ``content`` and the current time.

	    The content is concatenated with the ISO-8601 form of
	    ``datetime.now()``, hashed with MD5, and the first eight hexadecimal
	    characters of the digest are returned. MD5 serves only as a compact
	    fingerprint here.

	    Args:
	        content: Optional text mixed into the hash.

	    Returns:
	        An 8-character lowercase hexadecimal string.
	    """
	    seed = f"{content}{datetime.now().isoformat()}"
	    digest = hashlib.md5(seed.encode()).hexdigest()
	    return digest[:8]

	def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
	    """Shorten ``text`` so the result is at most ``max_length`` characters.

	    Args:
	        text: Text to shorten.
	        max_length: Maximum length of the returned string, suffix included.
	        suffix: Marker appended when the text is cut.

	    Returns:
	        ``text`` unchanged when it already fits. Otherwise the leading part
	        of ``text`` (whitespace-stripped) followed by ``suffix``. When
	        ``max_length`` is too small to hold the suffix, a plain cut of
	        ``text`` without the suffix is returned.
	    """
	    if len(text) <= max_length:
	        return text

	    if max_length < len(suffix):
	        # No room for the suffix: a negative slice index here would produce
	        # a result longer than max_length, so fall back to a hard cut.
	        return text[:max(max_length, 0)]

	    return text[:max_length - len(suffix)].strip() + suffix

	def extract_numbers(text: str) -> List[float]:
	    """Collect every numeric literal found in ``text`` as a float.

	    A number is an optional leading minus sign, one or more digits, and an
	    optional fractional part (a dot followed by digits).

	    Args:
	        text: Text to scan.

	    Returns:
	        The numbers in order of appearance; empty when none are found.
	    """
	    number_re = r'-?\d+(?:\.\d+)?'
	    return [float(found.group()) for found in re.finditer(number_re, text)]

	def calculate_reading_time(text: str, wpm: int = 200) -> int:
	    """Estimate how many minutes it takes to read ``text``.

	    Words are counted by splitting on whitespace; the count is divided by
	    ``wpm`` and rounded with the built-in ``round``.

	    Args:
	        text: Text to measure.
	        wpm: Assumed reading speed in words per minute.

	    Returns:
	        The rounded estimate, never less than one minute.
	    """
	    words = text.split()
	    minutes = round(len(words) / wpm)
	    # Even an empty or very short text is reported as one minute.
	    return minutes if minutes > 1 else 1

	def format_file_size(size_bytes: int) -> str:
	    """Render a byte count as a short human-readable string.

	    The value is divided by 1024 until it drops below 1024 or the largest
	    supported unit (GB) is reached, then formatted with one decimal place.

	    Args:
	        size_bytes: Size in bytes.

	    Returns:
	        ``"0 B"`` for zero, otherwise a string such as ``"1.5 KB"``.
	    """
	    if size_bytes == 0:
	        return "0 B"

	    remaining_units = ["B", "KB", "MB", "GB"]
	    value = size_bytes
	    unit = remaining_units.pop(0)
	    # Step up one unit at a time; stop once the value fits or units run out.
	    while remaining_units and value >= 1024:
	        value /= 1024.0
	        unit = remaining_units.pop(0)

	    return f"{value:.1f} {unit}"

	def safe_divide(a: float, b: float, default: float = 0.0) -> float:
	    """Divide ``a`` by ``b`` without raising on bad operands.

	    Args:
	        a: Numerator.
	        b: Denominator.
	        default: Value returned when the division cannot be performed.

	    Returns:
	        ``a / b``, or ``default`` when ``b`` equals zero or the operation
	        raises ``TypeError`` or ``ZeroDivisionError``.
	    """
	    try:
	        if b != 0:
	            return a / b
	    except (TypeError, ZeroDivisionError):
	        # Deliberate best effort: fall through to the default below.
	        pass
	    return default

	def merge_dicts(dict1: Dict, dict2: Dict) -> Dict:
	    """Recursively merge ``dict2`` into a copy of ``dict1``.

	    When both inputs hold a dict under the same key, those dicts are merged
	    recursively; in every other case the value from ``dict2`` wins.

	    Args:
	        dict1: Base mapping. It is shallow-copied, so the top level is not
	            modified.
	        dict2: Mapping whose entries take precedence.

	    Returns:
	        A new dict with the merged contents.
	    """
	    merged = dict1.copy()

	    for key, incoming in dict2.items():
	        existing = merged.get(key)
	        both_are_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
	        merged[key] = merge_dicts(existing, incoming) if both_are_dicts else incoming

	    return merged

	def validate_content_length(text: str, min_length: int = 50, max_length: int = 15000) -> Dict[str, Any]:
	    """Report whether the stripped length of ``text`` is within bounds.

	    Args:
	        text: Content to measure; surrounding whitespace is not counted.
	        min_length: Smallest acceptable length (inclusive).
	        max_length: Largest acceptable length (inclusive).

	    Returns:
	        A dict with the keys ``valid``, ``length``, ``min_required``,
	        ``max_allowed`` and ``message``.
	    """
	    stripped_length = len(text.strip())
	    out_of_bounds = stripped_length < min_length or stripped_length > max_length

	    report = {
	        'valid': not out_of_bounds,
	        'length': stripped_length,
	        'min_required': min_length,
	        'max_allowed': max_length,
	        'message': f"Content length: {stripped_length} characters",
	    }
	    return report

	def extract_urls(text: str) -> List[str]:
	    """Find all ``http://`` and ``https://`` URLs in ``text``.

	    After the scheme, a URL is taken to be the longest run of letters,
	    digits, characters in the ASCII range ``$`` through ``_`` (which covers
	    ``/ : ? = - . ,`` among others), ``@ . & + ! * ( ) ,`` and
	    percent-encoded bytes. Whitespace therefore ends a URL, while trailing
	    punctuation such as a final period is kept as part of the match.

	    Args:
	        text: Text to scan.

	    Returns:
	        The matched URLs in order of appearance; empty when none are found.
	    """
	    # The alternatives must be separated by an unescaped ``|``; escaped
	    # pipes turn the group into a fixed sequence that requires literal "|"
	    # characters, so ordinary URLs would never match.
	    url_pattern = (
	        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]'
	        r'|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
	    )
	    return re.findall(url_pattern, text)

	def clean_filename(filename: str) -> str:
	    """Make ``filename`` safe to use as a file name.

	    The characters ``< > : " / | ? *`` and the backslash are each replaced
	    with an underscore, leading and trailing dots and spaces are removed,
	    and the result is limited to 100 characters while keeping the file
	    extension where possible.

	    Args:
	        filename: Proposed file name (not a path; separators are replaced).

	    Returns:
	        The cleaned name, or ``'untitled'`` when nothing is left.
	    """
	    max_length = 100

	    cleaned = re.sub(r'[<>:"/\\|?*]', '_', filename).strip('. ')

	    if len(cleaned) > max_length:
	        stem, ext = os.path.splitext(cleaned)
	        if len(ext) < max_length:
	            # Shorten only the stem so the extension survives intact.
	            cleaned = stem[:max_length - len(ext)] + ext
	        else:
	            # The "extension" alone exceeds the limit; a negative slice
	            # would not shorten anything, so cut the whole name instead.
	            cleaned = cleaned[:max_length]

	    return cleaned or 'untitled'

	def log_performance(func):
	    """Decorator that logs how long each call to ``func`` takes.

	    A successful call is logged at INFO level with its duration. A call
	    that raises is logged at ERROR level with the duration and the
	    exception, and the exception is then re-raised unchanged.

	    Args:
	        func: The callable to wrap.

	    Returns:
	        A wrapper that accepts the same positional and keyword arguments
	        as ``func`` and returns its result.
	    """
	    # ``wraps`` keeps the wrapped function's name and docstring visible.
	    @functools.wraps(func)
	    def wrapper(*args, **kwargs):
	        # A monotonic clock is unaffected by system clock adjustments.
	        started = time.perf_counter()
	        try:
	            result = func(*args, **kwargs)
	        except Exception as exc:
	            elapsed = time.perf_counter() - started
	            logger.error("%s failed after %.2f seconds: %s", func.__name__, elapsed, exc)
	            raise

	        elapsed = time.perf_counter() - started
	        logger.info("%s completed in %.2f seconds", func.__name__, elapsed)
	        return result

	    return wrapper

	def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]:
	    """Split ``text`` into overlapping chunks of at most ``chunk_size`` chars.

	    Each chunk is cut back to the last period, or failing that the last
	    space, when that boundary lies in the second half of the chunk and
	    beyond the overlap. Each following chunk starts ``overlap`` characters
	    before the end of the previous one.

	    Args:
	        text: Text to split.
	        chunk_size: Maximum number of characters per chunk.
	        overlap: Number of characters shared by consecutive chunks.

	    Returns:
	        The list of chunks; ``[text]`` when the text already fits in one.

	    Raises:
	        ValueError: If the text needs splitting and ``chunk_size`` is not
	            positive or ``overlap`` is not in ``[0, chunk_size)``.
	    """
	    if len(text) <= chunk_size:
	        return [text]

	    # Validated only once splitting is actually required, so calls that
	    # return the text as a single chunk keep working with any settings.
	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be positive")
	    if not 0 <= overlap < chunk_size:
	        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

	    # A chunk may only be shortened to a boundary past both the midpoint
	    # and the overlap. Otherwise ``len(chunk) - overlap`` could be zero or
	    # negative and the loop would never advance.
	    min_cut = max(chunk_size // 2, overlap)

	    chunks = []
	    start = 0

	    while start < len(text):
	        end = start + chunk_size

	        if end >= len(text):
	            # Remainder fits in one chunk: take it all and stop.
	            chunks.append(text[start:])
	            break

	        chunk = text[start:end]

	        # Prefer ending on a sentence boundary, then on a word boundary.
	        last_sentence = chunk.rfind('.')
	        if last_sentence > min_cut:
	            chunk = chunk[:last_sentence + 1]
	        else:
	            last_space = chunk.rfind(' ')
	            if last_space > min_cut:
	                chunk = chunk[:last_space]

	        chunks.append(chunk)
	        start += len(chunk) - overlap

	    return chunks