Spaces:

AlBaraa63
/

MissionControlMCP

Sleeping

App Files Files Community

MissionControlMCP / utils /helpers.py

AlBaraa63

Upload 33 files

f1b19d3 verified about 2 months ago

raw

history blame contribute delete

4.93 kB

	"""
	Helper utility functions
	"""
	import re
	import logging
	from typing import List, Dict, Any
	from datetime import datetime

	# Setup logging: basicConfig sets the root logger level to INFO at import time
	# (it does nothing if the root logger already has handlers configured);
	# `logger` is the logger named after this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	def clean_text(text: str) -> str:
	    """
	    Clean and normalize text by dropping special characters and extra whitespace.

	    Args:
	        text: Raw text to clean

	    Returns:
	        Text containing only word characters, whitespace and basic punctuation
	        (. , ! ? ; : - ' " ( )), with every whitespace run collapsed to a single
	        space and no leading/trailing whitespace
	    """
	    # Remove special characters first: deleting a symbol that sits between two
	    # spaces would otherwise leave a double space behind after normalization.
	    text = re.sub(r'[^\w\s.,!?;:\-\'\"()]', '', text)
	    # Collapse every whitespace run (spaces, tabs, newlines) into one space
	    text = re.sub(r'\s+', ' ', text)
	    # Strip leading/trailing whitespace
	    return text.strip()


	def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
	    """
	    Split text into overlapping chunks for processing.

	    Args:
	        text: Text to chunk
	        chunk_size: Size of each chunk in characters; must be positive
	        overlap: Number of characters shared between consecutive chunks; must be
	            smaller than chunk_size (a negative value leaves gaps between chunks)

	    Returns:
	        List of text chunks (empty list for empty text)

	    Raises:
	        ValueError: If chunk_size is not positive or overlap >= chunk_size,
	            because the window could then never advance through the text
	    """
	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be a positive integer")
	    if overlap >= chunk_size:
	        raise ValueError("overlap must be smaller than chunk_size")

	    step = chunk_size - overlap  # distance the window advances per chunk
	    text_length = len(text)
	    chunks: List[str] = []
	    start = 0

	    while start < text_length:
	        end = start + chunk_size
	        chunks.append(text[start:end])
	        # Stop once the window has reached the end of the text; stepping back by
	        # `overlap` here would only emit a tail already contained in this chunk.
	        if end >= text_length:
	            break
	        start += step

	    return chunks


	def summarize_text(text: str, max_length: int = 500) -> str:
	    """
	    Build a simple extractive summary from the leading sentences of the text.

	    Sentences are obtained by splitting on runs of '.', '!' or '?'; each kept
	    sentence is re-terminated with ". ". Sentences are taken in order until the
	    next one would push the summary past max_length.

	    Args:
	        text: Text to summarize
	        max_length: Maximum length of summary

	    Returns:
	        Summarized text; if not even the first sentence fits, the text cut at
	        max_length, trimmed back to the last space and suffixed with "..."
	    """
	    pieces = []
	    used = 0  # characters accumulated so far, including each ". " terminator

	    for fragment in re.split(r'[.!?]+', text):
	        candidate = fragment.strip()
	        if not candidate:
	            continue
	        addition = candidate + ". "
	        if used + len(addition) > max_length:
	            break
	        pieces.append(addition)
	        used += len(addition)

	    summary = "".join(pieces)

	    # Nothing fit (or there were no sentences at all): fall back to truncation
	    if text and not summary:
	        truncated = text[:max_length]
	        summary = truncated.rsplit(' ', 1)[0] + "..."

	    return summary.strip()


	def extract_keywords(text: str, top_n: int = 10) -> List[str]:
	    """
	    Pick the most frequent words in the text as keywords.

	    Only lowercase-folded alphabetic words of four or more letters are
	    considered, and a small fixed stop-word list is ignored.

	    Args:
	        text: Text to analyze
	        top_n: Number of top keywords to return

	    Returns:
	        List of keywords, most frequent first; ties keep first-seen order
	    """
	    # Common words that carry no keyword value
	    ignored = {'that', 'this', 'with', 'from', 'have', 'been', 'were',
	               'will', 'would', 'could', 'should', 'about', 'their', 'there'}

	    # Tally occurrences; dict insertion order records first appearance
	    tally: Dict[str, int] = {}
	    for token in re.findall(r'\b[a-zA-Z]{4,}\b', text.lower()):
	        if token in ignored:
	            continue
	        tally[token] = tally.get(token, 0) + 1

	    # Stable sort on the count, highest first, then keep the leading slice
	    ranked = sorted(tally, key=tally.get, reverse=True)
	    return ranked[:top_n]


	# Compiled once at import time. The alternatives are separated by a real regex
	# alternation `|`; an escaped `\|` would match a literal pipe character instead.
	_URL_PATTERN = re.compile(
	    r'https?://'  # http:// or https://
	    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'  # domain...
	    r'localhost|'  # localhost...
	    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
	    r'(?::\d+)?'  # optional port
	    r'(?:/?|[/?]\S+)',  # optional path / query without whitespace
	    re.IGNORECASE,
	)


	def validate_url(url: str) -> bool:
	    """
	    Validate if a string is a proper http(s) URL.

	    Accepted hosts are a dotted domain name (TLD of 2-63 letters), "localhost"
	    or a dotted-quad of 1-3 digit groups, optionally followed by a port and a
	    path/query that contains no whitespace.

	    Args:
	        url: URL string to validate

	    Returns:
	        True if valid URL, False otherwise (including for non-string input)
	    """
	    if not isinstance(url, str):
	        return False
	    # fullmatch anchors both ends, so a trailing newline is rejected too
	    return _URL_PATTERN.fullmatch(url) is not None


	def format_timestamp() -> str:
	    """
	    Return the current local time as an ISO 8601 string.

	    Returns:
	        ISO formatted timestamp string (naive: no timezone offset is included)
	    """
	    current = datetime.now()
	    return current.isoformat()


	def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
	    """
	    Divide numerator by denominator, falling back to a default when impossible.

	    Args:
	        numerator: Numerator value
	        denominator: Denominator value
	        default: Value returned when the denominator is zero or the operands
	            cannot be divided

	    Returns:
	        Division result or default
	    """
	    try:
	        if denominator != 0:
	            return numerator / denominator
	    except (TypeError, ZeroDivisionError):
	        # Non-numeric operands (or a zero that slipped past the check above)
	        pass
	    return default


	def parse_json_safe(json_str: str) -> Dict[str, Any]:
	    """
	    Safely parse JSON string with error handling.

	    Args:
	        json_str: JSON string to parse

	    Returns:
	        The value produced by json.loads (a dict when the document is a JSON
	        object), or an empty dict when the input cannot be parsed. Failures are
	        logged at error level rather than raised.
	    """
	    import json

	    try:
	        return json.loads(json_str)
	    except (ValueError, TypeError) as e:
	        # ValueError covers json.JSONDecodeError (a subclass) and undecodable
	        # bytes; TypeError covers non-string input such as None.
	        logger.error("JSON parse error: %s", e)
	        return {}