Spaces:

AlBaraa63
/

MissionControlMCP

Sleeping

App Files Files Community

MissionControlMCP / tools /text_extractor.py

AlBaraa63

Upload 33 files

f1b19d3 verified about 2 months ago

raw

history blame contribute delete

3.62 kB

	"""
	Text Extractor Tool - Clean, summarize, and process text
	"""
	import logging
	from typing import Dict, Any
	import sys
	import os

	# Put the directory one level above this file's own directory at the front of
	# sys.path. Presumably this is what lets the `utils.helpers` import below
	# resolve when the module is loaded from outside the project root — confirm
	# against how the tools are launched.
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from utils.helpers import clean_text, chunk_text, summarize_text, extract_keywords

	logger = logging.getLogger(__name__)


	def extract_text(text: str, operation: str = "clean", max_length: int = 500) -> Dict[str, Any]:
	    """
	    Process text based on the specified operation.

	    Args:
	        text: Raw text to process. Must contain at least one non-whitespace
	            character.
	        operation: Operation to perform - 'clean', 'summarize', 'chunk', or
	            'keywords'.
	        max_length: Forwarded to summarize_text for 'summarize' and used as
	            the chunk size for 'chunk'. Ignored by 'clean' and 'keywords'.

	    Returns:
	        Dictionary with three keys:
	            "result": the processed text as a single string (chunks are
	                joined with a "---CHUNK---" separator, keywords with ", ").
	            "word_count": number of whitespace-separated tokens in "result".
	                For 'chunk' this therefore also counts the separator markers.
	            "metadata": operation-specific details (lengths, counts, ...).

	    Raises:
	        ValueError: If text is empty or whitespace-only, if max_length is not
	            positive for 'summarize' or 'chunk', or if operation is unknown.
	    """
	    try:
	        # Rejects None, "" and whitespace-only input alike.
	        if not text or not text.strip():
	            raise ValueError("Input text is empty")

	        # Only the operations that actually consume max_length validate it, so
	        # callers of 'clean' / 'keywords' are unaffected by whatever they pass.
	        if operation in ("summarize", "chunk") and max_length <= 0:
	            raise ValueError(
	                f"max_length must be a positive integer for '{operation}', got {max_length}"
	            )

	        if operation == "clean":
	            result = clean_text(text)
	            metadata = {
	                "operation": "clean",
	                "original_length": len(text),
	                "cleaned_length": len(result),
	            }

	        elif operation == "summarize":
	            result = summarize_text(text, max_length)
	            metadata = {
	                "operation": "summarize",
	                "original_length": len(text),
	                "summary_length": len(result),
	                # text is guaranteed non-empty by the guard above, so the
	                # division is always defined.
	                "compression_ratio": round(len(result) / len(text), 2),
	            }

	        elif operation == "chunk":
	            # Keep the overlap strictly below the chunk size. How chunk_text
	            # treats overlap >= chunk_size is not visible from this module, so
	            # avoid handing it a window that might never advance. For any
	            # max_length above 50 this is the same overlap of 50 as before.
	            overlap = min(50, max_length - 1)
	            chunks = chunk_text(text, chunk_size=max_length, overlap=overlap)
	            result = "\n\n---CHUNK---\n\n".join(chunks)
	            metadata = {
	                "operation": "chunk",
	                "total_chunks": len(chunks),
	                "chunk_size": max_length,
	            }

	        elif operation == "keywords":
	            keywords = extract_keywords(text, top_n=10)
	            result = ", ".join(keywords)
	            metadata = {
	                "operation": "keywords",
	                "keyword_count": len(keywords),
	                "keywords": keywords,
	            }

	        else:
	            raise ValueError(f"Unknown operation: {operation}. Use 'clean', 'summarize', 'chunk', or 'keywords'")

	        return {
	            "result": result,
	            "word_count": len(result.split()),
	            "metadata": metadata,
	        }

	    except Exception as e:
	        # Log for diagnostics, then let the caller decide how to handle it.
	        logger.error("Error extracting text: %s", e)
	        raise


	def process_multiple_texts(texts: list, operation: str = "clean", max_length: int = 500) -> list:
	    """
	    Process multiple texts with the same operation.

	    Each text is handled independently: a failure on one item is logged and
	    recorded in its result entry, and processing continues with the next item.

	    Args:
	        texts: List of text strings to process
	        operation: Operation to apply to all texts
	        max_length: Forwarded unchanged to extract_text for every item.
	            Defaults to 500, the same default extract_text uses.

	    Returns:
	        List with one dictionary per input, in input order. Successful entries
	        are the extract_text result plus an "index" key. Failed entries hold
	        "index", "error" (the exception message), an empty "result", a
	        "word_count" of 0 and an empty "metadata" dict, so both kinds of entry
	        expose the same "result" / "word_count" / "metadata" keys.
	    """
	    results = []
	    for idx, text in enumerate(texts):
	        try:
	            entry = extract_text(text, operation, max_length)
	            entry["index"] = idx
	        except Exception as e:
	            # Best-effort batch: record the failure instead of aborting the run.
	            logger.error("Error processing text at index %s: %s", idx, e)
	            entry = {
	                "index": idx,
	                "error": str(e),
	                "result": "",
	                "word_count": 0,
	                "metadata": {},
	            }
	        results.append(entry)

	    return results