#!/usr/bin/env python3
"""Text Processing Plugin"""

import re
from typing import List

# Patterns are compiled once at import time so repeated calls do not pay
# for a pattern-cache lookup on every invocation.
_WHITESPACE_RE = re.compile(r'\s+')
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
_URL_RE = re.compile(r'https?://[^\s]+')

# Characters that commonly follow a URL in running prose but are almost
# never meant to be part of it.
_URL_TRAILING_PUNCTUATION = '.,;:!?\'"'

# Closing brackets mapped to their openers. A trailing closer is dropped
# only when it has no matching opener inside the URL itself.
_URL_BRACKET_PAIRS = {')': '(', ']': '[', '}': '{', '>': '<'}


class TextProcessor:
    """Clean and process text data."""

    def clean_text(self, text: str) -> str:
        """Normalize whitespace in ``text``.

        Every run of whitespace (spaces, tabs, newlines, ...) is collapsed
        to a single space, and leading/trailing whitespace is removed.
        No other characters are altered or removed.

        Args:
            text: The string to normalize.

        Returns:
            The whitespace-normalized string (``""`` for blank input).
        """
        return _WHITESPACE_RE.sub(' ', text).strip()

    def extract_emails(self, text: str) -> List[str]:
        """Extract email addresses from ``text``.

        Args:
            text: The string to search.

        Returns:
            All non-overlapping matches of the email pattern, in order of
            appearance. Duplicates are kept. Empty list when none match.
        """
        return _EMAIL_RE.findall(text)

    def extract_urls(self, text: str) -> List[str]:
        """Extract ``http://`` and ``https://`` URLs from ``text``.

        A candidate URL runs from the scheme to the next whitespace.
        Sentence punctuation and unbalanced closing brackets at the end of
        a candidate are trimmed, so ``"see https://example.com."`` yields
        ``"https://example.com"`` rather than ``"https://example.com."``.

        Args:
            text: The string to search.

        Returns:
            The URLs found, in order of appearance. Duplicates are kept.
            Empty list when none match.
        """
        return [self._trim_url(match) for match in _URL_RE.findall(text)]

    def tokenize(self, text: str) -> List[str]:
        """Split ``text`` into lowercase, whitespace-delimited tokens.

        Punctuation is not separated from words: ``"Hi, there"`` becomes
        ``["hi,", "there"]``.

        Args:
            text: The string to tokenize.

        Returns:
            The list of lowercase tokens (empty for blank input).
        """
        return text.lower().split()

    @staticmethod
    def _trim_url(url: str) -> str:
        """Strip prose punctuation that trails a matched URL."""
        while url:
            last = url[-1]
            if last in _URL_TRAILING_PUNCTUATION:
                url = url[:-1]
                continue
            opener = _URL_BRACKET_PAIRS.get(last)
            # Keep balanced brackets (e.g. ".../Python_(language)"); drop a
            # closer that belongs to the surrounding sentence instead.
            if opener is not None and url.count(last) > url.count(opener):
                url = url[:-1]
                continue
            break
        return url