Spaces:

Agents-MCP-Hackathon
/

Smart-Auto-Complete

Sleeping

Sandipan Haldar

feat: Replace general context with LinkedIn-specific context

770544d 6 months ago

8.54 kB

	"""
	Utility functions for Smart Auto-Complete
	Provides common functionality for text processing, logging, and validation
	"""

	import html
	import logging
	import re
	import sys
	import unicodedata
	from typing import Dict, List, Optional, Tuple


	def setup_logging(level: str = "INFO") -> logging.Logger:
	    """
	    Configure and return the application logger

	    The "smart_autocomplete" logger is reset on every call: handlers that
	    were attached earlier are detached and a single stdout handler is
	    installed, so calling this function repeatedly never duplicates output.

	    Args:
	        level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL),
	            case-insensitive

	    Returns:
	        Configured logger instance
	    """
	    # Resolve the level name once; an unknown name raises AttributeError
	    resolved_level = getattr(logging, level.upper())

	    app_logger = logging.getLogger("smart_autocomplete")
	    app_logger.setLevel(resolved_level)

	    # Detach whatever was attached by a previous call
	    while app_logger.handlers:
	        app_logger.removeHandler(app_logger.handlers[0])

	    # Single console handler writing timestamped records to stdout
	    stdout_handler = logging.StreamHandler(sys.stdout)
	    stdout_handler.setLevel(resolved_level)
	    stdout_handler.setFormatter(
	        logging.Formatter(
	            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	            datefmt="%Y-%m-%d %H:%M:%S",
	        )
	    )
	    app_logger.addHandler(stdout_handler)

	    return app_logger


	def sanitize_input(text: str) -> str:
	    """
	    Sanitize and clean input text for processing

	    Steps, in order:
	      1. Unicode NFKC normalization
	      2. Removal of control characters (code point < 32) except newline/tab
	      3. HTML escaping
	      4. Whitespace collapsing (at most one blank line, single spaces)
	      5. Trimming of leading/trailing whitespace

	    Normalization runs before escaping on purpose: NFKC folds compatibility
	    characters such as the full-width less-than sign (U+FF1C) into their
	    ASCII markup equivalents, so escaping first would let them through
	    unescaped.

	    Args:
	        text: Raw input text; any falsy value yields an empty string

	    Returns:
	        Cleaned and sanitized text
	    """
	    if not text:
	        return ""

	    # Convert to string if not already
	    text = str(text)

	    # Normalize unicode first so look-alike markup characters get escaped below
	    text = unicodedata.normalize("NFKC", text)

	    # Drop control characters (including "\r") before collapsing whitespace,
	    # otherwise their removal can leave behind uncollapsed runs
	    text = "".join(char for char in text if ord(char) >= 32 or char in "\n\t")

	    # HTML escape to prevent injection
	    text = html.escape(text)

	    # Any run containing three or more newlines (optionally separated by
	    # other whitespace) becomes exactly one blank line
	    text = re.sub(r"\n\s*\n\s*\n", "\n\n", text)

	    # Multiple spaces/tabs to a single space
	    text = re.sub(r"[ \t]+", " ", text)

	    # Trim leading/trailing whitespace
	    return text.strip()


	def extract_context_hints(text: str) -> Dict[str, object]:
	    """
	    Extract contextual hints from the input text to improve suggestions

	    Greeting, signature, question and tone vocabularies are matched as whole
	    words/phrases (not raw substrings), so that e.g. "this" is not taken for
	    the greeting "hi" and "they" is not taken for the casual word "hey".

	    Args:
	        text: Input text to analyze; None is treated as an empty string

	    Returns:
	        Dictionary containing context hints with the keys "length",
	        "word_count", "has_greeting", "has_signature", "has_code_markers",
	        "has_questions", "tone" ("neutral", "formal" or "casual") and
	        "language_style" ("linkedin", "technical", "business" or "creative")
	    """
	    text = text or ""
	    text_lower = text.lower()

	    def has_word(phrase: str) -> bool:
	        # Whole-word / whole-phrase match against the lowercased text
	        pattern = r"\b" + re.escape(phrase) + r"\b"
	        return re.search(pattern, text_lower) is not None

	    hints = {
	        "length": len(text),
	        "word_count": len(text.split()),
	        "has_greeting": False,
	        "has_signature": False,
	        "has_code_markers": False,
	        "has_questions": False,
	        "tone": "neutral",
	        "language_style": "linkedin",
	    }

	    # Check for email patterns
	    email_greetings = [
	        "dear",
	        "hello",
	        "hi",
	        "greetings",
	        "good morning",
	        "good afternoon",
	    ]
	    email_signatures = [
	        "sincerely",
	        "best regards",
	        "thank you",
	        "yours truly",
	        "kind regards",
	    ]

	    hints["has_greeting"] = any(has_word(greeting) for greeting in email_greetings)
	    hints["has_signature"] = any(
	        has_word(signature) for signature in email_signatures
	    )

	    # Check for code patterns. These are punctuation / keyword-plus-space
	    # markers, so plain substring matching is kept for them.
	    code_markers = [
	        "//",
	        "/*",
	        "*/",
	        "#",
	        "def ",
	        "function",
	        "class ",
	        "import ",
	        "from ",
	    ]
	    hints["has_code_markers"] = any(marker in text_lower for marker in code_markers)

	    # Check for questions: an explicit question mark or an interrogative word
	    question_words = ["what", "how", "why", "when", "where", "who"]
	    hints["has_questions"] = "?" in text or any(
	        has_word(q) for q in question_words
	    )

	    # Determine tone by comparing how many distinct formal vs. casual words occur
	    formal_words = ["please", "kindly", "respectfully", "sincerely", "professional"]
	    casual_words = ["hey", "yeah", "cool", "awesome", "thanks"]

	    formal_count = sum(1 for word in formal_words if has_word(word))
	    casual_count = sum(1 for word in casual_words if has_word(word))

	    if formal_count > casual_count:
	        hints["tone"] = "formal"
	    elif casual_count > formal_count:
	        hints["tone"] = "casual"

	    # Determine language style; earlier branches take precedence
	    if hints["has_code_markers"]:
	        hints["language_style"] = "technical"
	    elif hints["has_greeting"] or hints["has_signature"]:
	        hints["language_style"] = "business"
	    elif any(
	        # Substring match is kept here so inflected forms ("characters",
	        # "plots") still count
	        creative in text_lower
	        for creative in ["once upon", "story", "character", "plot"]
	    ):
	        hints["language_style"] = "creative"

	    return hints


	def validate_api_key(api_key: str, provider: str) -> bool:
	    """
	    Validate API key format for different providers

	    Only the textual shape of the key is checked (prefix and minimum
	    length); no request is made to the provider.

	    Args:
	        api_key: The API key to validate
	        provider: The provider name (openai, anthropic), case-insensitive

	    Returns:
	        True if the key format is valid, False otherwise (including for
	        unknown providers and non-string arguments)
	    """
	    if not api_key or not isinstance(api_key, str):
	        return False
	    # A missing or non-string provider cannot match any known format
	    if not isinstance(provider, str):
	        return False

	    api_key = api_key.strip()
	    provider_name = provider.lower()
	    long_enough = len(api_key) >= 40

	    if provider_name == "anthropic":
	        # Anthropic keys start with 'sk-ant-'
	        return api_key.startswith("sk-ant-") and long_enough

	    if provider_name == "openai":
	        # OpenAI keys start with 'sk-'; the Anthropic prefix also begins
	        # with 'sk-', so it has to be excluded explicitly
	        return (
	            api_key.startswith("sk-")
	            and not api_key.startswith("sk-ant-")
	            and long_enough
	        )

	    return False


	def truncate_text(text: str, max_length: int, preserve_words: bool = True) -> str:
	    """
	    Truncate text to a maximum length while optionally preserving word boundaries

	    The returned string never exceeds max_length characters: when the text
	    has to be shortened, room for the trailing "..." is reserved inside the
	    limit.

	    Args:
	        text: Text to truncate
	        max_length: Maximum allowed length of the result, ellipsis included;
	            zero or negative values yield an empty string
	        preserve_words: Whether to cut at the last space before the limit
	            (only when that space lies in the final 20% of the allowed
	            span, so that not too much text is lost)

	    Returns:
	        The original text if it already fits, otherwise the truncated text
	        followed by "..." (or a bare prefix when max_length is too small to
	        hold any text plus the ellipsis)
	    """
	    ellipsis = "..."

	    if max_length <= 0:
	        return ""

	    if len(text) <= max_length:
	        return text

	    # Not enough room for text plus ellipsis: return a hard cut
	    if max_length <= len(ellipsis):
	        return text[:max_length]

	    budget = max_length - len(ellipsis)
	    truncated = text[:budget]

	    if preserve_words:
	        last_space = truncated.rfind(" ")
	        # Only use the word boundary if it's not too far back
	        if last_space > budget * 0.8:
	            truncated = truncated[:last_space]

	    return truncated.rstrip() + ellipsis


	def format_suggestions_for_display(
	    suggestions: List[str], max_display_length: int = 100
	) -> List[Dict[str, str]]:
	    """
	    Turn raw suggestion strings into UI-ready records

	    Each suggestion is passed through sanitize_input; the display variant is
	    additionally shortened with truncate_text.

	    Args:
	        suggestions: List of suggestion strings
	        max_display_length: Maximum length passed to truncate_text for the
	            display variant

	    Returns:
	        One dictionary per suggestion, in input order, with the keys "id"
	        (1-based position), "text" (sanitized suggestion), "display_text"
	        (sanitized and truncated), "length" and "word_count" (both computed
	        on the sanitized text)
	    """
	    # Sanitize everything up front, then assemble the records in one pass
	    cleaned_entries = [sanitize_input(raw) for raw in suggestions]

	    return [
	        {
	            "id": position,
	            "text": entry,
	            "display_text": truncate_text(entry, max_display_length),
	            "length": len(entry),
	            "word_count": len(entry.split()),
	        }
	        for position, entry in enumerate(cleaned_entries, start=1)
	    ]


	def calculate_text_similarity(text1: str, text2: str) -> float:
	    """
	    Measure how similar two texts are by their shared vocabulary

	    Both texts are lowercased and split on whitespace; the score is the
	    Jaccard index of the two resulting word sets.

	    Args:
	        text1: First text
	        text2: Second text

	    Returns:
	        Similarity score between 0 and 1 (0.0 when either text is empty or
	        contains no words)
	    """
	    if not (text1 and text2):
	        return 0.0

	    vocab_a = set(text1.lower().split())
	    vocab_b = set(text2.lower().split())

	    combined = vocab_a | vocab_b
	    # Whitespace-only inputs produce empty word sets
	    if not combined:
	        return 0.0

	    shared = vocab_a & vocab_b
	    return len(shared) / len(combined)


	def get_text_stats(text: str) -> Dict[str, int]:
	    """
	    Compute simple size metrics for a piece of text

	    Args:
	        text: Text to analyze

	    Returns:
	        Dictionary with the keys "characters" (everything except spaces,
	        newlines and tabs), "words" (whitespace-separated tokens),
	        "sentences" (runs of ".", "!" or "?", a rough estimate) and
	        "paragraphs" (non-blank chunks separated by a blank line). Empty
	        input gives all zeros; otherwise sentences and paragraphs are at
	        least 1.
	    """
	    stats = {"characters": 0, "words": 0, "sentences": 0, "paragraphs": 0}
	    if not text:
	        return stats

	    # Characters: skip the three whitespace characters the count ignores
	    stats["characters"] = sum(1 for ch in text if ch not in " \n\t")

	    # Words: whitespace-delimited tokens
	    stats["words"] = len(text.split())

	    # Sentences: each run of terminal punctuation counts once, minimum 1
	    terminator_runs = sum(1 for _ in re.finditer(r"[.!?]+", text))
	    stats["sentences"] = terminator_runs if terminator_runs > 1 else 1

	    # Paragraphs: non-blank blocks between double newlines, minimum 1
	    filled_blocks = sum(1 for block in text.split("\n\n") if block.strip())
	    stats["paragraphs"] = filled_blocks if filled_blocks > 1 else 1

	    return stats