import json
import re
from datetime import datetime, timedelta

from langchain_core.tools import tool

# Optionally negative integer or decimal. The fractional part requires at
# least one digit so a sentence-ending period ("costs 5.") is not captured.
_NUMBER_RE = re.compile(r'-?\d+(?:\.\d+)?')

# One alternation, most specific form first, so a date is reported once and
# its year is not re-matched by the bare-year alternative. The month group is
# non-capturing so findall() yields the whole match, not just the month name.
_DATE_RE = re.compile(
    r'\b(?:January|February|March|April|May|June|July|August|September'
    r'|October|November|December)\s+\d{1,2},?\s+\d{4}\b'  # Month DD, YYYY
    r'|\b\d{4}-\d{2}-\d{2}\b'  # YYYY-MM-DD
    r'|\b\d{1,2}/\d{1,2}/\d{2,4}\b'  # M/D/YY through MM/DD/YYYY
    r'|\b\d{4}\b',  # Just years
    re.IGNORECASE,
)

# Anything that is not a word character, whitespace, or basic punctuation.
_UNWANTED_CHARS_RE = re.compile(r'[^\w\s.,!?()-]')
_WHITESPACE_RE = re.compile(r'\s+')


@tool
def extract_numbers(text: str) -> str:
    """
    Extracts all numbers from a text string and returns them as a comma-separated list.
    Useful for parsing numerical data from search results or documents.

    Args:
        text (str): The text to extract numbers from

    Returns:
        str: Comma-separated list of numbers (integers and decimals, optionally
            negative) in the order they appear, or an empty string if none are found
    """
    if not text:
        return ""

    return ', '.join(_NUMBER_RE.findall(text))


@tool
def count_words(text: str) -> int:
    """
    Counts the number of words in a text string.

    Args:
        text (str): The text to count words in

    Returns:
        int: Number of whitespace-separated words in the text
    """
    if not text:
        return 0

    # str.split() with no arguments already discards empty strings.
    return len(text.split())


@tool
def extract_dates(text: str) -> str:
    """
    Extracts date patterns from text and returns them as they appear in the text.
    Supports YYYY-MM-DD, M/D/YY through MM/DD/YYYY, "Month DD, YYYY" and bare
    four-digit years.

    Args:
        text (str): The text to extract dates from

    Returns:
        str: Comma-separated list of dates found in the text, in order of
            appearance, or an empty string if none are found
    """
    if not text:
        return ""

    return ', '.join(_DATE_RE.findall(text))


@tool
def clean_text(text: str) -> str:
    """
    Cleans text by removing special characters and extra whitespace.
    Useful for processing scraped or extracted text.

    Args:
        text (str): The text to clean

    Returns:
        str: Cleaned text
    """
    if not text:
        return ""

    # Drop unwanted characters first: removing them can leave doubled or
    # leading/trailing spaces, which the whitespace pass below then tidies.
    cleaned = _UNWANTED_CHARS_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', cleaned).strip()