Spaces:
Sleeping
Sleeping
| from langchain_core.tools import tool | |
| import re | |
| from datetime import datetime, timedelta | |
| import json | |
def extract_numbers(text: str) -> str:
    """
    Extracts all numbers from a text string and returns them as a comma-separated list.
    Useful for parsing numerical data from search results or documents.

    Integers and decimals are recognised, each with an optional leading minus
    sign. A decimal point is only treated as part of a number when digits
    follow it, so sentence punctuation ("I have 5.") is not captured.

    Args:
        text (str): The text to extract numbers from

    Returns:
        str: Comma-separated list of numbers found in the text, in order of
            appearance, or an empty string when there are none
    """
    if not text:
        return ""
    # The fractional part is an all-or-nothing group: requiring digits after
    # the dot keeps a trailing full stop from being glued onto an integer.
    numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
    return ', '.join(numbers)
def count_words(text: str) -> int:
    """
    Returns how many whitespace-delimited words a text string contains.

    Args:
        text (str): The text whose words are counted

    Returns:
        int: Number of words in the text (0 for empty or missing text)
    """
    # Guard first so falsy input (empty string, None) never reaches split().
    if not text:
        return 0
    # split() with no separator splits on runs of whitespace and never
    # produces empty tokens, so the token count is the word count.
    tokens = text.split()
    return len(tokens)
def extract_dates(text: str) -> str:
    """
    Extracts date patterns from text and returns them as a comma-separated list.

    Recognised forms (month names are matched case-insensitively):
        - YYYY-MM-DD
        - M/D/YY through MM/DD/YYYY
        - "Month DD, YYYY" with a full English month name
        - a bare four-digit year

    Each date is returned exactly as it appears in the text, in order of
    appearance. A span of text is reported once, under the most specific
    form that matches it (e.g. "2023-01-15" is not also reported as "2023").

    Args:
        text (str): The text to extract dates from

    Returns:
        str: Comma-separated list of dates found in the text, or an empty
            string when there are none
    """
    if not text:
        return ""
    # One alternation, most specific form first. Scanning once guarantees the
    # matches do not overlap, so a full date never also yields its bare year
    # and slash dates are not reported twice.
    date_pattern = (
        r'\b\d{4}-\d{2}-\d{2}\b'            # YYYY-MM-DD
        r'|\b\d{1,2}/\d{1,2}/\d{2,4}\b'     # M/D/YY .. MM/DD/YYYY
        # Non-capturing group: with a capturing group, findall would return
        # only the month name instead of the whole date.
        r'|\b(?:January|February|March|April|May|June|July|August'
        r'|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b'
        r'|\b\d{4}\b'                       # Just years
    )
    dates = [
        match.group(0)
        for match in re.finditer(date_pattern, text, re.IGNORECASE)
    ]
    return ', '.join(dates)
def clean_text(text: str) -> str:
    """
    Cleans text by removing special characters and extra whitespace.
    Useful for processing scraped or extracted text.

    Word characters, whitespace and the basic punctuation . , ! ? ( ) - are
    kept; every other character is dropped. Runs of whitespace are then
    collapsed to a single space and leading/trailing whitespace is removed.

    Args:
        text (str): The text to clean

    Returns:
        str: Cleaned text (empty string for empty or missing input)
    """
    if not text:
        return ""
    # Remove unwanted characters first: deleting a symbol that sat between
    # two spaces (or at either end) would otherwise leave a double or
    # dangling space behind after whitespace had already been normalised.
    cleaned = re.sub(r'[^\w\s.,!?()-]', '', text)
    # Collapse any run of whitespace (including newlines/tabs) to one space.
    cleaned = re.sub(r'\s+', ' ', cleaned)
    return cleaned.strip()