Spaces:
Running
Running
File size: 3,694 Bytes
84c328d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
"""NLP service for extracting task attributes from natural language.
[Task]: T029
[From]: specs/007-intermediate-todo-features/tasks.md (User Story 2)
This service provides:
- Tag extraction from natural language ("tagged with X", "add tag Y")
- Priority detection patterns
- Due date parsing patterns
"""
from typing import List, Optional
import re
# Words the keyword patterns may capture that are never meaningful tags.
_EXCLUDED_TAG_WORDS = frozenset({
    'a', 'an', 'the', 'with', 'for', 'and', 'or', 'but', 'not',
    'this', 'that', 'to', 'of', 'in', 'on', 'at', 'by', 'as', 'is',
    'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
    'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may',
    'might', 'must', 'can', 'need', 'want', 'like', 'such',
})

# Separator between items of a tag list: "a, b", "a , b", "a and b", "a, and b".
_TAG_LIST_SEPARATOR = r'(?:\s*,\s*(?:and\s+)?|\s+and\s+)'
_TAG_LIST_SPLIT_RE = re.compile(_TAG_LIST_SEPARATOR)

# "#tagname"
_HASHTAG_RE = re.compile(r'#(\w+)')

# "tagged with X and Y", "tagged as X", "tags X, Y", "tag X", "labeled X".
# The leading \b keeps the keyword from matching inside a longer word
# (e.g. the "tags" in "hashtags").
_TAGGED_WITH_RE = re.compile(
    r'\b(?:tagged|tags?|labeled)\s+(?:(?:with|as)\s+)?[,\s]*'
    r'(\w+(?:' + _TAG_LIST_SEPARATOR + r'\w+)*)'
)

# "add tag X", "with tag X", "has tag X"
_ADD_TAG_RE = re.compile(r'\b(?:add|with|has)\s+tag\s+(\w+)')

# "label X", "labeled as X", "categorize(d) as X", "file(d) under X".
# The leading \b keeps "file" from matching inside words such as "profile".
_LABEL_RE = re.compile(
    r'\b(?:label(?:ed)?|categorized?|filed?(?:\s+under)?)(?:\s+as)?\s+(\w+)'
)


def extract_tags(text: str) -> List[str]:
    """Extract tags from natural language input.

    [Task]: T029, T031 - Tag extraction from natural language

    Matching is case-insensitive: the input is lowercased once and every
    pattern (including hashtags) runs against the lowercased text, so
    "#Work" and "#work" collapse into a single tag.

    Supports patterns:
    - "tagged with X", "tagged as X", "tags X", "tag X"
    - Lists after those keywords: "X and Y", "X, Y", "X, Y and Z"
    - "add tag X", "with tag X", "has tag X"
    - "label X", "labeled X", "categorize as X", "file under X"
    - Hashtags: "#tagname"

    Args:
        text: Natural language input text. Empty or None yields no tags.

    Returns:
        Alphabetically sorted list of unique, lowercased tag names.
        Common stop words (e.g. "the", "with", "as") and single-character
        matches are dropped.

    Examples:
        >>> extract_tags("Add task tagged with work and urgent")
        ['urgent', 'work']
        >>> extract_tags("Buy groceries #shopping #home")
        ['home', 'shopping']
        >>> extract_tags("Create task with label review")
        ['review']
    """
    if not text:
        return []

    text_lower = text.lower()
    tags = set()

    # Hashtags are read from the lowercased text so casing variants dedupe.
    tags.update(_HASHTAG_RE.findall(text_lower))

    # Keyword followed by a tag list; split the captured list into items.
    for tag_list in _TAGGED_WITH_RE.findall(text_lower):
        tags.update(_TAG_LIST_SPLIT_RE.split(tag_list))

    tags.update(_ADD_TAG_RE.findall(text_lower))
    tags.update(_LABEL_RE.findall(text_lower))

    # Sorting makes the result deterministic regardless of set ordering.
    return sorted(
        tag for tag in tags
        if len(tag) > 1 and tag not in _EXCLUDED_TAG_WORDS
    )
def normalize_tag_name(tag: str) -> str:
    """Return a canonical form of a raw tag name.

    Steps, in order:
    1. Drop every character that is not a word character, whitespace,
       or a hyphen (underscores count as word characters and are kept).
    2. Lowercase the result and trim surrounding whitespace.
    3. Collapse each remaining run of whitespace into a single hyphen,
       so multi-word tags become hyphenated.

    Args:
        tag: Raw tag name from user input

    Returns:
        Normalized tag name (lowercase, trimmed, no special chars)
    """
    without_symbols = re.sub(r'[^\w\s-]', '', tag)
    lowered_and_trimmed = without_symbols.lower().strip()
    return re.sub(r'\s+', '-', lowered_and_trimmed)
def extract_tags_from_task_data(
    title: str,
    description: Optional[str] = None
) -> List[str]:
    """Extract tags from task title and description.

    Convenience function that runs ``extract_tags`` over the title and
    (when present) the description, then passes every result through
    ``normalize_tag_name``.

    Args:
        title: Task title
        description: Optional task description

    Returns:
        Sorted list of unique, normalized tag names. Tags that normalize
        to the same value are merged, and tags that normalize to an empty
        string are dropped.
    """
    # Join only the parts that are present so an empty/missing field does
    # not contribute stray text to the extraction input.
    text = " ".join(part for part in (title, description) if part)

    # Normalization can map distinct raw tags to the same name, so dedupe
    # after normalizing rather than relying on the extractor's own dedupe.
    normalized_tags = {normalize_tag_name(tag) for tag in extract_tags(text)}
    normalized_tags.discard("")

    # Re-sort: normalization (lowercasing) may change relative ordering.
    return sorted(normalized_tags)
|