Spaces:
Sleeping
Sleeping
import re
from typing import Any, Dict, List, Optional

# Import keywords from separate files
from utils.genres_data import GENRES_KEYWORDS
from utils.moods_data import MOOD_KEYWORDS
# Media-type patterns are applied in this order and a later match overwrites
# an earlier one, so a query that mentions both kinds of media resolves to
# "book".
_MEDIA_TYPE_PATTERNS = (
    ("movie", re.compile(r"\b(?:movie|film|picture|flick)s?\b")),
    ("book", re.compile(r"\b(?:(?:book|novel|read|story)s?|stories)\b")),
)

# (audience label, genre tag that duplicates it, pattern).  Order matters:
# the first matching row wins, and "young adults" has to be tested before the
# plain "adults" pattern.
_AUDIENCE_PATTERNS = (
    (
        "children",
        "children",
        re.compile(r"\b(?:children|kids?|child|younger audiences?|juveniles?)\b"),
    ),
    (
        "young_adult",
        "young adult",
        re.compile(r"\b(?:young adults?|teens?|teenagers?|ya|adolescents?)\b"),
    ),
    (
        "adult",
        "adult",
        re.compile(r"\b(?:adults?|mature|grown-ups?|general audiences?)\b"),
    ),
)

# First matching row wins.
_ERA_PATTERNS = (
    ("classic", re.compile(r"\b(?:classic|classical|old|vintage|timeless)\b")),
    (
        "contemporary",
        re.compile(r"\b(?:contemporary|modern|recent|present-day|current)\b"),
    ),
    (
        "historical",
        re.compile(r"\b(?:historical|period|past|ancient|medieval|victorian|retro)\b"),
    ),
    ("future", re.compile(r"\b(?:future|futuristic)\b")),
)

# The leading \b keeps "100s" from being read as "00s".
_DECADE_RE = re.compile(r"\b(\d{4}|\d{2})s\b")
# NOTE: "current decade" is hard-coded to the 2020s.
_CURRENT_DECADE_RE = re.compile(r"\b(?:current|recent) decade\b|\b2020s\b")

# Tried in order; the first pattern that yields a usable name wins.
_PERSON_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\bby\s+([a-zA-Z\s\.]+)\b",
        r"\b(?:directed\s+by|director)\s+([a-zA-Z\s\.]+)\b",
        r"\b(?:written\s+by|author)\s+([a-zA-Z\s\.]+)\b",
        r"\b(?:starring|featuring|with)\s+([a-zA-Z\s\.]+)\b",
        r"\b(?:from|like)\s+([a-zA-Z\s\.]+)\b",
    )
)

# Heuristic list of words that cannot be part of a person's name.  The name
# capture is greedy, so these mark where the name ends ("by stephen king from
# the 80s") or show that nothing name-like was captured ("from the 90s").
_PERSON_STOP_WORDS = frozenset(
    {
        "the", "a", "an", "and", "or", "but", "from", "in", "on", "at",
        "for", "with", "about", "set", "released", "that", "who", "which",
        "during", "like", "by",
    }
)


def _match_tags(text: str, keyword_table: Dict[str, List[str]]) -> List[str]:
    """Return the tags whose keywords occur as whole words in *text*, in table order."""
    return [
        tag
        for tag, keywords in keyword_table.items()
        if any(re.search(r"\b" + re.escape(k) + r"\b", text) for k in keywords)
    ]


def _extract_media_type(text: str) -> Optional[str]:
    """Return "movie", "book" or None; "book" wins when both are mentioned."""
    media_type = None
    for label, pattern in _MEDIA_TYPE_PATTERNS:
        if pattern.search(text):
            media_type = label
    return media_type


def _extract_era(text: str) -> Optional[str]:
    """Return the first era label whose pattern matches *text*, else None."""
    for label, pattern in _ERA_PATTERNS:
        if pattern.search(text):
            return label
    return None


def _extract_decade(text: str) -> Optional[str]:
    """Return a decade such as "1990s" found in *text*, else None."""
    decade = None
    match = _DECADE_RE.search(text)
    if match:
        digits = match.group(1)
        if len(digits) == 4:
            decade = f"{digits}s"
        elif digits.startswith("0"):
            decade = "2000s"
        elif digits == "10":
            decade = "2010s"
        elif digits == "20":
            decade = "2020s"
        else:
            # Any other two-digit decade is taken to be in the 1900s.
            decade = f"19{digits}s"
    # An explicit "current decade" / "2020s" overrides an earlier decade.
    if _CURRENT_DECADE_RE.search(text):
        decade = "2020s"
    return decade


def _extract_person(text: str) -> Optional[str]:
    """Return a capitalized person name introduced by "by", "directed by", etc."""
    for pattern in _PERSON_PATTERNS:
        match = pattern.search(text)
        if match is None:
            continue
        name_words = []
        for word in match.group(1).split():
            if word in _PERSON_STOP_WORDS:
                break
            name_words.append(word)
        # An empty result means the capture began with a stop word
        # ("from the 90s"); fall through to the next pattern.
        if name_words:
            return " ".join(word.capitalize() for word in name_words)
    return None


def parse_user_query(
    query: str,
    *,
    genre_keywords: Optional[Dict[str, List[str]]] = None,
    mood_keywords: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Any]:
    """
    Parse a natural language query into structured recommendation tags.

    Matching is done on the lower-cased query with whole-word regular
    expressions.

    Args:
        query (str): The user's input query string.
        genre_keywords: Optional mapping of genre tag -> keyword strings.
            Defaults to ``GENRES_KEYWORDS``.
        mood_keywords: Optional mapping of mood tag -> keyword strings.
            Defaults to ``MOOD_KEYWORDS``.

    Returns:
        Dict[str, Any]: A dictionary containing the extracted tags.
        Example: {
            "genres": ["sci-fi", "thriller"],       # in keyword-table order
            "mood": ["suspenseful", "dark"],        # in keyword-table order
            "target_audience": "adult",             # "children", "young_adult" or None
            "era": "classic",                       # "contemporary", "historical", "future" or None
            "decade": "1990s",                      # "90s" is normalized to "1990s"
            "specific_person": "Christopher Nolan", # capitalized name or None
            "media_type_preference": "book",        # "movie" or None
            "raw_query": "<the original query>"
        }
    """
    if genre_keywords is None:
        genre_keywords = GENRES_KEYWORDS
    if mood_keywords is None:
        mood_keywords = MOOD_KEYWORDS

    query_lower = query.lower()

    # Table keys are unique, so the tag lists need no de-duplication and keep
    # a deterministic order.
    genres = _match_tags(query_lower, genre_keywords)
    moods = _match_tags(query_lower, mood_keywords)

    target_audience = None
    for label, duplicate_genre, pattern in _AUDIENCE_PATTERNS:
        if pattern.search(query_lower):
            target_audience = label
            # The audience is reported in its own field, not also as a genre.
            if duplicate_genre in genres:
                genres.remove(duplicate_genre)
            break

    return {
        "genres": genres,
        "mood": moods,
        "target_audience": target_audience,
        "era": _extract_era(query_lower),
        "decade": _extract_decade(query_lower),
        "specific_person": _extract_person(query_lower),
        "media_type_preference": _extract_media_type(query_lower),
        "raw_query": query,  # Keep original query for debugging/explanation
    }
if __name__ == '__main__':
    # Demo run: parse a fixed set of sample queries and print each result.
    demo_queries = (
        "I want a heartwarming drama movie for young adults from the 90s.",
        "Recommend a thrilling sci-fi book by Isaac Asimov.",
        "A dark mystery by Agatha Christie.",
        "Show me action films for kids under 10.",
        "I need a romantic comedy released in the 2000s.",
        "Any classic historical fiction?",
        "looking for something uplifting for ages 18+",
        "A book about adventure for children.",
        "A suspenseful thriller for adults.",
        "A historical drama set in the 1800s.",
        "A funny animation from the 80s.",
        "A contemporary romance novel.",
        "A classic sci-fi movie directed by Stanley Kubrick.",
        "I want a thriller by Stephen King.",
        "A Japanese film like Akira Kurosawa's.",
        "I'm feeling sad, recommend a melancholic movie.",
        "Give me an exciting thriller movie.",
        "I'm in the mood for something lighthearted.",
        "Looking for a really dark and grim book.",
        "Need something joyful to watch.",
        "I need an uplifting and inspiring film.",
        "Show me a truly gloomy and depressing story.",
        "Find me a film that's both chaotic and funny.",
        "I want something thought-provoking and deep.",
        "Looking for a movie that's really tense and nerve-wracking.",
        "Something wistful and nostalgic.",
        "I'm feeling angry, show me something intense and violent.",
        "Recommend a bizarre and absurd book.",
        "A beautiful and poignant love story.",
        "I need a really witty comedy.",
        "Something raw and gritty.",
        "A grand, sweeping epic.",
        "Something that brings tears to my eyes.",
        "Find me a slow-paced, meditative film.",
        "A mind-bending psychological thriller.",
    )
    for sample in demo_queries:
        tags = parse_user_query(sample)
        # One call, two lines of output, then the blank separator line.
        print(f"Query: '{sample}'", f"Parsed: {tags}\n", sep="\n")