|
|
""" |
|
|
Filtering logic for sentence selection based on topics and creators. |
|
|
""" |
|
|
|
|
|
from typing import Any, Dict, List, Set |
|
|
|
|
|
|
|
|
from .config import sentences, works, creators, topics, topic_names |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_filtered_sentence_ids(
    filter_topics: List[str] = None, filter_creators: List[str] = None
) -> Set[str]:
    """
    Return the IDs of every sentence whose work passes the given filters.

    The work ID of a sentence is the part of its ID before the first
    underscore. Within each filter the listed values are OR-ed together
    (values missing from the lookup tables are ignored); when both filters
    are supplied a work has to satisfy both of them.

    Args:
        filter_topics: Topic keys looked up in ``topics``; falsy disables
            topic filtering.
        filter_creators: Creator keys looked up in ``creators``; falsy
            disables creator filtering.

    Returns:
        A new set of matching sentence IDs. With no filters at all this is
        the full set of keys of ``sentences``.
    """
    all_sentence_ids = set(sentences.keys())

    # Nothing to filter on: every known sentence qualifies.
    if not (filter_topics or filter_creators):
        return all_sentence_ids

    if filter_topics:
        # Union of the works attached to each recognised topic.
        allowed_works = set().union(
            *(topics[topic] for topic in filter_topics if topic in topics)
        )
    else:
        # No topic restriction, so start from every known work.
        allowed_works = set(works.keys())

    if filter_creators:
        # Union of the works attached to each recognised creator.
        works_by_creator = set().union(
            *(creators[name] for name in filter_creators if name in creators)
        )
        # Both filters present -> a work must match both; otherwise the
        # creator filter alone decides.
        if filter_topics:
            allowed_works = allowed_works & works_by_creator
        else:
            allowed_works = works_by_creator

    return {
        sentence_id
        for sentence_id in all_sentence_ids
        if sentence_id.partition("_")[0] in allowed_works
    }
|
|
|
|
|
|
|
|
def apply_filters_to_results(
    results: List[Dict[str, Any]],
    filter_topics: List[str] = None,
    filter_creators: List[str] = None,
) -> List[Dict[str, Any]]:
    """
    Keep only the results whose sentence passes the topic/creator filters.

    Args:
        results: Result dictionaries; each is matched on its 'id' entry.
        filter_topics: Topic codes to filter by (falsy means no topic filter).
        filter_creators: Creator names to filter by (falsy means no creator
            filter).

    Returns:
        The very same ``results`` object when no filter is given. Otherwise a
        new list holding the surviving result dictionaries in their original
        order; each surviving dictionary has its 'rank' entry overwritten in
        place with its 1-based position in that new list.
    """
    # Without any filter the input is handed back untouched (no re-ranking).
    if not (filter_topics or filter_creators):
        return results

    allowed_ids = get_filtered_sentence_ids(filter_topics, filter_creators)

    # First pass: select survivors, leaving the dictionaries unmodified.
    kept = []
    for entry in results:
        if entry.get("id") in allowed_ids:
            kept.append(entry)

    # Second pass: renumber so ranks are contiguous after filtering.
    for position, entry in enumerate(kept, start=1):
        entry["rank"] = position

    return kept
|
|
|