Spaces:
Build error
Build error
File size: 2,892 Bytes
7a479d7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | from queue import Queue
class UniqueQueue:
    """FIFO queue of topics that rejects duplicates case-insensitively.

    Each topic may come with synonyms. The topic and all of its synonyms are
    lower-cased and mapped to one canonical term, so enqueueing a word whose
    canonical term has already been queued does not add a second entry.
    """

    def __init__(self, topics_with_synonyms=None):
        """Create the queue, optionally pre-populated.

        Args:
            topics_with_synonyms: Optional iterable of
                ``(main_topic, synonyms)`` pairs, enqueued in order.
        """
        self._queue = Queue()
        # Lower-cased canonical terms and synonyms that were already accepted.
        self._seen = set()
        # Lower-cased word -> lower-cased canonical term.
        self._canonical_mapping = {}
        for main_topic, synonyms in topics_with_synonyms or ():
            self.enqueue(main_topic, synonyms)

    def _get_canonical(self, item):
        """Return the canonical term for *item*, or lower-cased *item* itself
        when it has not been registered."""
        key = item.lower()
        return self._canonical_mapping.get(key, key)

    def enqueue(self, item, synonyms=None):
        """Queue *item* unless its canonical term was queued before.

        Args:
            item: Topic to add; compared case-insensitively.
            synonyms: Words to register as aliases of *item*. ``None`` (the
                default) means no synonyms; a fresh list is created per call
                so queued entries never share one list object.

        The original and canonical item are printed to stdout.
        """
        if synonyms is None:
            synonyms = []
        print("Original item:", item)
        canonical_item = self._get_canonical(item)
        print("Canonical item:", canonical_item)
        # NOTE(review): the source's indentation was lost; the mapping and
        # synonym registration are assumed to happen only for new items.
        # Confirm against the upstream file.
        if canonical_item not in self._seen:
            self._queue.put((canonical_item, synonyms))
            self._seen.add(canonical_item)
            self._canonical_mapping[item.lower()] = canonical_item
            for synonym in synonyms:
                alias = synonym.lower()
                self._seen.add(alias)
                self._canonical_mapping[alias] = canonical_item

    def get_canonical(self, word_or_phrase):
        """Return the canonical term for a registered word, else ``None``.

        The lookup is case-insensitive, matching how ``enqueue`` stores keys
        and how ``canonical_for_synonym`` looks them up.
        """
        return self._canonical_mapping.get(word_or_phrase.lower())

    def canonical_with_synonyms(self, canonical, synonyms):
        """Format ``canonical`` and ``synonyms`` as one ``", "``-separated
        string; an empty ``synonyms`` yields ``"<canonical>, "``."""
        return canonical + ", " + ", ".join(synonyms)

    def synonyms_for_canonical(self, canonical):
        """Return every registered word that maps to the canonical term of
        *canonical*, excluding the canonical term itself."""
        canonical_term = self._get_canonical(canonical)
        return [
            word
            for word, target in self._canonical_mapping.items()
            if target == canonical_term and word != canonical_term
        ]

    def dequeue(self):
        """Remove and return the oldest ``(canonical, synonyms)`` entry.

        Uses ``Queue.get()`` with its defaults, so the call blocks while the
        queue is empty.
        """
        return self._queue.get()

    def canonical_for_synonym(self, synonym):
        """Return the canonical term registered for *synonym*
        (case-insensitive), or ``None`` when it is unknown."""
        return self._canonical_mapping.get(synonym.lower())

    def all_words(self):
        """Return every registered word (keys and canonical terms) without
        duplicates; the order is unspecified."""
        return list(
            set(self._canonical_mapping) | set(self._canonical_mapping.values())
        )

    def all_canonicals(self):
        """Return the distinct canonical terms; the order is unspecified."""
        return list(set(self._canonical_mapping.values()))

    def all_synonyms(self):
        """Return every registered word that differs from its canonical term."""
        return [
            word
            for word, target in self._canonical_mapping.items()
            if word != target
        ]

    def is_empty(self):
        """Return ``True`` when no entries are waiting in the queue."""
        return self._queue.empty()

    def review(self):
        """Return a list of the pending entries, oldest first, without
        removing them from the queue."""
        return list(self._queue.queue)
def write_topics_to_file(unique_queue, file_path):
    """Write the pending topics of a queue to a text file, one per line.

    Each line is the canonical term followed by its synonyms, all separated
    by ``", "``. A topic without synonyms is written as ``"<canonical>, "``.

    Args:
        unique_queue: Object whose ``review()`` returns an iterable of
            ``(canonical, synonyms)`` pairs.
        file_path: Destination path; opened in ``'w'`` mode, so an existing
            file is overwritten.
    """
    # Explicit UTF-8 keeps the output identical across platforms and avoids
    # UnicodeEncodeError for non-ASCII topics where the locale encoding is
    # narrower.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(
            canonical + ', ' + ', '.join(synonyms) + '\n'
            for canonical, synonyms in unique_queue.review()
        )