File size: 2,892 Bytes
7a479d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import logging
from queue import Queue

class UniqueQueue:
    """FIFO queue of topics that refuses duplicates, including synonyms.

    All terms are compared case-insensitively by lower-casing them. The first
    time a topic is enqueued its lower-cased form becomes the *canonical*
    term, and each of its synonyms is mapped to that canonical term. Any later
    attempt to enqueue the topic or one of its synonyms is ignored.

    Queue entries are ``(canonical, synonyms)`` tuples, where ``synonyms`` is
    the list supplied at enqueue time (original casing preserved).
    """

    def __init__(self, topics_with_synonyms=None):
        """Create the queue, optionally pre-populated.

        Args:
            topics_with_synonyms: Optional iterable of
                ``(main_topic, synonyms)`` pairs, enqueued in order.
        """
        self._queue = Queue()
        # Lower-cased canonical terms and synonyms that were accepted so far.
        self._seen = set()
        # Lower-cased term -> canonical term (canonicals map to themselves).
        self._canonical_mapping = {}
        if topics_with_synonyms:
            for main_topic, synonyms in topics_with_synonyms:
                self.enqueue(main_topic, synonyms)

    def _get_canonical(self, item):
        """Return the canonical term for ``item``.

        Unknown terms are treated as their own canonical form, i.e. the
        lower-cased ``item`` is returned.
        """
        key = item.lower()
        return self._canonical_mapping.get(key, key)

    def enqueue(self, item, synonyms=None):
        """Add ``item`` to the queue unless it (or a synonym of it) is known.

        Args:
            item: Topic to enqueue.
            synonyms: Optional iterable of alternative names for ``item``.
                ``None`` means "no synonyms".
        """
        # A fresh list per call: a shared default list would be stored in
        # every queue entry and mutating one entry would affect the others.
        # Copying also protects the entry from later caller-side mutation.
        synonyms = [] if synonyms is None else list(synonyms)

        key = item.lower()
        canonical_item = self._get_canonical(key)
        log = logging.getLogger(__name__)
        log.debug("Original item: %s", item)
        log.debug("Canonical item: %s", canonical_item)

        if canonical_item in self._seen:
            return

        self._queue.put((canonical_item, synonyms))
        self._seen.add(canonical_item)
        self._canonical_mapping[key] = canonical_item
        for synonym in synonyms:
            synonym_key = synonym.lower()
            self._seen.add(synonym_key)
            self._canonical_mapping[synonym_key] = canonical_item

    def get_canonical(self, word_or_phrase):
        """Return the canonical term for a known word, else ``None``.

        The lookup is case-insensitive, matching how terms are stored.
        Both canonical terms and synonyms resolve to their canonical term.
        Non-string input is never a known term and yields ``None``.
        """
        if not isinstance(word_or_phrase, str):
            return None
        return self._canonical_mapping.get(word_or_phrase.lower())

    def canonical_with_synonyms(self, canonical, synonyms):
        """Format ``canonical`` and ``synonyms`` as one comma-separated line.

        Note that an empty ``synonyms`` yields ``"<canonical>, "``.
        """
        return canonical + ", " + ", ".join(synonyms)

    def synonyms_for_canonical(self, canonical):
        """Return the lower-cased synonyms registered for a term.

        ``canonical`` may itself be a synonym; it is resolved to its
        canonical term first. The canonical term is not included.
        """
        canonical_term = self._get_canonical(canonical.lower())
        return [
            key
            for key, value in self._canonical_mapping.items()
            if value == canonical_term and key != canonical_term
        ]

    def dequeue(self):
        """Remove and return the oldest ``(canonical, synonyms)`` entry.

        This delegates to ``queue.Queue.get()`` with its defaults, so the
        call blocks while the queue is empty; check ``is_empty()`` first
        when no other thread is going to add items.
        """
        return self._queue.get()

    def canonical_for_synonym(self, synonym):
        """Return the canonical term mapped to ``synonym``, or ``None``."""
        return self._canonical_mapping.get(synonym.lower())

    def all_words(self):
        """Return every known term (canonicals and synonyms), de-duplicated.

        Terms are listed in the order they were first registered.
        """
        mapping = self._canonical_mapping
        return list(dict.fromkeys([*mapping, *mapping.values()]))

    def all_canonicals(self):
        """Return each canonical term once, in first-registered order."""
        return list(dict.fromkeys(self._canonical_mapping.values()))

    def all_synonyms(self):
        """Return every known term that is not its own canonical form."""
        return [
            key
            for key, value in self._canonical_mapping.items()
            if key != value
        ]

    def is_empty(self):
        """Return ``True`` when no entries are waiting in the queue."""
        return self._queue.empty()

    def review(self):
        """Return a list snapshot of the pending entries without removing them."""
        return list(self._queue.queue)

    def write_topics_to_file(unique_queue, file_path):
        """Write the pending entries to ``file_path``, one line per topic.

        The first parameter plays the role of ``self``: calling
        ``q.write_topics_to_file(path)`` binds ``unique_queue`` to ``q``.
        The name is kept so existing keyword callers keep working.

        Each line is ``"<canonical>, <syn1>, <syn2>, ..."``; a topic without
        synonyms is written as ``"<canonical>, "``. The file is overwritten.
        """
        with open(file_path, 'w') as file:
            for canonical, synonyms in unique_queue.review():
                line = canonical + ', ' + ', '.join(synonyms)
                file.write(line + '\n')