alessandro trinca tornidor committed on
Commit
88320db
·
1 Parent(s): a434f16

feat: improve the initial /thesaurus-custom endpoint

Browse files
my_ghost_writer/app.py CHANGED
@@ -24,7 +24,7 @@ from my_ghost_writer.constants import (ALLOWED_ORIGIN_LIST, API_MODE, DOMAIN, IS
24
  ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP, ME_CONFIG_MONGODB_USE_OK, PORT, RAPIDAPI_HOST, STATIC_FOLDER,
25
  STATIC_FOLDER_LITEKOBOLDAINET, WORDSAPI_KEY, WORDSAPI_URL, app_logger)
26
  from my_ghost_writer.pymongo_utils import mongodb_health_check
27
- from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonyms
28
  from my_ghost_writer.thesaurus import get_current_info_wordnet
29
  from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
30
  RequestSplitText, RequestTextFrequencyBody, MultiWordSynonymResponse, CustomSynonymRequest)
@@ -295,17 +295,8 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
295
  async def add_custom_synonyms(body: CustomSynonymRequest):
296
  """Adds custom synonyms for a given word to the in-memory store."""
297
  try:
298
- word = body.word.lower() # Store words in lowercase
299
- synonyms = [syn.lower() for syn in body.synonyms] # all the synonyms too!
300
-
301
- if word in custom_synonyms:
302
- # Update the synonyms list (append new synonyms, avoid duplicates)
303
- custom_synonyms[word] = list(set(custom_synonyms[word] + synonyms))
304
- return {"message": f"Synonyms for '{body.word}' updated successfully (in-memory)."}
305
- else:
306
- # Insert a new entry
307
- custom_synonyms[word] = synonyms
308
- return {"message": f"Synonyms for '{body.word}' added successfully (in-memory)."}
309
 
310
  except Exception as e:
311
  app_logger.error(f"Error adding custom synonyms: {e}")
 
24
  ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP, ME_CONFIG_MONGODB_USE_OK, PORT, RAPIDAPI_HOST, STATIC_FOLDER,
25
  STATIC_FOLDER_LITEKOBOLDAINET, WORDSAPI_KEY, WORDSAPI_URL, app_logger)
26
  from my_ghost_writer.pymongo_utils import mongodb_health_check
27
+ from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonym_handler
28
  from my_ghost_writer.thesaurus import get_current_info_wordnet
29
  from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
30
  RequestSplitText, RequestTextFrequencyBody, MultiWordSynonymResponse, CustomSynonymRequest)
 
295
  async def add_custom_synonyms(body: CustomSynonymRequest):
296
  """Adds custom synonyms for a given word to the in-memory store."""
297
  try:
298
+ custom_synonym_handler.add_entry(body.word, [r.model_dump() for r in body.related])
299
+ return {"message": f"Custom entry for '{body.word}' added/updated successfully (in-memory)."}
 
 
 
 
 
 
 
 
 
300
 
301
  except Exception as e:
302
  app_logger.error(f"Error adding custom synonyms: {e}")
my_ghost_writer/custom_synonym_handler.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+
4
class CustomSynonymHandler:
    """In-memory store of user-defined lexical relations with reverse lookup.

    Entries are keyed by a lowercased headword. Each headword maps relation
    types (for example ``"synonym"`` or ``"antonym"``) to a list of
    ``{"word": ..., "definition": ...}`` dictionaries. An inverted index maps
    every related word back to the headwords that list it.
    """

    def __init__(self):
        # {word: {relation_type: [{"word": related_word, "definition": definition}]}}
        self.lexicon: dict[str, dict[str, list[dict[str, Any]]]] = {}
        # {related_word: {headwords listing it, under any relation type}}
        self.inverted_index: dict[str, set[str]] = {}

    def add_entry(self, word: str, related: list[dict[str, Any]]):
        """Add the entry for ``word``, replacing any previous entry.

        Args:
            word: Headword; stored lowercased.
            related: Relation groups. Each needs a ``"type"`` key and a
                ``"words"`` list, and may carry a ``"definition"`` that is
                attached to every word of the group. Groups sharing the same
                type are merged in order of appearance.

        Raises:
            KeyError: If a relation lacks ``"type"`` or ``"words"``. Nothing
                is stored in that case.
        """
        word = word.lower()
        # Build the whole entry before touching shared state, so a malformed
        # relation cannot leave a half-written lexicon entry behind.
        entry: dict[str, list[dict[str, Any]]] = {}
        for relation in related:
            definition = relation.get("definition")
            related_words = [{"word": w.lower(), "definition": definition} for w in relation["words"]]
            entry.setdefault(relation["type"], []).extend(related_words)

        # Drop index links created by the entry being replaced; otherwise
        # reverse lookups keep returning this headword for removed words.
        self._remove_from_inverted_index(word)
        self.lexicon[word] = entry
        for relation_type, related_words in entry.items():
            self._update_inverted_index(word, relation_type, related_words)

    def get_related(self, word: str, relation_type: str) -> list[dict[str, Any]]:
        """Return the words stored for ``word`` under ``relation_type``.

        The lookup lowercases ``word``. An unknown word or relation type
        yields an empty list.
        """
        return self.lexicon.get(word.lower(), {}).get(relation_type, [])

    def reverse_lookup(self, related_word: str, relation_type: "str | None" = None) -> set[str]:
        """Return the headwords that list ``related_word``.

        Args:
            related_word: Word to search for; lowercased before the lookup.
            relation_type: When given, only headwords listing the word under
                that relation type are returned. ``None`` (the default)
                matches every relation type, antonyms included.

        Returns:
            A new set, so callers cannot alter the index through it. The set
            is empty when nothing matches.
        """
        related_word = related_word.lower()
        headwords = self.inverted_index.get(related_word, set())
        if relation_type is None:
            return set(headwords)
        # The index does not record relation types, so confirm each candidate
        # against the lexicon.
        return {
            headword
            for headword in headwords
            if any(
                item["word"] == related_word
                for item in self.lexicon.get(headword, {}).get(relation_type, [])
            )
        }

    def _update_inverted_index(self, word: str, relation_type: str, related_words: list[dict[str, Any]]):
        """Updates the inverted index for reverse lookups.

        ``relation_type`` is accepted for signature compatibility but is not
        stored: the index maps related words to headwords only.
        """
        for related in related_words:
            self.inverted_index.setdefault(related["word"], set()).add(word)

    def _remove_from_inverted_index(self, word: str):
        """Remove every index link that points at the current entry of ``word``."""
        for related_words in self.lexicon.get(word, {}).values():
            for related in related_words:
                related_word = related["word"]
                headwords = self.inverted_index.get(related_word)
                if headwords is None:
                    continue
                headwords.discard(word)
                if not headwords:
                    # Keep the index free of empty buckets.
                    del self.inverted_index[related_word]
my_ghost_writer/text_parsers2.py CHANGED
@@ -9,10 +9,12 @@ from typing import Any, Optional
9
  from fastapi import HTTPException
10
 
11
  from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS
 
12
  from my_ghost_writer.type_hints import SynonymInfo, WordSynonymResult, ContextInfo, SynonymGroup
13
 
14
 
15
  custom_synonyms: dict[str, list[str]] = {}
 
16
  # Load spaCy model
17
  try:
18
  nlp = spacy.load(SPACY_MODEL_NAME)
@@ -187,36 +189,37 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
187
  # 1. Custom Synonym Lookup and Preparation
188
  custom_synset = None # Initialize to None
189
  # 1. Direct Lookup: Check if the word is directly in custom_synonyms
190
- if word_lower in custom_synonyms:
191
- app_logger.info(f"found custom_synonyms:{custom_synonyms[word_lower]} by word:{word_lower}!")
192
- # 2. If custom synonyms exist, create the appropriate structure and return
193
- synonyms: list[dict[str, Any]] = [{"synonym": syn, "is_custom": True} for syn in custom_synonyms[word_lower]]
194
- if synonyms:
195
- # Create a dummy synset for the custom synonyms
 
196
  custom_synset = {
197
  'definition': 'User-defined synonym.',
198
  'examples': [],
199
- 'synonyms': synonyms
200
  }
201
  if pos_tag:
202
  custom_synset["pos"] = pos_tag
203
 
204
  # 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
205
- for custom_word, synonym_list in custom_synonyms.items():
206
- if word_lower in synonym_list:
207
- app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{custom_word}'")
208
- # Found a reverse match!
209
- # Include the original custom_word in the synonym list
210
- synonyms = [{"synonym": custom_word, "is_custom": True}] # Start with the original word
211
- synonyms.extend([{"synonym": syn, "is_custom": True} for syn in custom_synonyms[custom_word]]) # Add the rest of the synonyms
212
-
213
- custom_synset = {
214
- 'definition': f'User-defined synonym (reverse match for "{word}").',
215
- 'examples': [],
216
- 'synonyms': synonyms
217
- }
218
- if pos_tag:
219
- custom_synset["pos"] = pos_tag
220
 
221
  # 3. WordNet Lookup
222
  try:
 
9
  from fastapi import HTTPException
10
 
11
  from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS
12
+ from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
13
  from my_ghost_writer.type_hints import SynonymInfo, WordSynonymResult, ContextInfo, SynonymGroup
14
 
15
 
16
  custom_synonyms: dict[str, list[str]] = {}
17
+ custom_synonym_handler = CustomSynonymHandler()
18
  # Load spaCy model
19
  try:
20
  nlp = spacy.load(SPACY_MODEL_NAME)
 
189
  # 1. Custom Synonym Lookup and Preparation
190
  custom_synset = None # Initialize to None
191
  # 1. Direct Lookup: Check if the word is directly in custom_synonyms
192
+ related_synonyms = custom_synonym_handler.get_related(word_lower, "synonym")
193
+ if related_synonyms:
194
+ app_logger.info(f"found custom_synonyms:{related_synonyms} by word:{word_lower}!")
195
+ synonyms_list: list[dict[str, Any]] = []
196
+ for related in related_synonyms:
197
+ synonyms_list.append({"synonym": related["word"], "is_custom": True, "definition": related.get("definition")})
198
+ if synonyms_list:
199
  custom_synset = {
200
  'definition': 'User-defined synonym.',
201
  'examples': [],
202
+ 'synonyms': synonyms_list
203
  }
204
  if pos_tag:
205
  custom_synset["pos"] = pos_tag
206
 
207
  # 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
208
+ reverse_lookup_words = custom_synonym_handler.reverse_lookup(word_lower)
209
+
210
+ if reverse_lookup_words:
211
+ app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{reverse_lookup_words}'")
212
+ # Found a reverse match!
213
+ # reverse_lookup returns the original headword(s), not their lists of synonyms
214
+ synonyms_list: list[dict[str, Any]] = [{"synonym": reverse_word, "is_custom": True} for reverse_word in reverse_lookup_words]
215
+
216
+ custom_synset = {
217
+ 'definition': f'User-defined synonym (reverse match for "{word}").',
218
+ 'examples': [],
219
+ 'synonyms': synonyms_list
220
+ }
221
+ if pos_tag:
222
+ custom_synset["pos"] = pos_tag
223
 
224
  # 3. WordNet Lookup
225
  try:
my_ghost_writer/type_hints.py CHANGED
@@ -1,16 +1,22 @@
1
- from typing import Any, TypedDict, Optional
2
  from pydantic import BaseModel, field_validator
3
 
4
 
 
 
 
 
 
 
5
  class CustomSynonymRequest(BaseModel):
6
  word: str
7
- synonyms: list[str]
8
 
9
- @field_validator("synonyms")
10
- def synonyms_must_not_be_empty(cls, v):
11
- if not v:
12
- raise ValueError("Synonym list cannot be empty.")
13
- return v
14
 
15
 
16
  class SynonymInfo(TypedDict):
@@ -18,7 +24,6 @@ class SynonymInfo(TypedDict):
18
  is_custom: bool
19
 
20
 
21
-
22
  class RequestWordQueryBody(BaseModel):
23
  word: str
24
 
 
1
+ from typing import Any, TypedDict, Optional, Literal
2
  from pydantic import BaseModel, field_validator
3
 
4
 
5
class RelatedEntry(BaseModel):
    """One group of words related to a headword by a single relation type."""
    # Kind of lexical relation; only the listed literal values are accepted.
    type: Literal["synonym", "antonym", "homonym", "homophone", "homograph"]
    # Words standing in that relation to the headword.
    words: list[str]
    # Optional definition carried by the group rather than by the request.
    definition: Optional[str] = None


class CustomSynonymRequest(BaseModel):
    """Request body carrying a headword and its groups of related words."""
    word: str
    related: list[RelatedEntry]

    # NOTE(review): the former non-empty check on the removed `synonyms` field
    # is disabled and has no counterpart for `related`, so no validator here
    # rejects an empty `related` list. Confirm whether that is intended.
20
 
21
 
22
  class SynonymInfo(TypedDict):
 
24
  is_custom: bool
25
 
26
 
 
27
  class RequestWordQueryBody(BaseModel):
28
  word: str
29
 
tests/events/request_thesaurus_custom.json CHANGED
@@ -1,4 +1,15 @@
1
  {
2
  "word": "happy",
3
- "synonyms": ["joyful", "content", "cheerful", "elated"]
 
 
 
 
 
 
 
 
 
 
 
4
  }
 
1
  {
2
  "word": "happy",
3
+ "related": [
4
+ {
5
+ "definition": "Feeling or showing pleasure or contentment.",
6
+ "type": "synonym",
7
+ "words": ["joyful", "content", "cheerful", "elated"]
8
+ },
9
+ {
10
+ "definition": "Feeling or showing sorrow; unhappy.",
11
+ "type": "antonym",
12
+ "words": ["sad", "unhappy"]
13
+ }
14
+ ]
15
  }
tests/events/request_thesaurus_custom2.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word": "joy",
3
+ "related": [
4
+ {
5
+ "definition": "Feeling or showing pleasure or contentment.",
6
+ "type": "synonym",
7
+ "words": ["cheer", "happyness"]
8
+ }
9
+ ]
10
+ }
tests/events/request_thesaurus_custom3.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word": "march",
3
+ "related": [
4
+ {
5
+ "type": "synonym",
6
+ "words": ["walk", "run"]
7
+ }
8
+ ]
9
+ }