alessandro trinca tornidor committed on
Commit ·
88320db
1
Parent(s): a434f16
feat: improve the initial /thesaurus-custom endpoint
Browse files- my_ghost_writer/app.py +3 -12
- my_ghost_writer/custom_synonym_handler.py +36 -0
- my_ghost_writer/text_parsers2.py +25 -22
- my_ghost_writer/type_hints.py +13 -8
- tests/events/request_thesaurus_custom.json +12 -1
- tests/events/request_thesaurus_custom2.json +10 -0
- tests/events/request_thesaurus_custom3.json +9 -0
my_ghost_writer/app.py
CHANGED
|
@@ -24,7 +24,7 @@ from my_ghost_writer.constants import (ALLOWED_ORIGIN_LIST, API_MODE, DOMAIN, IS
|
|
| 24 |
ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP, ME_CONFIG_MONGODB_USE_OK, PORT, RAPIDAPI_HOST, STATIC_FOLDER,
|
| 25 |
STATIC_FOLDER_LITEKOBOLDAINET, WORDSAPI_KEY, WORDSAPI_URL, app_logger)
|
| 26 |
from my_ghost_writer.pymongo_utils import mongodb_health_check
|
| 27 |
-
from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase,
|
| 28 |
from my_ghost_writer.thesaurus import get_current_info_wordnet
|
| 29 |
from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
|
| 30 |
RequestSplitText, RequestTextFrequencyBody, MultiWordSynonymResponse, CustomSynonymRequest)
|
|
@@ -295,17 +295,8 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
|
|
| 295 |
async def add_custom_synonyms(body: CustomSynonymRequest):
|
| 296 |
"""Adds custom synonyms for a given word to the in-memory store."""
|
| 297 |
try:
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
if word in custom_synonyms:
|
| 302 |
-
# Update the synonyms list (append new synonyms, avoid duplicates)
|
| 303 |
-
custom_synonyms[word] = list(set(custom_synonyms[word] + synonyms))
|
| 304 |
-
return {"message": f"Synonyms for '{body.word}' updated successfully (in-memory)."}
|
| 305 |
-
else:
|
| 306 |
-
# Insert a new entry
|
| 307 |
-
custom_synonyms[word] = synonyms
|
| 308 |
-
return {"message": f"Synonyms for '{body.word}' added successfully (in-memory)."}
|
| 309 |
|
| 310 |
except Exception as e:
|
| 311 |
app_logger.error(f"Error adding custom synonyms: {e}")
|
|
|
|
| 24 |
ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP, ME_CONFIG_MONGODB_USE_OK, PORT, RAPIDAPI_HOST, STATIC_FOLDER,
|
| 25 |
STATIC_FOLDER_LITEKOBOLDAINET, WORDSAPI_KEY, WORDSAPI_URL, app_logger)
|
| 26 |
from my_ghost_writer.pymongo_utils import mongodb_health_check
|
| 27 |
+
from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonym_handler
|
| 28 |
from my_ghost_writer.thesaurus import get_current_info_wordnet
|
| 29 |
from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
|
| 30 |
RequestSplitText, RequestTextFrequencyBody, MultiWordSynonymResponse, CustomSynonymRequest)
|
|
|
|
| 295 |
async def add_custom_synonyms(body: CustomSynonymRequest):
|
| 296 |
"""Adds custom synonyms for a given word to the in-memory store."""
|
| 297 |
try:
|
| 298 |
+
custom_synonym_handler.add_entry(body.word, [r.model_dump() for r in body.related])
|
| 299 |
+
return {"message": f"Custom entry for '{body.word}' added/updated successfully (in-memory)."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
except Exception as e:
|
| 302 |
app_logger.error(f"Error adding custom synonyms: {e}")
|
my_ghost_writer/custom_synonym_handler.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class CustomSynonymHandler:
    """In-memory store of user-defined related words, keyed by relation type.

    Headwords and related words are lower-cased when stored and when queried,
    so every lookup is case-insensitive.
    """

    def __init__(self):
        # {headword: {relation_type: [{"word": related_word, "definition": definition}]}}
        self.lexicon: dict[str, dict[str, list[dict[str, Any]]]] = {}
        # {related_word: {headwords listing it, under any relation type}}
        self.inverted_index: dict[str, set[str]] = {}

    def add_entry(self, word: str, related: list[dict[str, Any]]):
        """Add the custom entry for ``word``, replacing any previous one.

        Args:
            word: Headword the relations belong to.
            related: Relation groups; each dict needs a ``"type"`` key and a
                ``"words"`` list, and may carry a ``"definition"`` that is
                attached to every word of that group.

        Raises:
            KeyError: If a relation group lacks ``"type"`` or ``"words"``.
        """
        word = word.lower()
        # Forget what the previous entry contributed to the reverse index,
        # otherwise replaced relations would still show up in reverse lookups.
        self._remove_from_inverted_index(word)

        entry: dict[str, list[dict[str, Any]]] = {}
        self.lexicon[word] = entry
        for relation in related:
            relation_type = relation["type"]
            definition = relation.get("definition")
            related_words = [
                {"word": w.lower(), "definition": definition}
                for w in relation["words"]
            ]
            # Merge groups that share a relation type instead of letting the
            # last one win, so the lexicon stays in sync with the reverse index.
            entry.setdefault(relation_type, []).extend(related_words)
            self._update_inverted_index(word, relation_type, related_words)

    def get_related(self, word: str, relation_type: str) -> list[dict[str, Any]]:
        """Return the words stored for ``word`` under ``relation_type``.

        Returns an empty list when the headword or the relation type is unknown.
        """
        return self.lexicon.get(word.lower(), {}).get(relation_type, [])

    def reverse_lookup(self, related_word: str, relation_type: Optional[str] = None) -> set[str]:
        """Return the headwords that list ``related_word`` as a related word.

        Args:
            related_word: Word to search for among the stored related words.
            relation_type: When given, only headwords that list the word under
                this relation type are returned; ``None`` (the default)
                matches every relation type.

        Returns:
            A new set of headwords; mutating it does not affect the index.
        """
        related_word = related_word.lower()
        headwords = self.inverted_index.get(related_word, set())
        if relation_type is None:
            return set(headwords)
        return {
            headword
            for headword in headwords
            if any(
                candidate["word"] == related_word
                for candidate in self.lexicon.get(headword, {}).get(relation_type, [])
            )
        }

    def _update_inverted_index(self, word: str, relation_type: str, related_words: list[dict[str, Any]]):
        """Record ``word`` as a headword of every entry in ``related_words``.

        ``relation_type`` is accepted for signature compatibility; the index
        itself is not partitioned by relation type.
        """
        for related in related_words:
            self.inverted_index.setdefault(related["word"], set()).add(word)

    def _remove_from_inverted_index(self, word: str):
        """Drop ``word`` from the reverse index of all its current related words."""
        for related_words in self.lexicon.get(word, {}).values():
            for related in related_words:
                headwords = self.inverted_index.get(related["word"])
                if headwords is None:
                    continue
                headwords.discard(word)
                if not headwords:
                    # Do not keep empty buckets around for words nobody lists.
                    del self.inverted_index[related["word"]]
|
my_ghost_writer/text_parsers2.py
CHANGED
|
@@ -9,10 +9,12 @@ from typing import Any, Optional
|
|
| 9 |
from fastapi import HTTPException
|
| 10 |
|
| 11 |
from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS
|
|
|
|
| 12 |
from my_ghost_writer.type_hints import SynonymInfo, WordSynonymResult, ContextInfo, SynonymGroup
|
| 13 |
|
| 14 |
|
| 15 |
custom_synonyms: dict[str, list[str]] = {}
|
|
|
|
| 16 |
# Load spaCy model
|
| 17 |
try:
|
| 18 |
nlp = spacy.load(SPACY_MODEL_NAME)
|
|
@@ -187,36 +189,37 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
|
|
| 187 |
# 1. Custom Synonym Lookup and Preparation
|
| 188 |
custom_synset = None # Initialize to None
|
| 189 |
# 1. Direct Lookup: Check if the word is directly in custom_synonyms
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
| 196 |
custom_synset = {
|
| 197 |
'definition': 'User-defined synonym.',
|
| 198 |
'examples': [],
|
| 199 |
-
'synonyms':
|
| 200 |
}
|
| 201 |
if pos_tag:
|
| 202 |
custom_synset["pos"] = pos_tag
|
| 203 |
|
| 204 |
# 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
|
| 221 |
# 3. WordNet Lookup
|
| 222 |
try:
|
|
|
|
| 9 |
from fastapi import HTTPException
|
| 10 |
|
| 11 |
from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS
|
| 12 |
+
from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
|
| 13 |
from my_ghost_writer.type_hints import SynonymInfo, WordSynonymResult, ContextInfo, SynonymGroup
|
| 14 |
|
| 15 |
|
| 16 |
custom_synonyms: dict[str, list[str]] = {}
|
| 17 |
+
custom_synonym_handler = CustomSynonymHandler()
|
| 18 |
# Load spaCy model
|
| 19 |
try:
|
| 20 |
nlp = spacy.load(SPACY_MODEL_NAME)
|
|
|
|
| 189 |
# 1. Custom Synonym Lookup and Preparation
|
| 190 |
custom_synset = None # Initialize to None
|
| 191 |
# 1. Direct Lookup: Check if the word is directly in custom_synonyms
|
| 192 |
+
related_synonyms = custom_synonym_handler.get_related(word_lower, "synonym")
|
| 193 |
+
if related_synonyms:
|
| 194 |
+
app_logger.info(f"found custom_synonyms:{related_synonyms} by word:{word_lower}!")
|
| 195 |
+
synonyms_list: list[dict[str, Any]] = []
|
| 196 |
+
for related in related_synonyms:
|
| 197 |
+
synonyms_list.append({"synonym": related["word"], "is_custom": True, "definition": related.get("definition")})
|
| 198 |
+
if synonyms_list:
|
| 199 |
custom_synset = {
|
| 200 |
'definition': 'User-defined synonym.',
|
| 201 |
'examples': [],
|
| 202 |
+
'synonyms': synonyms_list
|
| 203 |
}
|
| 204 |
if pos_tag:
|
| 205 |
custom_synset["pos"] = pos_tag
|
| 206 |
|
| 207 |
# 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
|
| 208 |
+
reverse_lookup_words = custom_synonym_handler.reverse_lookup(word_lower)
|
| 209 |
+
|
| 210 |
+
if reverse_lookup_words:
|
| 211 |
+
app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{reverse_lookup_words}'")
|
| 212 |
+
# Found a reverse match!
|
| 213 |
+
# reverse_lookup returns the original headword(s), not a list of synonyms
|
| 214 |
+
synonyms_list: list[dict[str, Any]] = [{"synonym": reverse_word, "is_custom": True} for reverse_word in reverse_lookup_words]
|
| 215 |
+
|
| 216 |
+
custom_synset = {
|
| 217 |
+
'definition': f'User-defined synonym (reverse match for "{word}").',
|
| 218 |
+
'examples': [],
|
| 219 |
+
'synonyms': synonyms_list
|
| 220 |
+
}
|
| 221 |
+
if pos_tag:
|
| 222 |
+
custom_synset["pos"] = pos_tag
|
| 223 |
|
| 224 |
# 3. WordNet Lookup
|
| 225 |
try:
|
my_ghost_writer/type_hints.py
CHANGED
|
@@ -1,16 +1,22 @@
|
|
| 1 |
-
from typing import Any, TypedDict, Optional
|
| 2 |
from pydantic import BaseModel, field_validator
|
| 3 |
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
class CustomSynonymRequest(BaseModel):
|
| 6 |
word: str
|
| 7 |
-
|
| 8 |
|
| 9 |
-
@field_validator("synonyms")
|
| 10 |
-
def synonyms_must_not_be_empty(cls, v):
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
|
| 16 |
class SynonymInfo(TypedDict):
|
|
@@ -18,7 +24,6 @@ class SynonymInfo(TypedDict):
|
|
| 18 |
is_custom: bool
|
| 19 |
|
| 20 |
|
| 21 |
-
|
| 22 |
class RequestWordQueryBody(BaseModel):
|
| 23 |
word: str
|
| 24 |
|
|
|
|
| 1 |
+
from typing import Any, TypedDict, Optional, Literal
|
| 2 |
from pydantic import BaseModel, field_validator
|
| 3 |
|
| 4 |
|
| 5 |
+
class RelatedEntry(BaseModel):
    """One group of words tied to a headword by a single relation type."""

    # Kind of lexical relation; restricted to the listed literal values.
    type: Literal["synonym", "antonym", "homonym", "homophone", "homograph"]
    # Words standing in that relation to the headword.
    words: list[str]
    # Optional definition carried by the group as a whole.
    definition: Optional[str] = None
|
| 9 |
+
|
| 10 |
+
|
| 11 |
class CustomSynonymRequest(BaseModel):
    """Request body pairing a headword with its custom related-word groups."""

    # Headword the custom relations are registered for.
    word: str
    # Relation groups (one RelatedEntry per group) attached to the headword.
    related: list[RelatedEntry]

    # NOTE: the validator below is disabled; it targets a `synonyms` field that
    # this model does not declare, so `related` is accepted as-is.
    # @field_validator("synonyms")
    # def synonyms_must_not_be_empty(cls, v):
    #     if not v:
    #         raise ValueError("Synonym list cannot be empty.")
    #     return v
|
| 20 |
|
| 21 |
|
| 22 |
class SynonymInfo(TypedDict):
|
|
|
|
| 24 |
is_custom: bool
|
| 25 |
|
| 26 |
|
|
|
|
| 27 |
class RequestWordQueryBody(BaseModel):
|
| 28 |
word: str
|
| 29 |
|
tests/events/request_thesaurus_custom.json
CHANGED
|
@@ -1,4 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"word": "happy",
|
| 3 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"word": "happy",
|
| 3 |
+
"related": [
|
| 4 |
+
{
|
| 5 |
+
"definition": "Feeling or showing pleasure or contentment.",
|
| 6 |
+
"type": "synonym",
|
| 7 |
+
"words": ["joyful", "content", "cheerful", "elated"]
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"definition": "Feeling or showing sorrow; unhappy.",
|
| 11 |
+
"type": "antonym",
|
| 12 |
+
"words": ["sad", "unhappy"]
|
| 13 |
+
}
|
| 14 |
+
]
|
| 15 |
}
|
tests/events/request_thesaurus_custom2.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word": "joy",
|
| 3 |
+
"related": [
|
| 4 |
+
{
|
| 5 |
+
"definition": "Feeling or showing pleasure or contentment.",
|
| 6 |
+
"type": "synonym",
|
| 7 |
+
"words": ["cheer", "happyness"]
|
| 8 |
+
}
|
| 9 |
+
]
|
| 10 |
+
}
|
tests/events/request_thesaurus_custom3.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word": "march",
|
| 3 |
+
"related": [
|
| 4 |
+
{
|
| 5 |
+
"type": "synonym",
|
| 6 |
+
"words": ["walk", "run"]
|
| 7 |
+
}
|
| 8 |
+
]
|
| 9 |
+
}
|