Commit
·
0c9becc
1
Parent(s):
97cd79a
Update code
Browse files
- routers/soundex.py +2 -1
- routers/spell.py +3 -2
- routers/tokenize.py +4 -3
- routers/util.py +5 -4
routers/soundex.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
|
|
|
| 4 |
from pythainlp.soundex import (
|
| 5 |
soundex as py_soundex
|
| 6 |
)
|
|
@@ -26,7 +27,7 @@ def soundex(word: str, engine: SoundexEngine = "udom83"):
|
|
| 26 |
- **word**: A word that want into phonetic code.
|
| 27 |
- **engine**: Soundex Engine (default is udom83)
|
| 28 |
"""
|
| 29 |
-
return Response(
|
| 30 |
json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
|
| 31 |
media_type="application/json; charset=utf-8",
|
| 32 |
)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
from pythainlp.soundex import (
|
| 6 |
soundex as py_soundex
|
| 7 |
)
|
|
|
|
| 27 |
- **word**: A word that want into phonetic code.
|
| 28 |
- **engine**: Soundex Engine (default is udom83)
|
| 29 |
"""
|
| 30 |
+
return JSONResponse(
|
| 31 |
json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
|
| 32 |
media_type="application/json; charset=utf-8",
|
| 33 |
)
|
routers/spell.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
|
|
|
| 4 |
from pythainlp.spell import (
|
| 5 |
correct as py_correct,
|
| 6 |
spell as py_spell
|
|
@@ -34,7 +35,7 @@ def correct(word: float, engine: CorrectEngine = "pn"):
|
|
| 34 |
- **word**: A word that want corrects the spelling of the given word.
|
| 35 |
- **engine**: Correct Engine (default is pn)
|
| 36 |
"""
|
| 37 |
-
return Response(
|
| 38 |
json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
|
| 39 |
media_type="application/json; charset=utf-8",
|
| 40 |
)
|
|
@@ -50,7 +51,7 @@ def spell(word: float, engine: SpellEngine = "pn"):
|
|
| 50 |
- **word**: A word that want to check spell.
|
| 51 |
- **engine**: Spell Engine (default is pn)
|
| 52 |
"""
|
| 53 |
-
return Response(
|
| 54 |
json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
|
| 55 |
media_type="application/json; charset=utf-8",
|
| 56 |
)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
from pythainlp.spell import (
|
| 6 |
correct as py_correct,
|
| 7 |
spell as py_spell
|
|
|
|
| 35 |
- **word**: A word that want corrects the spelling of the given word.
|
| 36 |
- **engine**: Correct Engine (default is pn)
|
| 37 |
"""
|
| 38 |
+
return JSONResponse(
|
| 39 |
json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
|
| 40 |
media_type="application/json; charset=utf-8",
|
| 41 |
)
|
|
|
|
| 51 |
- **word**: A word that want to check spell.
|
| 52 |
- **engine**: Spell Engine (default is pn)
|
| 53 |
"""
|
| 54 |
+
return JSONResponse(
|
| 55 |
json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
|
| 56 |
media_type="application/json; charset=utf-8",
|
| 57 |
)
|
routers/tokenize.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
|
|
|
| 4 |
from pythainlp.tokenize import (
|
| 5 |
word_tokenize as py_word_tokenize,
|
| 6 |
subword_tokenize as py_subword_tokenize,
|
|
@@ -50,7 +51,7 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
|
| 50 |
- **text**: Text that want to tokenize.
|
| 51 |
- **engine**: Word Tokenize Engine (default is newmm)
|
| 52 |
"""
|
| 53 |
-
return Response(
|
| 54 |
json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 55 |
media_type="application/json; charset=utf-8",
|
| 56 |
)
|
|
@@ -66,7 +67,7 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
|
| 66 |
- **text**: Text that want to tokenize.
|
| 67 |
- **engine**: Sub word Tokenize Engine (default is tcc)
|
| 68 |
"""
|
| 69 |
-
return Response(
|
| 70 |
json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 71 |
media_type="application/json; charset=utf-8",
|
| 72 |
)
|
|
@@ -82,7 +83,7 @@ def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
|
|
| 82 |
- **text**: Text that want to tokenize.
|
| 83 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
| 84 |
"""
|
| 85 |
-
return Response(
|
| 86 |
json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 87 |
media_type="application/json; charset=utf-8",
|
| 88 |
)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
from pythainlp.tokenize import (
|
| 6 |
word_tokenize as py_word_tokenize,
|
| 7 |
subword_tokenize as py_subword_tokenize,
|
|
|
|
| 51 |
- **text**: Text that want to tokenize.
|
| 52 |
- **engine**: Word Tokenize Engine (default is newmm)
|
| 53 |
"""
|
| 54 |
+
return JSONResponse(
|
| 55 |
json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 56 |
media_type="application/json; charset=utf-8",
|
| 57 |
)
|
|
|
|
| 67 |
- **text**: Text that want to tokenize.
|
| 68 |
- **engine**: Sub word Tokenize Engine (default is tcc)
|
| 69 |
"""
|
| 70 |
+
return JSONResponse(
|
| 71 |
json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 72 |
media_type="application/json; charset=utf-8",
|
| 73 |
)
|
|
|
|
| 83 |
- **text**: Text that want to tokenize.
|
| 84 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
| 85 |
"""
|
| 86 |
+
return JSONResponse(
|
| 87 |
json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 88 |
media_type="application/json; charset=utf-8",
|
| 89 |
)
|
routers/util.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
|
|
|
| 4 |
from pythainlp.util import (
|
| 5 |
bahttext as py_bahttext,
|
| 6 |
normalize as py_normalize,
|
|
@@ -14,7 +15,7 @@ def bahttext(number: float):
|
|
| 14 |
"""
|
| 15 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
| 16 |
"""
|
| 17 |
-
return Response(
|
| 18 |
json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
|
| 19 |
media_type="application/json; charset=utf-8",
|
| 20 |
)
|
|
@@ -25,7 +26,7 @@ def normalize(text: str):
|
|
| 25 |
"""
|
| 26 |
Normalize and clean Thai text
|
| 27 |
"""
|
| 28 |
-
return Response(
|
| 29 |
json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
|
| 30 |
media_type="application/json; charset=utf-8",
|
| 31 |
)
|
|
@@ -36,7 +37,7 @@ def tone_detector(syllable: str):
|
|
| 36 |
"""
|
| 37 |
Thai tone detector for word.
|
| 38 |
"""
|
| 39 |
-
return Response(
|
| 40 |
json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
|
| 41 |
media_type="application/json; charset=utf-8",
|
| 42 |
)
|
|
@@ -53,7 +54,7 @@ def thaiword_to_num(text: str):
|
|
| 53 |
|
| 54 |
- **text**: Spelled-out numerals in Thai scripts
|
| 55 |
"""
|
| 56 |
-
return Response(
|
| 57 |
json.dumps({"number": py_thaiword_to_num(text)}, ensure_ascii=False),
|
| 58 |
media_type="application/json; charset=utf-8",
|
| 59 |
)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import json
|
| 3 |
from fastapi import APIRouter, Response
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
from pythainlp.util import (
|
| 6 |
bahttext as py_bahttext,
|
| 7 |
normalize as py_normalize,
|
|
|
|
| 15 |
"""
|
| 16 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
| 17 |
"""
|
| 18 |
+
return JSONResponse(
|
| 19 |
json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
|
| 20 |
media_type="application/json; charset=utf-8",
|
| 21 |
)
|
|
|
|
| 26 |
"""
|
| 27 |
Normalize and clean Thai text
|
| 28 |
"""
|
| 29 |
+
return JSONResponse(
|
| 30 |
json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
|
| 31 |
media_type="application/json; charset=utf-8",
|
| 32 |
)
|
|
|
|
| 37 |
"""
|
| 38 |
Thai tone detector for word.
|
| 39 |
"""
|
| 40 |
+
return JSONResponse(
|
| 41 |
json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
|
| 42 |
media_type="application/json; charset=utf-8",
|
| 43 |
)
|
|
|
|
| 54 |
|
| 55 |
- **text**: Spelled-out numerals in Thai scripts
|
| 56 |
"""
|
| 57 |
+
return JSONResponse(
|
| 58 |
json.dumps({"number": py_thaiword_to_num(text)}, ensure_ascii=False),
|
| 59 |
media_type="application/json; charset=utf-8",
|
| 60 |
)
|