|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys |
|
|
import spacy |
|
|
from pathlib import Path |
|
|
import json |
|
|
|
|
|
|
|
|
# --- Tunable settings -------------------------------------------------------

# Presumably the number of decimal places for reported scores -- TODO confirm
# (filterDoc takes its own ``roundTo`` argument).
RoundTo = 2

# Text encoding name; presumably used for file I/O elsewhere -- TODO confirm.
Encoding = "utf8"

# Passed to filterDoc: categories must score strictly above this to be kept.
ScoreThreshold = 0.2

# Passed to filterDoc: at most this many categories are reported.
MaxResults = 3

# Minimum letter counts; presumably for paragraph / list detection -- TODO confirm.
ParagraphMinLetters = 10
ListMinLetters = 10

# --- Shared state -----------------------------------------------------------

# Starts out empty; presumably meant to hold loaded sub-models -- TODO confirm.
SubModels = dict()
|
|
|
|
|
# Command line: <script> <main-model> <sub-model-dir> <text>

# Main spaCy pipeline, loaded from the first command-line argument.
Nlp = spacy.load(sys.argv[1])

# Directory holding the per-label sub-models. The second argument is resolved
# against this script's own directory (an absolute argument replaces it).
SubModelDir = (Path(__file__).parent / sys.argv[2]).absolute()

# Text to classify (third argument).
# NOTE(review): this name shadows the built-in input(); it is kept because the
# rest of the script refers to it.
input = sys.argv[3]
|
|
|
|
|
|
|
|
def filterDoc(doc, scoreThreshold, maxResults, roundTo=2):
    """Return the best-scoring categories of ``doc`` as a ``{label: score}`` dict.

    Categories are read from ``doc.cats``. Only those whose score is strictly
    greater than ``scoreThreshold`` are kept; they are ordered by descending
    score (ties keep their original order), cut down to the first
    ``maxResults`` entries, and each score is rounded to ``roundTo`` decimals.

    Args:
        doc: Object exposing a ``cats`` mapping of label to numeric score.
        scoreThreshold: Exclusive lower bound for a score to be kept.
        maxResults: Maximum number of categories to return.
        roundTo: Number of decimal places passed to ``round``.

    Returns:
        dict: Label to rounded score, in descending score order.
    """
    # Keep (label, score) pairs that clear the threshold.
    kept = [pair for pair in doc.cats.items() if pair[1] > scoreThreshold]

    # Stable sort, highest score first.
    kept.sort(key=lambda pair: pair[1], reverse=True)

    return {label: round(score, roundTo) for label, score in kept[:maxResults]}
|
|
|
|
|
|
|
|
|
|
|
def recognize(text):
    """Classify ``text`` with the main model, then refine each label with its sub-model.

    The main pipeline ``Nlp`` scores ``text`` and ``filterDoc`` keeps the best
    labels (using ``ScoreThreshold`` and ``MaxResults``). For every kept label,
    a path named after the whitespace-stripped label is looked up under
    ``SubModelDir``. When it exists, that pipeline is applied to the same text
    and its filtered categories are reported under ``'subs'``.

    Loaded sub-models are kept in the module-level ``SubModels`` dict so each
    one is read from disk at most once per process. The main pipeline ``Nlp``
    is never rebound here, so repeated calls always start from the main model.

    Args:
        text: The text to classify.

    Returns:
        dict: Maps each stripped label to ``{'score': <main score>}``, plus a
        ``'subs'`` dict of sub-model results when a sub-model was found. If a
        sub-model path is missing, the top-level key ``"messages"`` holds a
        "not found" message for the last missing path.
    """
    labels = filterDoc(Nlp(text), ScoreThreshold, MaxResults)

    relabels = {}
    for label, score in labels.items():
        label2 = label.strip()
        SubModelPath = SubModelDir.joinpath(label2).absolute()

        # Reuse an already-loaded sub-model; otherwise load it if it is on disk.
        # A local name is used on purpose: rebinding the global Nlp would
        # replace the main pipeline for every later call.
        subModel = SubModels.get(label2)
        if subModel is None and SubModelPath.exists():
            subModel = spacy.load(SubModelPath)
            SubModels[label2] = subModel

        if subModel is not None:
            docSub = filterDoc(subModel(text), ScoreThreshold, MaxResults)
            relabels[label2] = {'score': score, 'subs': docSub}
        else:
            relabels[label2] = {'score': score}
            # NOTE(review): "messages" shares the key space with the labels and
            # is overwritten for each missing sub-model, so only the last one
            # is reported. Kept as-is to preserve the output format.
            relabels["messages"] = "Submodel path \"" + str(SubModelPath) + "\" not found"

    return relabels
|
|
|
|
|
# Classify the command-line text and write the result to stdout as one JSON line.
_result_json = json.dumps(recognize(input))
print(_result_json)
|
|
|