Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,37 +1,24 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
import spacy
|
| 3 |
-
from spacy.language import Language
|
| 4 |
-
from spacy.tokens import Span
|
| 5 |
import json
|
|
|
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
| 9 |
# Ladda svensk spaCy-modell
|
| 10 |
nlp = spacy.load("sv_core_news_sm")
|
| 11 |
|
| 12 |
-
# Ladda
|
| 13 |
with open("entities.json") as f:
|
| 14 |
entities = json.load(f)
|
| 15 |
ITEMS = set(entities["items"])
|
| 16 |
COLORS = set(entities["colors"])
|
| 17 |
PRICES = set(entities["prices"])
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
for token in doc:
|
| 24 |
-
text = token.text.lower()
|
| 25 |
-
if text in ITEMS:
|
| 26 |
-
spans.append(Span(doc, token.i, token.i + 1, label="VARA"))
|
| 27 |
-
elif text in COLORS:
|
| 28 |
-
spans.append(Span(doc, token.i, token.i + 1, label="FÄRG"))
|
| 29 |
-
elif text in PRICES:
|
| 30 |
-
spans.append(Span(doc, token.i, token.i + 1, label="PRIS"))
|
| 31 |
-
doc.ents = spans
|
| 32 |
-
return doc
|
| 33 |
-
|
| 34 |
-
nlp.add_pipe("custom_matcher")
|
| 35 |
|
| 36 |
@app.post("/parse")
|
| 37 |
async def parse_user_request(request: str):
|
|
@@ -40,9 +27,24 @@ async def parse_user_request(request: str):
|
|
| 40 |
try:
|
| 41 |
# Analysera text med spaCy
|
| 42 |
doc = nlp(request)
|
| 43 |
-
entities = {ent.label_: ent.text.lower() for ent in doc.ents}
|
| 44 |
|
| 45 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if "VARA" not in entities:
|
| 47 |
return {"result": "error:ingen vara"}
|
| 48 |
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
import spacy
|
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
+
import difflib
|
| 5 |
|
| 6 |
app = FastAPI()

# Load the Swedish spaCy model.
nlp = spacy.load("sv_core_news_sm")

# Load the known entity vocabularies from entities.json.
# JSON text is UTF-8; decode it explicitly so any non-ASCII entries are read
# the same way regardless of the platform's default locale encoding.
with open("entities.json", encoding="utf-8") as f:
    entities = json.load(f)

# Sets give constant-time membership tests for the token lookups below.
ITEMS = set(entities["items"])
COLORS = set(entities["colors"])
PRICES = set(entities["prices"])
|
| 17 |
|
| 18 |
+
def correct_spelling(word: str, valid_words, threshold: float = 0.8) -> str:
    """Correct a misspelled word by snapping it to its closest valid word.

    Args:
        word: The word to check.
        valid_words: Iterable of accepted spellings to compare against.
        threshold: Minimum similarity ratio (0.0-1.0) a candidate must reach
            to be accepted as the correction.

    Returns:
        The best-matching entry from ``valid_words`` when one scores at least
        ``threshold``; otherwise ``word`` unchanged.

    Raises:
        ValueError: If ``threshold`` is outside the range [0.0, 1.0]
            (raised by ``difflib.get_close_matches``).
    """
    # n=1: only the single best candidate matters here.
    matches = difflib.get_close_matches(word, valid_words, n=1, cutoff=threshold)
    return matches[0] if matches else word
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
@app.post("/parse")
|
| 24 |
async def parse_user_request(request: str):
|
|
|
|
| 27 |
try:
|
| 28 |
# Analysera text med spaCy
|
| 29 |
doc = nlp(request)
|
|
|
|
| 30 |
|
| 31 |
+
# Extrahera entiteter
|
| 32 |
+
entities = {}
|
| 33 |
+
for token in doc:
|
| 34 |
+
text = token.text.lower()
|
| 35 |
+
# Prioritera definierade varor med stavfelskorrigering
|
| 36 |
+
corrected_text = correct_spelling(text, ITEMS)
|
| 37 |
+
if corrected_text in ITEMS:
|
| 38 |
+
entities["VARA"] = corrected_text
|
| 39 |
+
elif token.pos_ == "NOUN" and not entities.get("VARA"):
|
| 40 |
+
entities["VARA"] = corrected_text
|
| 41 |
+
# Identifiera färger och priser
|
| 42 |
+
elif text in COLORS:
|
| 43 |
+
entities["FÄRG"] = text
|
| 44 |
+
elif text in PRICES:
|
| 45 |
+
entities["PRIS"] = text
|
| 46 |
+
|
| 47 |
+
# Om ingen vara hittades
|
| 48 |
if "VARA" not in entities:
|
| 49 |
return {"result": "error:ingen vara"}
|
| 50 |
|