predict lang fix2
Browse files- app.py +22 -6
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,21 +1,37 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from huggingface_hub import hf_hub_download
|
| 3 |
import fasttext
|
|
|
|
| 4 |
|
| 5 |
REPO = "NbAiLab/nb-nbnn-lid"
|
| 6 |
FILE = "nb-nbnn-lid.ftz"
|
|
|
|
| 7 |
MODEL_PATH = hf_hub_download(repo_id=REPO, filename=FILE)
|
| 8 |
ft = fasttext.load_model(MODEL_PATH)
|
| 9 |
-
MAP = {"__label__nob": "nb", "__label__nno": "nn"}
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
return ""
|
| 14 |
-
label, score = ft.predict(
|
| 15 |
return MAP.get(label[0], "nb")
|
| 16 |
|
| 17 |
-
demo = gr.Interface(
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
if __name__ == "__main__":
|
| 21 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from huggingface_hub import hf_hub_download
|
| 3 |
import fasttext
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
# Hugging Face Hub coordinates of the fastText language-ID model.
REPO = "NbAiLab/nb-nbnn-lid"
FILE = "nb-nbnn-lid.ftz"

# Resolve the model file through the Hub client and load it once at import
# time, so every request reuses the same in-memory model.
MODEL_PATH = hf_hub_download(filename=FILE, repo_id=REPO)
ft = fasttext.load_model(MODEL_PATH)

# fastText label -> short language code (Bokmål / Nynorsk).
MAP = {
    "__label__nob": "nb",
    "__label__nno": "nn",
}
|
| 13 |
+
|
| 14 |
+
def _clean(text: str) -> str:
|
| 15 |
+
# fastText expects ONE line: strip, collapse whitespace, remove newlines
|
| 16 |
+
text = text or ""
|
| 17 |
+
text = re.sub(r"[\r\n\t]+", " ", text).strip()
|
| 18 |
+
# optional: shrink multiple spaces
|
| 19 |
+
text = re.sub(r"\s{2,}", " ", text)
|
| 20 |
+
return text
|
| 21 |
+
|
| 22 |
+
def classify(text: str) -> str:
    """Return the language code predicted for *text*.

    The input is first normalised with ``_clean``. Blank input yields an
    empty string without calling the model. Otherwise the first label
    returned by the fastText model is translated through ``MAP``; a label
    missing from ``MAP`` falls back to "nb".
    """
    cleaned = _clean(text)
    if cleaned:
        # predict() on a single string returns (labels, scores); only the
        # first label is used here.
        labels, _scores = ft.predict(cleaned)
        return MAP.get(labels[0], "nb")
    return ""
|
| 28 |
|
| 29 |
+
# Minimal Gradio UI: one text box in, one text box out showing the label.
demo = gr.Interface(
    title="Bokmål / Nynorsk classifier",
    fn=classify,
    inputs=gr.Textbox(label="Text"),
    outputs=gr.Textbox(label="Label (nb or nn)"),
)

if __name__ == "__main__":
    # When run as a script, serve on all interfaces at port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
gradio
|
| 2 |
huggingface_hub
|
| 3 |
-
fasttext-wheel
|
|
|
|
|
|
| 1 |
gradio
|
| 2 |
huggingface_hub
|
| 3 |
+
fasttext-wheel==0.9.2
|
| 4 |
+
numpy<2.0
|