dagaa committed on
Commit
b2de20c
·
1 Parent(s): a9ca93f

predict lang fix2

Browse files
Files changed (2) hide show
  1. app.py +22 -6
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,21 +1,37 @@
1
  import gradio as gr
2
  from huggingface_hub import hf_hub_download
3
  import fasttext
 
4
 
5
  REPO = "NbAiLab/nb-nbnn-lid"
6
  FILE = "nb-nbnn-lid.ftz"
 
7
  MODEL_PATH = hf_hub_download(repo_id=REPO, filename=FILE)
8
  ft = fasttext.load_model(MODEL_PATH)
9
- MAP = {"__label__nob": "nb", "__label__nno": "nn"}
10
 
11
- def classify(text):
12
- if not text or not text.strip():
 
 
 
 
 
 
 
 
 
 
 
13
  return ""
14
- label, score = ft.predict(text.strip())
15
  return MAP.get(label[0], "nb")
16
 
17
- demo = gr.Interface(fn=classify, inputs="text", outputs="text",
18
- title="Bokmål / Nynorsk classifier")
 
 
 
 
19
 
20
  if __name__ == "__main__":
21
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  from huggingface_hub import hf_hub_download
3
  import fasttext
4
+ import re
5
 
6
  REPO = "NbAiLab/nb-nbnn-lid"
7
  FILE = "nb-nbnn-lid.ftz"
8
+
9
  MODEL_PATH = hf_hub_download(repo_id=REPO, filename=FILE)
10
  ft = fasttext.load_model(MODEL_PATH)
 
11
 
12
+ MAP = {"__label__nob": "nb", "__label__nno": "nn"} # Bokmål / Nynorsk
13
+
14
+ def _clean(text: str) -> str:
15
+ # fastText expects ONE line: strip, collapse whitespace, remove newlines
16
+ text = text or ""
17
+ text = re.sub(r"[\r\n\t]+", " ", text).strip()
18
+ # optional: shrink multiple spaces
19
+ text = re.sub(r"\s{2,}", " ", text)
20
+ return text
21
+
22
def classify(text: str) -> str:
    """Return the language code predicted for *text*.

    The input is first normalized with ``_clean``. The top-1 fastText
    label is then translated through ``MAP``.

    Args:
        text: Raw user input; may be empty or span several lines.

    Returns:
        ``""`` when the cleaned input is empty; otherwise the ``MAP`` entry
        for the predicted label, or ``"nb"`` when the label is not in
        ``MAP`` or no label was predicted.
    """
    line = _clean(text)
    if not line:
        return ""
    # predict() on a single string returns (labels, probabilities); only
    # the labels are used here.
    labels, _ = ft.predict(line)
    if not labels:
        # An empty prediction would make labels[0] raise IndexError; use the
        # same default that an unmapped label already gets.
        return "nb"
    return MAP.get(labels[0], "nb")
28
 
29
+ demo = gr.Interface(
30
+ fn=classify,
31
+ inputs=gr.Textbox(label="Text"),
32
+ outputs=gr.Textbox(label="Label (nb or nn)"),
33
+ title="Bokmål / Nynorsk classifier"
34
+ )
35
 
36
  if __name__ == "__main__":
37
  demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio
2
  huggingface_hub
3
- fasttext-wheel
 
 
1
  gradio
2
  huggingface_hub
3
+ fasttext-wheel==0.9.2
4
+ numpy<2.0