nabin2004 committed on
Commit
a3476c1
·
verified ·
1 Parent(s): af875ad

Upload folder using huggingface_hub

Browse files
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
data/simplified_dict.txt CHANGED
@@ -14975,6 +14975,7 @@
14975
  "िशवालय": "शवलय",
14976
  "िसददेश्वर अाधारभुत िवद्यालय": "सददश्वरअधरभतवद्यलय",
14977
  "ोखरपौवा": "खरपव",
 
14978
  "ॐ ओम शान्ति ध्यान केन्द्र": "ॐओमशन्तध्यनकन्द्र",
14979
  "ॐ नम पाण्डवेश्वर महादेव बचत तथा ऋण सहकारी सस्था लिमिटेड": "ॐनमपण्डवश्वरमहदवबचततथऋणसहकरसस्थलमटड",
14980
  "ॐ नमः शिवाय फर्मा": "ॐनमशवयफर्म",
@@ -14995,5 +14996,5 @@
14995
  "४ नं वडा कार्यालय महेन्द्रधार": "४नवडकर्यलयमहन्द्रधर",
14996
  "६६१/१५": "६६११५",
14997
  "७ अ साईड मैडान": "७असईडमडन",
14998
- "८डी म्याजिक थ्रीयटर": "८डम्यजकथ्रयटर",
14999
  }
 
14975
  "िशवालय": "शवलय",
14976
  "िसददेश्वर अाधारभुत िवद्यालय": "सददश्वरअधरभतवद्यलय",
14977
  "ोखरपौवा": "खरपव",
14978
+ "रुकुम":"रकम",
14979
  "ॐ ओम शान्ति ध्यान केन्द्र": "ॐओमशन्तध्यनकन्द्र",
14980
  "ॐ नम पाण्डवेश्वर महादेव बचत तथा ऋण सहकारी सस्था लिमिटेड": "ॐनमपण्डवश्वरमहदवबचततथऋणसहकरसस्थलमटड",
14981
  "ॐ नमः शिवाय फर्मा": "ॐनमशवयफर्म",
 
14996
  "४ नं वडा कार्यालय महेन्द्रधार": "४नवडकर्यलयमहन्द्रधर",
14997
  "६६१/१५": "६६११५",
14998
  "७ अ साईड मैडान": "७असईडमडन",
14999
+ "८डी म्याजिक थ्रीयटर": "८डम्यजकथ्रयटर"
15000
  }
data/vocab.txt CHANGED
@@ -252549,3 +252549,4 @@
252549
  देख्नुभएका
252550
  बदल्छन्
252551
  मस्यौदाभित्र
 
 
252549
  देख्नुभएका
252550
  बदल्छन्
252551
  मस्यौदाभित्र
252552
+ रुकुम (पश्चिम भाग)
requirements.txt CHANGED
@@ -60,8 +60,6 @@ hyperlink==21.0.0
60
  idna==2.8
61
  importlib-resources==1.4.0
62
  incremental==24.7.2
63
- ipython==9.2.0
64
- ipython_pygments_lexers==1.1.1
65
  itemadapter==0.11.0
66
  itemloaders==1.3.2
67
  itsdangerous==2.2.0
 
60
  idna==2.8
61
  importlib-resources==1.4.0
62
  incremental==24.7.2
 
 
63
  itemadapter==0.11.0
64
  itemloaders==1.3.2
65
  itsdangerous==2.2.0
runed_gradio.py CHANGED
@@ -1,9 +1,15 @@
1
  import gradio as gr
2
  import re
 
3
  from symspellpy import SymSpell, Verbosity
4
  from nepali_stemmer.stemmer import NepStemmer
5
  from itertools import product
6
- from typing import List, Tuple, Dict, Set
 
 
 
 
 
7
 
8
  # ------------------- Utilities -------------------
9
 
@@ -17,17 +23,50 @@ def load_vocab(filepath: str) -> Set[str]:
17
  with open(filepath, "r", encoding="utf-8") as f:
18
  return {line.strip() for line in f if line.strip()}
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def load_simplified_map(filepath: str) -> Dict[str, str]:
21
- simplified_map = {}
22
  with open(filepath, "r", encoding="utf-8") as f:
 
 
 
 
 
 
 
 
 
23
  for line in f:
24
- if ":" not in line:
 
25
  continue
26
- parts = line.strip().strip(",").replace('"', '').split(":")
27
- if len(parts) == 2:
28
- orig, simp = parts[0].strip(), parts[1].strip()
29
- simplified_map[simp] = orig
30
- return simplified_map
 
 
 
 
31
 
32
  def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
33
  sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
@@ -44,11 +83,6 @@ def correct_sentence(
44
  top_k: int
45
  ) -> List[str]:
46
 
47
- simplified_only_path = "./data/simplified_only_names2.txt"
48
- simplified_dict_path = "./data/simplified_dict.txt"
49
- vocab_path = "./data/vocab.txt"
50
-
51
- # Load components
52
  sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
53
  simplified_map = load_simplified_map(simplified_dict_path)
54
  vocab = load_vocab(vocab_path)
@@ -92,24 +126,54 @@ def correct_sentence(
92
 
93
  # ------------------- Gradio UI -------------------
94
 
95
- examples = [
96
- ["भतपरको जिज्ञासु वातावरणले धेरै पर्यटकलाई आकर्षित गर्छ।", 2, 3, 3],
97
- ["ललतपुर प्राचीन मूर्तिकला र वास्तुकलाको केन्द्र हो।", 2, 3, 3],
98
- ]
99
-
100
- iface = gr.Interface(
101
- fn=correct_sentence,
102
- inputs=[
103
- gr.Textbox(label="Input Nepali Sentence", lines=2, placeholder="नेपालको समृद्ध इतिहास..."),
104
- gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance"),
105
- gr.Slider(1, 5, value=3, step=1, label="Prefix Length"),
106
- gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions per Word")
107
- ],
108
- outputs=gr.Textbox(label="Corrected Sentence Variants"),
109
- title="Nepali Spell Correction App",
110
- description="Generates corrected sentence variants using SymSpell and a stemmer.",
111
- examples=examples
112
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  if __name__ == "__main__":
115
- iface.launch(share=True)
 
1
  import gradio as gr
2
  import re
3
+ import json
4
  from symspellpy import SymSpell, Verbosity
5
  from nepali_stemmer.stemmer import NepStemmer
6
  from itertools import product
7
+ from typing import List, Dict, Set
8
+
9
+ # ------------------- Paths -------------------
10
+ simplified_only_path = "./data/simplified_only_names2.txt"
11
+ simplified_dict_path = "./data/simplified_dict.txt"
12
+ vocab_path = "./data/vocab.txt"
13
 
14
  # ------------------- Utilities -------------------
15
 
 
23
  with open(filepath, "r", encoding="utf-8") as f:
24
  return {line.strip() for line in f if line.strip()}
25
 
26
def save_to_vocab(word: str, filepath: str = vocab_path) -> str:
    """Append *word* to the vocabulary file unless it is already listed.

    Args:
        word: Candidate entry; surrounding whitespace is stripped first.
        filepath: UTF-8 vocabulary file holding one entry per line.

    Returns:
        A status message: invalid input, already present, or added.
    """
    import os  # local import: only needed for the end-of-file probe below

    word = word.strip()
    if not word:
        return "Invalid input. No word added."
    if word in load_vocab(filepath):
        return f"'{word}' already exists in the vocab."

    # If the last line of the file is not newline-terminated, a plain append
    # would glue the new word onto the previous entry. Probe the final byte
    # (cheap even for a very large vocab) and add a separator when needed.
    with open(filepath, "rb") as f:
        f.seek(0, os.SEEK_END)
        if f.tell() == 0:
            needs_newline = False
        else:
            f.seek(-1, os.SEEK_END)
            needs_newline = f.read(1) != b"\n"

    with open(filepath, "a", encoding="utf-8") as f:
        if needs_newline:
            f.write("\n")
        f.write(word + "\n")
    return f"'{word}' added to vocab."
36
+
37
def load_simplified_keys(filepath: str) -> Set[str]:
    """Return the distinct non-blank lines of a UTF-8 text file.

    Args:
        filepath: Path to a file with one key per line.

    Returns:
        The set of lines after stripping surrounding whitespace; lines that
        are empty after stripping are ignored.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        return {entry for entry in map(str.strip, f) if entry}
45
+
46
def load_simplified_map(filepath: str) -> Dict[str, str]:
    """Load the original -> simplified JSON dictionary and return it inverted.

    Args:
        filepath: UTF-8 file containing one JSON object whose keys are
            original names and whose values are their simplified forms.

    Returns:
        A mapping from simplified form to original name. When several
        originals share a simplified form, the one appearing last wins.

    Raises:
        json.JSONDecodeError: The file is not valid JSON (for example a
            trailing comma before the closing brace); the message names
            the offending file.
        ValueError: The top-level JSON value is not an object.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError as err:
            # Re-raise the same exception type, but say which file is broken.
            raise json.JSONDecodeError(
                f"{filepath}: {err.msg}", err.doc, err.pos
            ) from err

    if not isinstance(data, dict):
        raise ValueError(
            f"{filepath}: expected a JSON object, got {type(data).__name__}"
        )
    return {simplified: original for original, simplified in data.items()}
50
+
51
def list_locations(simplified_keys_file: str = simplified_only_path,
                   simplified_map_file: str = simplified_dict_path) -> str:
    """Render every simplified key alongside its original name.

    Args:
        simplified_keys_file: UTF-8 file with one entry per line; only the
            text before the first ``$`` on each line is used as the key.
        simplified_map_file: JSON dictionary file passed to
            ``load_simplified_map``.

    Returns:
        One ``key -> original`` line per non-blank entry, in file order,
        joined with newlines. Keys missing from the map show ``Unknown``.
    """
    simplified_map = load_simplified_map(simplified_map_file)

    with open(simplified_keys_file, "r", encoding="utf-8") as f:
        keys = [entry.split("$")[0] for entry in map(str.strip, f) if entry]

    return "\n".join(
        f"{key} -> {simplified_map.get(key, 'Unknown')}" for key in keys
    )
70
 
71
  def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
72
  sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
 
83
  top_k: int
84
  ) -> List[str]:
85
 
 
 
 
 
 
86
  sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
87
  simplified_map = load_simplified_map(simplified_dict_path)
88
  vocab = load_vocab(vocab_path)
 
126
 
127
  # ------------------- Gradio UI -------------------
128
 
129
+ with gr.Blocks(title="Nepali Spell Correction Tool") as demo:
130
+ gr.Markdown(
131
+ """
132
+ # Nepali Spell Correction Tool
133
+ Automatically correct Nepali sentences using spelling correction and stemming techniques.
134
+ """
135
+ )
136
+
137
+ with gr.Row():
138
+ with gr.Column(scale=3):
139
+ gr.Markdown("### Sentence Correction")
140
+ sentence_input = gr.Textbox(label="Input Sentence", placeholder="Enter a Nepali sentence...", lines=2)
141
+ max_dist = gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance")
142
+ prefix_len = gr.Slider(1, 5, value=3, step=1, label="Prefix Length")
143
+ top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions")
144
+
145
+ submit_btn = gr.Button("Correct Sentence")
146
+
147
+ gr.Markdown("### Location Vocabulary")
148
+
149
+ with gr.Accordion("View and Manage Locations", open=False):
150
+ loc_out = gr.Textbox(label="Available Locations", lines=8, interactive=False)
151
+ view_btn = gr.Button("Show Locations")
152
+
153
+ # Uncomment below to add vocab management features
154
+ # new_loc = gr.Textbox(label="Add New Place", placeholder="e.g., काठमाडौँ")
155
+ # add_btn = gr.Button("Add Location")
156
+ # add_msg = gr.Textbox(label="Status", interactive=False)
157
+
158
+ with gr.Column(scale=2):
159
+ corrected_out = gr.Textbox(label="Corrected Variants", lines=8)
160
+
161
+ # Bind buttons to functions
162
+ submit_btn.click(
163
+ correct_sentence,
164
+ inputs=[sentence_input, max_dist, prefix_len, top_k],
165
+ outputs=corrected_out
166
+ )
167
+
168
+ view_btn.click(
169
+ list_locations,
170
+ inputs=[],
171
+ outputs=loc_out
172
+ )
173
+
174
+ # add_btn.click(save_to_vocab, inputs=new_loc, outputs=add_msg)
175
+
176
+ # ------------------- Launch App -------------------
177
 
178
  if __name__ == "__main__":
179
+ demo.launch()