nabin2004 committed on
Commit
539887b
·
verified ·
1 Parent(s): c1703cf

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. requirements.txt +9 -158
  2. runed_gradio.py +57 -27
requirements.txt CHANGED
@@ -1,206 +1,57 @@
1
- absl-py==2.2.2
2
- ago==0.1.0
3
  aiofiles==24.1.0
4
  annotated-types==0.7.0
5
- antlr4-python3-runtime==4.8
6
  anyio==4.9.0
7
- asttokens==3.0.0
8
- astunparse==1.6.3
9
- attrs==25.3.0
10
- Automat==25.4.16
11
- beautifulsoup4==4.13.4
12
- bitarray==3.4.1
13
- blinker==1.9.0
14
- boto3==1.38.20
15
- botocore==1.38.20
16
- bs4==0.0.2
17
  certifi==2025.4.26
18
- cffi==1.17.1
19
- chardet==3.0.4
20
  charset-normalizer==3.4.2
21
  click==8.1.8
22
- colorama==0.4.6
23
- constantly==23.10.4
24
- cryptography==45.0.2
25
- cssselect==1.3.0
26
- Cython==3.1.1
27
- decorator==5.2.1
28
- defusedxml==0.7.1
29
- dotmap==1.3.30
30
  editdistpy==0.1.5
31
- elasticsearch==7.17.12
32
- executing==2.2.0
33
- fairseq==0.12.2
34
  fastapi==0.115.12
35
- faust-cchardet==2.1.19
36
- feedfinder2==0.0.4
37
- feedparser==6.0.11
38
  ffmpy==0.5.0
39
  filelock==3.18.0
40
- Flask==3.1.1
41
- flatbuffers==25.2.10
42
- fsspec==2025.3.2
43
- gast==0.6.0
44
- gensim==3.7.3
45
- google-pasta==0.2.0
46
- gradio==5.29.1
47
  gradio_client==1.10.1
48
  groovy==0.1.2
49
- grpcio==1.71.0
50
- gunicorn==23.0.0
51
  h11==0.16.0
52
- h5py==3.13.0
53
- hjson==3.1.0
54
  httpcore==1.0.9
55
  httpx==0.28.1
56
  huggingface-hub==0.31.4
57
- hurry.filesize==0.9
58
- hydra-core==1.0.7
59
- hyperlink==21.0.0
60
- idna==2.8
61
- importlib-resources==1.4.0
62
- incremental==24.7.2
63
- itemadapter==0.11.0
64
- itemloaders==1.3.2
65
- itsdangerous==2.2.0
66
- jedi==0.19.2
67
- jieba3k==0.35.1
68
  Jinja2==3.1.6
69
- jmespath==1.0.1
70
- joblib==1.5.0
71
- keras==3.10.0
72
- langdetect==1.0.9
73
- libclang==18.1.1
74
- lxml==5.4.0
75
- lxml_html_clean==0.4.2
76
- Markdown==3.8
77
  markdown-it-py==3.0.0
78
  MarkupSafe==3.0.2
79
- matplotlib-inline==0.1.7
80
  mdurl==0.1.2
81
- ml_dtypes==0.5.1
82
- mpmath==1.3.0
83
- namex==0.0.9
84
- Nepali-nlp @ git+https://github.com/nabin2004/Nepali_nlp@67dd261ffacdfe7ec6e9c06c57d4768be2f80628
85
  nepali-stemmer==0.0.2
86
- networkx==3.4.2
87
- news-please==1.6.10
88
- newspaper3k==0.2.8
89
- nltk==3.4.5
90
- numpy==2.1.3
91
- nvidia-cublas-cu12==12.6.4.1
92
- nvidia-cuda-cupti-cu12==12.6.80
93
- nvidia-cuda-nvrtc-cu12==12.6.77
94
- nvidia-cuda-runtime-cu12==12.6.77
95
- nvidia-cudnn-cu12==9.5.1.17
96
- nvidia-cufft-cu12==11.3.0.4
97
- nvidia-cufile-cu12==1.11.1.6
98
- nvidia-curand-cu12==10.3.7.77
99
- nvidia-cusolver-cu12==11.7.1.2
100
- nvidia-cusparse-cu12==12.5.4.2
101
- nvidia-cusparselt-cu12==0.6.3
102
- nvidia-nccl-cu12==2.26.2
103
- nvidia-nvjitlink-cu12==12.6.85
104
- nvidia-nvtx-cu12==12.6.77
105
- # omegaconf==2.0.6
106
- opencv-python==4.11.0.86
107
- opt_einsum==3.4.0
108
- optree==0.15.0
109
  orjson==3.10.18
110
  packaging==25.0
111
  pandas==2.2.3
112
- parsel==1.10.0
113
- parso==0.8.4
114
- pexpect==4.9.0
115
  pillow==11.2.1
116
- pipdeptree==2.26.1
117
- plac==1.4.5
118
- portalocker==3.1.1
119
- progressbar2==4.5.0
120
- prompt_toolkit==3.0.51
121
- Protego==0.4.0
122
- protobuf==5.29.4
123
- psycopg2-binary==2.9.10
124
- ptyprocess==0.7.0
125
- pure_eval==0.2.3
126
- pyasn1==0.6.1
127
- pyasn1_modules==0.4.2
128
- pycparser==2.22
129
- pydantic==2.11.4
130
  pydantic_core==2.33.2
131
- PyDispatcher==2.0.7
132
- pydload==1.0.9
133
  pydub==0.25.1
134
  Pygments==2.19.1
135
- PyMySQL==1.1.1
136
- pyOpenSSL==25.1.0
137
- pytesseract==0.3.13
138
  python-dateutil==2.9.0.post0
139
  python-multipart==0.0.20
140
- python-utils==3.9.1
141
  pytz==2025.2
142
  PyYAML==6.0.2
143
- queuelib==1.8.0
144
- readability-lxml==0.8.4.1
145
- regex==2024.11.6
146
- requests==2.22.0
147
- requests-file==2.1.0
148
  rich==14.0.0
149
- ruff==0.11.10
150
- s3transfer==0.12.0
151
- sacrebleu==2.5.1
152
  safehttpx==0.1.6
153
- safetensors==0.5.3
154
- scikit-learn==1.6.1
155
- scipy==1.15.3
156
- Scrapy==2.13.0
157
  semantic-version==2.10.0
158
- sentencepiece==0.2.0
159
- service-identity==24.2.0
160
- setuptools==80.8.0
161
- sgmllib3k==1.0.0
162
  shellingham==1.5.4
163
  six==1.17.0
164
- smart-open==7.1.0
165
  sniffio==1.3.1
166
- snowballstemmer==3.0.1
167
- soupsieve==2.7
168
- spello==1.2.0
169
- stack-data==0.6.3
170
  starlette==0.46.2
171
- sympy==1.14.0
172
  symspellpy==6.9.0
173
- tabulate==0.9.0
174
- tensorboard==2.19.0
175
- tensorboard-data-server==0.7.2
176
- tensorboardX==2.6.2.2
177
- tensorflow==2.19.0
178
- termcolor==3.1.0
179
- threadpoolctl==3.6.0
180
- tinysegmenter==0.3
181
- tldextract==5.3.0
182
- tokenizers==0.21.1
183
  tomlkit==0.13.2
184
- torch==2.7.0
185
- torchaudio==2.7.0
186
  tqdm==4.67.1
187
- traitlets==5.14.3
188
- transformers==4.52.1
189
- triton==3.3.0
190
- Twisted==24.11.0
191
  typer==0.15.4
192
- typing-inspection==0.4.0
193
  typing_extensions==4.13.2
194
  tzdata==2025.2
195
  urllib3==2.4.0
196
- uv==0.7.7
197
  uvicorn==0.34.2
198
- w3lib==2.3.1
199
- warcio==1.7.5
200
- wcwidth==0.2.13
201
  websockets==15.0.1
202
- Werkzeug==3.1.3
203
- wget==3.2
204
- wheel==0.45.1
205
- wrapt==1.17.2
206
- zope.interface==7.2
 
 
 
1
  aiofiles==24.1.0
2
  annotated-types==0.7.0
 
3
  anyio==4.9.0
 
 
 
 
 
 
 
 
 
 
4
  certifi==2025.4.26
 
 
5
  charset-normalizer==3.4.2
6
  click==8.1.8
 
 
 
 
 
 
 
 
7
  editdistpy==0.1.5
 
 
 
8
  fastapi==0.115.12
 
 
 
9
  ffmpy==0.5.0
10
  filelock==3.18.0
11
+ fsspec==2025.5.0
12
+ gradio==5.31.0
 
 
 
 
 
13
  gradio_client==1.10.1
14
  groovy==0.1.2
 
 
15
  h11==0.16.0
 
 
16
  httpcore==1.0.9
17
  httpx==0.28.1
18
  huggingface-hub==0.31.4
19
+ idna==3.10
20
+ importlib_resources==6.5.2
 
 
 
 
 
 
 
 
 
21
  Jinja2==3.1.6
 
 
 
 
 
 
 
 
22
  markdown-it-py==3.0.0
23
  MarkupSafe==3.0.2
 
24
  mdurl==0.1.2
 
 
 
 
25
  nepali-stemmer==0.0.2
26
+ numpy==2.2.6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  orjson==3.10.18
28
  packaging==25.0
29
  pandas==2.2.3
 
 
 
30
  pillow==11.2.1
31
+ pydantic==2.11.5
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  pydantic_core==2.33.2
 
 
33
  pydub==0.25.1
34
  Pygments==2.19.1
 
 
 
35
  python-dateutil==2.9.0.post0
36
  python-multipart==0.0.20
 
37
  pytz==2025.2
38
  PyYAML==6.0.2
39
+ requests==2.32.3
 
 
 
 
40
  rich==14.0.0
41
+ ruff==0.11.11
 
 
42
  safehttpx==0.1.6
 
 
 
 
43
  semantic-version==2.10.0
 
 
 
 
44
  shellingham==1.5.4
45
  six==1.17.0
 
46
  sniffio==1.3.1
 
 
 
 
47
  starlette==0.46.2
 
48
  symspellpy==6.9.0
 
 
 
 
 
 
 
 
 
 
49
  tomlkit==0.13.2
 
 
50
  tqdm==4.67.1
 
 
 
 
51
  typer==0.15.4
52
+ typing-inspection==0.4.1
53
  typing_extensions==4.13.2
54
  tzdata==2025.2
55
  urllib3==2.4.0
 
56
  uvicorn==0.34.2
 
 
 
57
  websockets==15.0.1
 
 
 
 
 
runed_gradio.py CHANGED
@@ -34,22 +34,15 @@ def save_to_vocab(word: str, filepath: str = vocab_path) -> str:
34
  f.write(word + "\n")
35
  return f"'{word}' added to vocab."
36
 
37
- def load_simplified_keys(filepath: str) -> Set[str]:
38
- keys = set()
39
- with open(filepath, "r", encoding="utf-8") as f:
40
- for line in f:
41
- word = line.strip()
42
- if word:
43
- keys.add(word)
44
- return keys
45
-
46
  def load_simplified_map(filepath: str) -> Dict[str, str]:
47
  with open(filepath, "r", encoding="utf-8") as f:
48
  data = json.load(f)
49
  return {v: k for k, v in data.items()}
50
 
51
- def list_locations(simplified_keys_file: str = simplified_only_path,
52
- simplified_map_file: str = simplified_dict_path) -> str:
 
 
53
  simplified_map = load_simplified_map(simplified_map_file)
54
 
55
  keys = []
@@ -58,15 +51,15 @@ def list_locations(simplified_keys_file: str = simplified_only_path,
58
  line = line.strip()
59
  if not line:
60
  continue
61
- key = line.split("$")[0]
62
  keys.append(key)
63
 
64
- output_lines = []
65
  for key in keys:
66
  original_name = simplified_map.get(key, "Unknown")
67
- output_lines.append(f"{key} -> {original_name}")
68
 
69
- return "\n".join(output_lines)
70
 
71
  def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
72
  sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
@@ -81,7 +74,7 @@ def correct_sentence(
81
  max_edit_distance: int,
82
  prefix_length: int,
83
  top_k: int
84
- ) -> List[str]:
85
 
86
  sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
87
  simplified_map = load_simplified_map(simplified_dict_path)
@@ -122,7 +115,7 @@ def correct_sentence(
122
  sentence_options.append(correction_list)
123
 
124
  corrected_variants = [' '.join(variant) for variant in product(*sentence_options)]
125
- return corrected_variants
126
 
127
  # ------------------- Gradio UI -------------------
128
 
@@ -130,35 +123,72 @@ with gr.Blocks(title="Nepali Spell Correction Tool") as demo:
130
  gr.Markdown(
131
  """
132
  # Nepali Spell Correction Tool
133
- Automatically correct Nepali sentences using spelling correction and stemming techniques.
 
134
  """
135
  )
136
 
 
 
 
 
 
 
137
  with gr.Row():
138
  with gr.Column(scale=3):
139
- gr.Markdown("### Sentence Correction")
140
- sentence_input = gr.Textbox(label="Input Sentence", placeholder="Enter a Nepali sentence...", lines=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  max_dist = gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance")
142
  prefix_len = gr.Slider(1, 5, value=3, step=1, label="Prefix Length")
143
  top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions")
144
 
145
  submit_btn = gr.Button("Correct Sentence")
146
 
147
- gr.Markdown("### Location Vocabulary")
148
 
149
- with gr.Accordion("View and Manage Locations", open=False):
150
- loc_out = gr.Textbox(label="Available Locations", lines=8, interactive=False)
 
 
 
 
 
 
151
  view_btn = gr.Button("Show Locations")
152
 
153
- # Uncomment below to add vocab management features
154
  # new_loc = gr.Textbox(label="Add New Place", placeholder="e.g., काठमाडौँ")
155
  # add_btn = gr.Button("Add Location")
156
  # add_msg = gr.Textbox(label="Status", interactive=False)
157
 
158
  with gr.Column(scale=2):
159
- corrected_out = gr.Textbox(label="Corrected Variants", lines=8)
 
 
 
 
 
 
160
 
161
- # Bind buttons to functions
162
  submit_btn.click(
163
  correct_sentence,
164
  inputs=[sentence_input, max_dist, prefix_len, top_k],
@@ -166,7 +196,7 @@ with gr.Blocks(title="Nepali Spell Correction Tool") as demo:
166
  )
167
 
168
  view_btn.click(
169
- list_locations,
170
  inputs=[],
171
  outputs=loc_out
172
  )
 
34
  f.write(word + "\n")
35
  return f"'{word}' added to vocab."
36
 
 
 
 
 
 
 
 
 
 
37
  def load_simplified_map(filepath: str) -> Dict[str, str]:
38
  with open(filepath, "r", encoding="utf-8") as f:
39
  data = json.load(f)
40
  return {v: k for k, v in data.items()}
41
 
42
+ def list_locations_as_table(
43
+ simplified_keys_file: str = simplified_only_path,
44
+ simplified_map_file: str = simplified_dict_path
45
+ ) -> List[List[str]]:
46
  simplified_map = load_simplified_map(simplified_map_file)
47
 
48
  keys = []
 
51
  line = line.strip()
52
  if not line:
53
  continue
54
+ key = line.split("$")[0]
55
  keys.append(key)
56
 
57
+ output_table = []
58
  for key in keys:
59
  original_name = simplified_map.get(key, "Unknown")
60
+ output_table.append([key, original_name])
61
 
62
+ return output_table
63
 
64
  def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
65
  sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
 
74
  max_edit_distance: int,
75
  prefix_length: int,
76
  top_k: int
77
+ ) -> List[List[str]]:
78
 
79
  sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
80
  simplified_map = load_simplified_map(simplified_dict_path)
 
115
  sentence_options.append(correction_list)
116
 
117
  corrected_variants = [' '.join(variant) for variant in product(*sentence_options)]
118
+ return [[variant] for variant in corrected_variants]
119
 
120
  # ------------------- Gradio UI -------------------
121
 
 
123
  gr.Markdown(
124
  """
125
  # Nepali Spell Correction Tool
126
+
127
+ Enter a Nepali sentence to generate corrected variants. You can also view and manage the location vocabulary.
128
  """
129
  )
130
 
131
+ example_sentences = {
132
+ "Example 1": "भतपरको जिज्ञासु वातावरणले धेरै पर्यटकलाई आकर्षित गर्छ।",
133
+ "Example 2": "ललतपुर प्राचीन मूर्तिकला र वास्तुकलाको केन्द्र हो।",
134
+ "Example 3": "पोखराेाै प्रकृतिक सौन्दर्यले भरिपूर्ण शहर हो।"
135
+ }
136
+
137
  with gr.Row():
138
  with gr.Column(scale=3):
139
+ gr.Markdown("## Sentence Correction")
140
+
141
+ example_dropdown = gr.Dropdown(
142
+ label="Choose Example Sentence",
143
+ choices=list(example_sentences.values()),
144
+ value=list(example_sentences.values())[0],
145
+ interactive=True
146
+ )
147
+
148
+ sentence_input = gr.Textbox(
149
+ label="Input Sentence",
150
+ value=list(example_sentences.values())[0],
151
+ placeholder="Enter a Nepali sentence",
152
+ lines=2
153
+ )
154
+
155
+ def set_example(example):
156
+ return example
157
+
158
+ example_dropdown.change(set_example, inputs=[example_dropdown], outputs=[sentence_input])
159
+
160
  max_dist = gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance")
161
  prefix_len = gr.Slider(1, 5, value=3, step=1, label="Prefix Length")
162
  top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions")
163
 
164
  submit_btn = gr.Button("Correct Sentence")
165
 
166
+ gr.Markdown("## Location Vocabulary Table")
167
 
168
+ with gr.Accordion("View or Manage Location Vocabulary", open=False):
169
+ loc_out = gr.Dataframe(
170
+ headers=["Simplified Form", "Original Name"],
171
+ datatype=["str", "str"],
172
+ row_count=5,
173
+ interactive=False,
174
+ label="Location Vocabulary"
175
+ )
176
  view_btn = gr.Button("Show Locations")
177
 
178
+ # Uncomment below to enable adding new locations
179
  # new_loc = gr.Textbox(label="Add New Place", placeholder="e.g., काठमाडौँ")
180
  # add_btn = gr.Button("Add Location")
181
  # add_msg = gr.Textbox(label="Status", interactive=False)
182
 
183
  with gr.Column(scale=2):
184
+ gr.Markdown("## Corrected Sentence Variants")
185
+ corrected_out = gr.Dataframe(
186
+ headers=["Corrected Sentence Variants"],
187
+ datatype=["str"],
188
+ row_count=5,
189
+ interactive=False
190
+ )
191
 
 
192
  submit_btn.click(
193
  correct_sentence,
194
  inputs=[sentence_input, max_dist, prefix_len, top_k],
 
196
  )
197
 
198
  view_btn.click(
199
+ list_locations_as_table,
200
  inputs=[],
201
  outputs=loc_out
202
  )