Spaces:

nabin2004
/

SymSpell_for_Post_processing_ASR_applications

Sleeping

App Files Community

nabin2004 committed on May 23, 2025

Commit

539887b

verified ·

1 Parent(s): c1703cf

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

requirements.txt +9 -158
runed_gradio.py +57 -27

requirements.txt CHANGED Viewed

@@ -1,206 +1,57 @@
-absl-py==2.2.2
-ago==0.1.0
 aiofiles==24.1.0
 annotated-types==0.7.0
-antlr4-python3-runtime==4.8
 anyio==4.9.0
-asttokens==3.0.0
-astunparse==1.6.3
-attrs==25.3.0
-Automat==25.4.16
-beautifulsoup4==4.13.4
-bitarray==3.4.1
-blinker==1.9.0
-boto3==1.38.20
-botocore==1.38.20
-bs4==0.0.2
 certifi==2025.4.26
-cffi==1.17.1
-chardet==3.0.4
 charset-normalizer==3.4.2
 click==8.1.8
-colorama==0.4.6
-constantly==23.10.4
-cryptography==45.0.2
-cssselect==1.3.0
-Cython==3.1.1
-decorator==5.2.1
-defusedxml==0.7.1
-dotmap==1.3.30
 editdistpy==0.1.5
-elasticsearch==7.17.12
-executing==2.2.0
-fairseq==0.12.2
 fastapi==0.115.12
-faust-cchardet==2.1.19
-feedfinder2==0.0.4
-feedparser==6.0.11
 ffmpy==0.5.0
 filelock==3.18.0
-Flask==3.1.1
-flatbuffers==25.2.10
-fsspec==2025.3.2
-gast==0.6.0
-gensim==3.7.3
-google-pasta==0.2.0
-gradio==5.29.1
 gradio_client==1.10.1
 groovy==0.1.2
-grpcio==1.71.0
-gunicorn==23.0.0
 h11==0.16.0
-h5py==3.13.0
-hjson==3.1.0
 httpcore==1.0.9
 httpx==0.28.1
 huggingface-hub==0.31.4
-hurry.filesize==0.9
-hydra-core==1.0.7
-hyperlink==21.0.0
-idna==2.8
-importlib-resources==1.4.0
-incremental==24.7.2
-itemadapter==0.11.0
-itemloaders==1.3.2
-itsdangerous==2.2.0
-jedi==0.19.2
-jieba3k==0.35.1
 Jinja2==3.1.6
-jmespath==1.0.1
-joblib==1.5.0
-keras==3.10.0
-langdetect==1.0.9
-libclang==18.1.1
-lxml==5.4.0
-lxml_html_clean==0.4.2
-Markdown==3.8
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
-matplotlib-inline==0.1.7
 mdurl==0.1.2
-ml_dtypes==0.5.1
-mpmath==1.3.0
-namex==0.0.9
-Nepali-nlp @ git+https://github.com/nabin2004/Nepali_nlp@67dd261ffacdfe7ec6e9c06c57d4768be2f80628
 nepali-stemmer==0.0.2
-networkx==3.4.2
-news-please==1.6.10
-newspaper3k==0.2.8
-nltk==3.4.5
-numpy==2.1.3
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-cufile-cu12==1.11.1.6
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.26.2
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-# omegaconf==2.0.6
-opencv-python==4.11.0.86
-opt_einsum==3.4.0
-optree==0.15.0
 orjson==3.10.18
 packaging==25.0
 pandas==2.2.3
-parsel==1.10.0
-parso==0.8.4
-pexpect==4.9.0
 pillow==11.2.1
-pipdeptree==2.26.1
-plac==1.4.5
-portalocker==3.1.1
-progressbar2==4.5.0
-prompt_toolkit==3.0.51
-Protego==0.4.0
-protobuf==5.29.4
-psycopg2-binary==2.9.10
-ptyprocess==0.7.0
-pure_eval==0.2.3
-pyasn1==0.6.1
-pyasn1_modules==0.4.2
-pycparser==2.22
-pydantic==2.11.4
 pydantic_core==2.33.2
-PyDispatcher==2.0.7
-pydload==1.0.9
 pydub==0.25.1
 Pygments==2.19.1
-PyMySQL==1.1.1
-pyOpenSSL==25.1.0
-pytesseract==0.3.13
 python-dateutil==2.9.0.post0
 python-multipart==0.0.20
-python-utils==3.9.1
 pytz==2025.2
 PyYAML==6.0.2
-queuelib==1.8.0
-readability-lxml==0.8.4.1
-regex==2024.11.6
-requests==2.22.0
-requests-file==2.1.0
 rich==14.0.0
-ruff==0.11.10
-s3transfer==0.12.0
-sacrebleu==2.5.1
 safehttpx==0.1.6
-safetensors==0.5.3
-scikit-learn==1.6.1
-scipy==1.15.3
-Scrapy==2.13.0
 semantic-version==2.10.0
-sentencepiece==0.2.0
-service-identity==24.2.0
-setuptools==80.8.0
-sgmllib3k==1.0.0
 shellingham==1.5.4
 six==1.17.0
-smart-open==7.1.0
 sniffio==1.3.1
-snowballstemmer==3.0.1
-soupsieve==2.7
-spello==1.2.0
-stack-data==0.6.3
 starlette==0.46.2
-sympy==1.14.0
 symspellpy==6.9.0
-tabulate==0.9.0
-tensorboard==2.19.0
-tensorboard-data-server==0.7.2
-tensorboardX==2.6.2.2
-tensorflow==2.19.0
-termcolor==3.1.0
-threadpoolctl==3.6.0
-tinysegmenter==0.3
-tldextract==5.3.0
-tokenizers==0.21.1
 tomlkit==0.13.2
-torch==2.7.0
-torchaudio==2.7.0
 tqdm==4.67.1
-traitlets==5.14.3
-transformers==4.52.1
-triton==3.3.0
-Twisted==24.11.0
 typer==0.15.4
-typing-inspection==0.4.0
 typing_extensions==4.13.2
 tzdata==2025.2
 urllib3==2.4.0
-uv==0.7.7
 uvicorn==0.34.2
-w3lib==2.3.1
-warcio==1.7.5
-wcwidth==0.2.13
 websockets==15.0.1
-Werkzeug==3.1.3
-wget==3.2
-wheel==0.45.1
-wrapt==1.17.2
-zope.interface==7.2

 aiofiles==24.1.0
 annotated-types==0.7.0
 anyio==4.9.0
 certifi==2025.4.26
 charset-normalizer==3.4.2
 click==8.1.8
 editdistpy==0.1.5
 fastapi==0.115.12
 ffmpy==0.5.0
 filelock==3.18.0
+fsspec==2025.5.0
+gradio==5.31.0
 gradio_client==1.10.1
 groovy==0.1.2
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
 huggingface-hub==0.31.4
+idna==3.10
+importlib_resources==6.5.2
 Jinja2==3.1.6
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
 mdurl==0.1.2
 nepali-stemmer==0.0.2
+numpy==2.2.6
 orjson==3.10.18
 packaging==25.0
 pandas==2.2.3
 pillow==11.2.1
+pydantic==2.11.5
 pydantic_core==2.33.2
 pydub==0.25.1
 Pygments==2.19.1
 python-dateutil==2.9.0.post0
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.2
+requests==2.32.3
 rich==14.0.0
+ruff==0.11.11
 safehttpx==0.1.6
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 starlette==0.46.2
 symspellpy==6.9.0
 tomlkit==0.13.2
 tqdm==4.67.1
 typer==0.15.4
+typing-inspection==0.4.1
 typing_extensions==4.13.2
 tzdata==2025.2
 urllib3==2.4.0
 uvicorn==0.34.2
 websockets==15.0.1

runed_gradio.py CHANGED Viewed

@@ -34,22 +34,15 @@ def save_to_vocab(word: str, filepath: str = vocab_path) -> str:
         f.write(word + "\n")
     return f"'{word}' added to vocab."
-def load_simplified_keys(filepath: str) -> Set[str]:
-    keys = set()
-    with open(filepath, "r", encoding="utf-8") as f:
-        for line in f:
-            word = line.strip()
-            if word:
-                keys.add(word)
-    return keys
 def load_simplified_map(filepath: str) -> Dict[str, str]:
     with open(filepath, "r", encoding="utf-8") as f:
         data = json.load(f)
     return {v: k for k, v in data.items()}
-def list_locations(simplified_keys_file: str = simplified_only_path,
-                   simplified_map_file: str = simplified_dict_path) -> str:
     simplified_map = load_simplified_map(simplified_map_file)
     keys = []
@@ -58,15 +51,15 @@ def list_locations(simplified_keys_file: str = simplified_only_path,
             line = line.strip()
             if not line:
                 continue
-            key = line.split("$")[0]
             keys.append(key)
-    output_lines = []
     for key in keys:
         original_name = simplified_map.get(key, "Unknown")
-        output_lines.append(f"{key}  ->  {original_name}")
-    return "\n".join(output_lines)
 def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
     sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
@@ -81,7 +74,7 @@ def correct_sentence(
     max_edit_distance: int,
     prefix_length: int,
     top_k: int
-) -> List[str]:
     sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
     simplified_map = load_simplified_map(simplified_dict_path)
@@ -122,7 +115,7 @@ def correct_sentence(
         sentence_options.append(correction_list)
     corrected_variants = [' '.join(variant) for variant in product(*sentence_options)]
-    return corrected_variants
 # ------------------- Gradio UI -------------------
@@ -130,35 +123,72 @@ with gr.Blocks(title="Nepali Spell Correction Tool") as demo:
     gr.Markdown(
         """
         # Nepali Spell Correction Tool
-        Automatically correct Nepali sentences using spelling correction and stemming techniques.
         """
     )
     with gr.Row():
         with gr.Column(scale=3):
-            gr.Markdown("### Sentence Correction")
-            sentence_input = gr.Textbox(label="Input Sentence", placeholder="Enter a Nepali sentence...", lines=2)
             max_dist = gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance")
             prefix_len = gr.Slider(1, 5, value=3, step=1, label="Prefix Length")
             top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions")
             submit_btn = gr.Button("Correct Sentence")
-            gr.Markdown("### Location Vocabulary")
-            with gr.Accordion("View and Manage Locations", open=False):
-                loc_out = gr.Textbox(label="Available Locations", lines=8, interactive=False)
                 view_btn = gr.Button("Show Locations")
-                # Uncomment below to add vocab management features
                 # new_loc = gr.Textbox(label="Add New Place", placeholder="e.g., काठमाडौँ")
                 # add_btn = gr.Button("Add Location")
                 # add_msg = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
-            corrected_out = gr.Textbox(label="Corrected Variants", lines=8)
-    # Bind buttons to functions
     submit_btn.click(
         correct_sentence,
         inputs=[sentence_input, max_dist, prefix_len, top_k],
@@ -166,7 +196,7 @@ with gr.Blocks(title="Nepali Spell Correction Tool") as demo:
     )
     view_btn.click(
-        list_locations,
         inputs=[],
         outputs=loc_out
     )

         f.write(word + "\n")
     return f"'{word}' added to vocab."
 def load_simplified_map(filepath: str) -> Dict[str, str]:
     with open(filepath, "r", encoding="utf-8") as f:
         data = json.load(f)
     return {v: k for k, v in data.items()}
+def list_locations_as_table(
+    simplified_keys_file: str = simplified_only_path,
+    simplified_map_file: str = simplified_dict_path
+) -> List[List[str]]:
     simplified_map = load_simplified_map(simplified_map_file)
     keys = []
             line = line.strip()
             if not line:
                 continue
+            key = line.split("$")[0]
             keys.append(key)
+    output_table = []
     for key in keys:
         original_name = simplified_map.get(key, "Unknown")
+        output_table.append([key, original_name])
+    return output_table
 def init_spellchecker(dict_path: str, max_edit_distance: int, prefix_length: int) -> SymSpell:
     sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance, prefix_length=prefix_length)
     max_edit_distance: int,
     prefix_length: int,
     top_k: int
+) -> List[List[str]]:
     sym_spell = init_spellchecker(simplified_only_path, max_edit_distance, prefix_length)
     simplified_map = load_simplified_map(simplified_dict_path)
         sentence_options.append(correction_list)
     corrected_variants = [' '.join(variant) for variant in product(*sentence_options)]
+    return [[variant] for variant in corrected_variants]
 # ------------------- Gradio UI -------------------
     gr.Markdown(
         """
         # Nepali Spell Correction Tool
+        Enter a Nepali sentence to generate corrected variants. You can also view and manage the location vocabulary.
         """
     )
+    example_sentences = {
+        "Example 1": "भतपरको  जिज्ञासु वातावरणले धेरै पर्यटकलाई आकर्षित गर्छ।",
+        "Example 2": "ललतपुर प्राचीन मूर्तिकला र वास्तुकलाको केन्द्र हो।",
+        "Example 3": "पोखराेाै प्रकृतिक सौन्दर्यले भरिपूर्ण शहर हो।"
+    }
     with gr.Row():
         with gr.Column(scale=3):
+            gr.Markdown("## Sentence Correction")
+            example_dropdown = gr.Dropdown(
+                label="Choose Example Sentence",
+                choices=list(example_sentences.values()),
+                value=list(example_sentences.values())[0],
+                interactive=True
+            )
+            sentence_input = gr.Textbox(
+                label="Input Sentence",
+                value=list(example_sentences.values())[0],
+                placeholder="Enter a Nepali sentence",
+                lines=2
+            )
+            def set_example(example):
+                return example
+            example_dropdown.change(set_example, inputs=[example_dropdown], outputs=[sentence_input])
             max_dist = gr.Slider(0, 4, value=2, step=1, label="Max Edit Distance")
             prefix_len = gr.Slider(1, 5, value=3, step=1, label="Prefix Length")
             top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Suggestions")
             submit_btn = gr.Button("Correct Sentence")
+            gr.Markdown("## Location Vocabulary Table")
+            with gr.Accordion("View or Manage Location Vocabulary", open=False):
+                loc_out = gr.Dataframe(
+                    headers=["Simplified Form", "Original Name"],
+                    datatype=["str", "str"],
+                    row_count=5,
+                    interactive=False,
+                    label="Location Vocabulary"
+                )
                 view_btn = gr.Button("Show Locations")
+                # Uncomment below to enable adding new locations
                 # new_loc = gr.Textbox(label="Add New Place", placeholder="e.g., काठमाडौँ")
                 # add_btn = gr.Button("Add Location")
                 # add_msg = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
+            gr.Markdown("## Corrected Sentence Variants")
+            corrected_out = gr.Dataframe(
+                headers=["Corrected Sentence Variants"],
+                datatype=["str"],
+                row_count=5,
+                interactive=False
+            )
     submit_btn.click(
         correct_sentence,
         inputs=[sentence_input, max_dist, prefix_len, top_k],
     )
     view_btn.click(
+        list_locations_as_table,
         inputs=[],
         outputs=loc_out
     )