Spaces:
Running
Running
| import gradio as gr | |
| from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker | |
# Hazm components shared by every call to process_text; they are created
# once when the module is imported.
lemmatizer = Lemmatizer()
chunker = Chunker(model="resources/chunker.model")
def _get_pos_tagger():
    """Return the shared Hazm POS tagger, loading its model on first use."""
    tagger = getattr(_get_pos_tagger, "_instance", None)
    if tagger is None:
        # Local import: hazm is already a dependency of this file, and this
        # keeps the helper usable without touching the top-level imports.
        from hazm import POSTagger

        # NOTE(review): assumes the tagger model sits next to chunker.model
        # in resources/ -- confirm the file is deployed with the app.
        tagger = POSTagger(model="resources/pos_tagger.model")
        _get_pos_tagger._instance = tagger
    return tagger


def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
    """Run one Hazm operation on ``text`` and return the result as a string.

    Args:
        text: Input text. Empty or ``None`` input yields an empty string.
        operation: One of ``"normalize"``, ``"tokenize"``, ``"lemmatize"`` or
            ``"chunk"``. Any other value (including ``None``) yields an
            empty string.
        correct_spacing, remove_diacritics, remove_specials_chars,
        decrease_repeated_chars, persian_style, persian_numbers,
        unicodes_replacement, seperate_mi: Booleans forwarded unchanged to
            the Hazm ``Normalizer`` constructor. They only affect the
            ``"normalize"`` operation; the other operations work on the raw
            input text.

    Returns:
        The normalized text, the tokens or lemmas joined by single spaces,
        or the string form of the chunk tree.
    """
    # Nothing to process: skip the Hazm calls entirely.
    if not text:
        return ""

    if operation == "normalize":
        # The normalizer is only needed here, so it is not built for the
        # other operations.
        normalizer = Normalizer(
            correct_spacing=correct_spacing,
            remove_diacritics=remove_diacritics,
            remove_specials_chars=remove_specials_chars,
            decrease_repeated_chars=decrease_repeated_chars,
            persian_style=persian_style,
            persian_numbers=persian_numbers,
            unicodes_replacement=unicodes_replacement,
            seperate_mi=seperate_mi,
        )
        return normalizer.normalize(text)

    if operation == "tokenize":
        # Show tokens as a space-separated string.
        return " ".join(word_tokenize(text))

    if operation == "lemmatize":
        # Show lemmas as a space-separated string.
        return " ".join(lemmatizer.lemmatize(token) for token in word_tokenize(text))

    if operation == "chunk":
        # The chunker consumes (word, tag) pairs, so the tokens must be
        # POS-tagged before parsing.
        tagged_tokens = _get_pos_tagger().tag(word_tokenize(text))
        return str(chunker.parse(tagged_tokens))

    # Unknown or unselected operation.
    return ""
# Gradio UI: one text input, an operation selector, and one checkbox per
# normalizer switch, wired to process_text.
operations = ["normalize", "tokenize", "lemmatize", "chunk"]

# Checkbox captions, listed in the same order as the boolean parameters of
# process_text so that each checkbox feeds the matching argument.
_normalizer_switch_labels = (
    "Correct Spacing",
    "Remove Diacritics",
    "Remove Special Characters",
    "Decrease Repeated Characters",
    "Persian Style",
    "Persian Numbers",
    "Unicodes Replacement",
    "Separate 'می'",
)

iface = gr.Interface(
    fn=process_text,
    inputs=[
        gr.Textbox(lines=10, label="Input Text"),
        # Radio button: exactly one operation is selected at a time.
        gr.Radio(operations, label="Select Operation", type="value"),
        # Every switch starts enabled.
        *(
            gr.Checkbox(value=True, label=caption, interactive=True)
            for caption in _normalizer_switch_labels
        ),
    ],
    # Read-only, copyable output box.
    outputs=gr.Textbox(
        label="Processed Text",
        interactive=False,
        lines=10,
        show_copy_button=True,
        show_label=True,
    ),
    title="Persian Text Processor with Hazm",
    description="Select an operation and normalization parameters to process the input text using Hazm.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()