Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
|
| 3 |
|
| 4 |
# Initialize Hazm components
|
| 5 |
lemmatizer = Lemmatizer()
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
|
| 9 |
# Initialize the Normalizer with user-selected parameters
|
|
@@ -29,8 +30,10 @@ def process_text(text, operation, correct_spacing, remove_diacritics, remove_spe
|
|
| 29 |
lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
|
| 30 |
result = " ".join(lemmas) # Show lemmas as a space-separated string
|
| 31 |
elif operation == "chunk":
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
result = str(chunks) # Show chunks as text
|
| 35 |
|
| 36 |
return result
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from hazm import Normalizer, word_tokenize, Lemmatizer, POSTagger, Chunker
|
| 3 |
|
| 4 |
# Initialize Hazm components
|
| 5 |
lemmatizer = Lemmatizer()
|
| 6 |
+
pos_tagger = POSTagger(model='resources/pos_tagger.model') # Load POS Tagger model
|
| 7 |
+
chunker = Chunker(model='resources/chunker.model') # Load Chunker model
|
| 8 |
|
| 9 |
def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
|
| 10 |
# Initialize the Normalizer with user-selected parameters
|
|
|
|
| 30 |
lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
|
| 31 |
result = " ".join(lemmas) # Show lemmas as a space-separated string
|
| 32 |
elif operation == "chunk":
|
| 33 |
+
# Tokenize and tag the input text
|
| 34 |
+
tokens = word_tokenize(text)
|
| 35 |
+
pos_tags = pos_tagger.tag(tokens) # Generate POS tags
|
| 36 |
+
chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
|
| 37 |
result = str(chunks) # Show chunks as text
|
| 38 |
|
| 39 |
return result
|