Spaces:
Running
Running
File size: 1,730 Bytes
d14409f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
from hazm import Normalizer, word_tokenize, Lemmatizer, POSTagger, Chunker, DependencyParser
# Hazm pipeline components, built once at import time and reused by process_text.

# Model-backed components read their model files from the resources/ directory.
tagger = POSTagger(model='resources/postagger.model')
chunker = Chunker(model='resources/chunker.model')

# Components constructed with their default settings.
lemmatizer = Lemmatizer()
normalizer = Normalizer()

# The dependency parser is wired to the tagger and lemmatizer created above.
parser = DependencyParser(
    lemmatizer=lemmatizer,
    tagger=tagger,
)
def process_text(text, operations):
    """Run the selected Hazm operations on ``text`` and collect their results.

    Args:
        text: The input string to process.
        operations: Collection of operation names to apply. Recognized names
            are 'normalize', 'tokenize', 'lemmatize', 'pos_tag', 'chunk' and
            'dependency_parse'; any other name is ignored. ``None`` is treated
            as "no operations selected".

    Returns:
        A dict holding one entry per requested operation, keyed by
        'Normalized Text', 'Tokens', 'Lemmas', 'POS Tags', 'Chunks' and
        'Dependency Parse'. Chunks and the dependency parse are returned as
        their ``str()`` representation. When 'normalize' is requested, every
        later operation works on the normalized text.
    """
    # Guard against a missing selection so the membership tests below never
    # hit ``None``.
    operations = operations or ()
    result = {}

    if 'normalize' in operations:
        text = normalizer.normalize(text)
        result['Normalized Text'] = text

    # Skip tokenization entirely when nothing downstream needs the tokens.
    token_based = ('tokenize', 'lemmatize', 'pos_tag', 'chunk', 'dependency_parse')
    if not any(name in operations for name in token_based):
        return result

    # Tokenize once and share the tokens between all token-based operations
    # instead of re-tokenizing the same text for each of them.
    tokens = word_tokenize(text)

    if 'tokenize' in operations:
        result['Tokens'] = tokens

    if 'lemmatize' in operations:
        result['Lemmas'] = [lemmatizer.lemmatize(token) for token in tokens]

    # POS tags feed both the 'pos_tag' output and the chunker, so tag once.
    if 'pos_tag' in operations or 'chunk' in operations:
        pos_tags = tagger.tag(tokens)
        if 'pos_tag' in operations:
            result['POS Tags'] = pos_tags
        if 'chunk' in operations:
            result['Chunks'] = str(chunker.parse(pos_tags))

    if 'dependency_parse' in operations:
        result['Dependency Parse'] = str(parser.parse(tokens))

    return result
# Gradio front end: a multi-line text box and a checkbox group feed
# process_text, whose dict result is rendered as JSON.
# NOTE(review): gr.inputs.* looks like Gradio's legacy component namespace --
# confirm it is still available in the Gradio version this app is pinned to.
operations = [
    'normalize',
    'tokenize',
    'lemmatize',
    'pos_tag',
    'chunk',
    'dependency_parse',
]

_text_input = gr.inputs.Textbox(lines=10, label="Input Text")
_operation_picker = gr.inputs.CheckboxGroup(operations, label="Operations")

iface = gr.Interface(
    fn=process_text,
    inputs=[_text_input, _operation_picker],
    outputs="json",
    title="Persian Text Processor with Hazm",
    description="Select operations to perform on the input text using Hazm.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|