Spaces:

ShynBui
/

process

Sleeping

ShynBui committed on Feb 22, 2024

Commit

b5a336b

verified ·

1 Parent(s): a5ded10

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,44 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+import re
+import os
+import py_vncorenlp
+def preprocess_text(text):
+    """Remove URLs, file-path segments and punctuation from ``text``.
+
+    The steps run from most specific to least specific. Punctuation
+    stripping deletes every '/', so it has to come last; if it ran first
+    the file-path pattern could never match anything.
+
+    Args:
+        text: The string to clean.
+
+    Returns:
+        The cleaned string. Whitespace is left untouched, so removed spans
+        may leave consecutive spaces behind.
+    """
+    # Remove URLs (any run of non-whitespace starting with "http").
+    text = re.sub(r'http\S+', '', text)
+    # Remove file-path segments: each "/" followed by word characters.
+    # NOTE(review): this also drops the right-hand side of "a/b" style
+    # tokens -- confirm that is acceptable for the expected input.
+    text = re.sub(r'/\w+', '', text)
+    # Remove special characters and punctuation (anything that is neither
+    # a word character nor whitespace).
+    text = re.sub(r'[^\w\s]', '', text)
+    return text
+def remove_escape_sequences(text):
+    """Return ``text`` with newlines, tabs, carriage returns and backslashes deleted.
+
+    The characters are dropped outright (not replaced by a space), so text
+    on either side of them is joined together.
+    """
+    # A translation table mapping each unwanted character to None deletes
+    # all of them in a single pass over the string.
+    unwanted = dict.fromkeys(('\n', '\t', '\r', '\\'))
+    return text.translate(str.maketrans(unwanted))
+def remove_html_tags(text):
+    """Return ``text`` with every ``<...>`` span deleted.
+
+    A span starts at '<' and ends at the next '>'; a '<' with no closing
+    '>' after it is left in place.
+    """
+    tag_pattern = re.compile(r'<[^>]*>')
+    return tag_pattern.sub('', text)
+# Lazily created VnCoreNLP word segmenter, shared by every call.
+_RDRSEGMENTER = None
+def _get_rdrsegmenter():
+    """Return the shared word segmenter, creating it on first use."""
+    global _RDRSEGMENTER
+    if _RDRSEGMENTER is None:
+        # Constructing VnCoreNLP loads the segmentation model, so do it
+        # once instead of on every request.
+        # NOTE(review): py_vncorenlp is reported to fail when constructed
+        # a second time in the same process (JVM already running), and it
+        # expects the model files to already exist under save_dir --
+        # confirm both against the library documentation.
+        _RDRSEGMENTER = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=os.getcwd())
+    return _RDRSEGMENTER
+def vi_word_segment(text):
+    """Word-segment ``text`` with VnCoreNLP and return a single string.
+
+    Args:
+        text: The text to segment.
+
+    Returns:
+        The items returned by the segmenter's ``word_segment`` call,
+        joined with single spaces.
+    """
+    output = _get_rdrsegmenter().word_segment(text)
+    return ' '.join(output)
+def process_text(text):
+    """Clean raw input text and return it word-segmented.
+
+    Pipeline: truncate to 256 characters, strip HTML tags, remove URLs /
+    paths / punctuation, delete escape characters, then run word
+    segmentation.
+
+    Args:
+        text: The raw input string.
+
+    Returns:
+        The cleaned, word-segmented string.
+    """
+    # Cap the input length before any processing.
+    text = text[:256]
+    # Tags must go first: preprocess_text deletes '<' and '>', after which
+    # a tag can no longer be recognised. (This call was previously missing;
+    # remove_escape_sequences was invoked twice instead.)
+    text = remove_html_tags(text)
+    text = preprocess_text(text)
+    text = remove_escape_sequences(text)
+    text = vi_word_segment(text)
+    return text
+if __name__ == '__main__':
+    # Expose process_text through a text-in / text-out Gradio interface
+    # and start serving it when this file is run as a script.
+    gr.Interface(fn=process_text, inputs="text", outputs="text").launch()