Spaces:
Sleeping
Sleeping
Commit ·
97aced8
1
Parent(s): f00bbd2
2.11.0
Browse files- README copy.md +12 -0
- app.py +135 -0
- requirements.txt +1 -0
README copy.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Grammar Corrector
|
| 3 |
+
emoji: 🏢
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 3.39.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import os
|
| 4 |
+
import subprocess
|
| 5 |
+
# Download the spaCy English model at start-up (presumably what
# errant.load('en') further down relies on -- TODO confirm).
subprocess.run(["python3", "-m", "spacy", "download", "en"])

# Sample request payload; not referenced by any function visible in this file.
data = {
    "sentences": [
        "I am an good boy.",
        "I wanted to going to supermarket.",
    ]
}

# Base URL of the correction service, read from the 'url' environment
# variable; a missing variable raises KeyError at import time.
llama2_url = os.environ['url']
|
| 8 |
+
|
| 9 |
+
# UJ
|
| 10 |
+
import json
|
| 11 |
+
import errant
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
import random
|
| 14 |
+
from difflib import Differ
|
| 15 |
+
|
| 16 |
+
# Module-level ERRANT annotator, created once at import time and used by comp().
annotator = errant.load('en')
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Error-category codes that are combined with an operation prefix (M/R/U)
# to form the edit types handled below.
error_categories = [
    'ADJ',
    'ADJ:FORM',
    'ADV',
    'CONJ',
    'CONTR',
    'DET',
    'MORPH',
    'NOUN',
    'NOUN:INFL',
    'NOUN:NUM',
    'NOUN:POSS',
    'ORTH',
    'OTHER',
    'PART',
    'PREP',
    'PRON',
    'PUNCT',
    'SPELL',
    'UNK',
    'VERB',
    'VERB:FORM',
    'VERB:INFL',
    'VERB:SVA',
    'VERB:TENSE',
    'WO',
]

# Chinese display text for the three operation prefixes and for every
# category code; comp() joins two of these with ':' to label a highlight.
eng2zh = {
    'M': '漏掉',
    'R': '換成',
    'U': '多餘的',
    'ADJ': '形容詞',
    'ADJ:FORM': '形容詞形(比較級或最高級)',
    'ADV': '副詞',
    'CONJ': '連接詞',
    'CONTR': '縮寫',
    # NOTE(review): the leading space in this label looks unintentional --
    # confirm before trimming, since the text is shown to users.
    'DET': ' 限定詞(冠詞、指示詞、所有格)',
    'MORPH': '語尾變化(詞性、單複數、拼字)',
    'NOUN': '名詞',
    'NOUN:INFL': '名詞語尾變化(可不可數、單複數、拼字)',
    'NOUN:NUM': '名詞單複數',
    'NOUN:POSS': '名詞所有格',
    'ORTH': '大小寫',
    'OTHER': '換其它的用法',
    'PART': '介副詞',
    'PREP': '介詞',
    'PRON': '代名詞',
    'PUNCT': '標點',
    'SPELL': '拼字',
    'UNK': '難以歸類',
    'VERB': '動詞',
    'VERB:FORM': '動詞形',
    'VERB:INFL': '動詞詞語尾變化',
    'VERB:SVA': '主詞動詞一致',
    'VERB:TENSE': '動詞時態',
    'WO': '詞序',
}

# Highlight colour per entity label: red for M, blue for R, green for U.
# comp() labels entities with the translated "<prefix>:<category>" text, so
# the map must contain those translated keys for a colour to be found. The
# original English-code keys are kept as well for backward compatibility.
color_map = {}
for pre in ['M', 'R', 'U']:
    for err in error_categories:
        colour = {'M': 'red', 'R': 'blue', 'U': 'green'}[pre]
        color_map[f'{pre}:{err}'] = colour
        color_map[f'{eng2zh[pre]}:{eng2zh[err]}'] = colour
|
| 80 |
+
def comp(s1, s2):
    """Annotate the edits that turn the original text into the corrected one.

    Both strings are parsed with the module-level ``annotator`` and the edits
    between them are computed. Every edit whose operation letter and category
    both have an entry in ``eng2zh`` is recorded as a labelled character span
    on the original side and on the corrected side; other edits are skipped.

    Args:
        s1: The original text.
        s2: The corrected text.

    Returns:
        A pair ``(ori_anno, cor_anno)``. Each is a dict holding the text under
        ``'text'`` and, under ``'entities'``, a list of dicts with the keys
        ``'entity'`` (label), ``'start'`` and ``'end'`` (character offsets).
    """
    orig = annotator.parse(s1, tokenise=True)
    cor = annotator.parse(s2, tokenise=True)
    edits = annotator.annotate(orig, cor, merging="all-equal")

    ori_anno = {'text': s1, 'entities': []}
    cor_anno = {'text': s2, 'entities': []}

    for e in edits:
        # The first character of the type is the operation letter and
        # everything after the separator at index 1 is the category
        # (presumably of the form "R:VERB:TENSE" -- confirm against ERRANT).
        typ, content = e.type[0], e.type[2:]
        if typ not in eng2zh or content not in eng2zh:
            continue

        new_statement = eng2zh[typ] + ':' + eng2zh[content]
        # Slice each side once and reuse the span for both offsets.
        o_span = orig[e.o_start:e.o_end]
        c_span = cor[e.c_start:e.c_end]
        ori_anno['entities'].append({
            'entity': new_statement,
            'start': o_span.start_char,
            'end': o_span.end_char,
        })
        cor_anno['entities'].append({
            'entity': new_statement,
            'start': c_span.start_char,
            'end': c_span.end_char,
        })

    return ori_anno, cor_anno
|
| 110 |
+
def llama2_all(text):
    """Correct ``text`` and annotate the differences on both versions.

    Returns a 3-tuple: the corrected text as returned by ``llama2_cor``,
    followed by the two annotation dicts that ``comp`` produces for the
    stripped original and the stripped correction.
    """
    corrected = llama2_cor(text)
    original_marks, corrected_marks = comp(text.strip(), corrected.strip())
    return corrected, original_marks, corrected_marks
|
| 114 |
+
def llama2_cor(text):
    """Ask the correction service to correct ``text``.

    Posts ``{"sentences": text}`` as JSON to ``{llama2_url}/llama2`` and
    returns the ``'sentences'`` field of the JSON reply.

    Args:
        text: The text to correct.

    Returns:
        The value of ``'sentences'`` in the reply, or the message
        ``"Please retry or reboot the LLM server."`` when the request fails,
        the reply is not valid JSON, or the field is missing.
    """
    payload = {"sentences": text}
    try:
        # The request itself is inside the try so that connection failures
        # produce the same fallback message as a malformed reply.
        r = requests.post(f"{llama2_url}/llama2", json=payload)
        return r.json()['sentences']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # ValueError covers JSON decoding errors; KeyError/TypeError cover a
        # reply that lacks the field or is not a JSON object.
        return "Please retry or reboot the LLM server."
|
| 121 |
+
# UI: one tab with an input/output row, a row of highlighted views for the
# original and corrected text, and a row of two action buttons.
with gr.Blocks() as demo:
    with gr.Tab("Llama-2-13b-chat"):
        with gr.Row():
            text_input = gr.Textbox(lines=5, label="Input", placeholder="Please enter sentences line by line.")
            text_output = gr.Textbox(lines=5, label="Output")
        with gr.Row():
            # color_map is passed to the constructor rather than through the
            # deprecated .style() call; the unsupported `line` keyword is dropped.
            text_output01 = gr.HighlightedText(label="Original Text", combine_adjacent=True, color_map=color_map)
            text_output02 = gr.HighlightedText(label="Corrected Text", combine_adjacent=True, color_map=color_map)
        with gr.Row():
            text_button = gr.Button("Correct and Compare")
            text_button1 = gr.Button("Correct")
        # "Correct and Compare" fills the output box and both highlighted
        # views; "Correct" fills only the output box.
        text_button.click(llama2_all, inputs=text_input, outputs=[text_output, text_output01, text_output02])
        text_button1.click(llama2_cor, inputs=text_input, outputs=text_output)

# Enable request queuing through .queue() instead of the deprecated
# launch(enable_queue=True) argument.
demo.queue()
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
errant
|