Bud1234 committed on
Commit
47dcb18
·
verified ·
1 Parent(s): b40a6a9

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +192 -19
index.html CHANGED
@@ -1,19 +1,192 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import html
4
+ import difflib
5
+ from typing import List, Tuple
6
+
7
+ import gradio as gr
8
+
9
+ # --- LanguageTool (rule-based grammar checker) ---
10
+ import language_tool_python
11
+
12
+ # --- Transformers model for grammar correction ---
13
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14
+ import torch
15
+
16
+
17
+ LT_LANG = "en-US"
18
+ # Initialization (done once)
19
+ tool = language_tool_python.LanguageTool(LT_LANG)
20
+
21
+ # Small, CPU-friendly T5 correction model
22
+ MODEL_NAME = "vennify/t5-base-grammar-correction" # a commonly used GEC model on HF
23
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
24
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
25
+ model.eval()  # the model is only used for generation here, never trained
26
+
27
+
28
def t5_correct(text: str, max_new_tokens: int = 128, prefix: str = "grammar: ") -> str:
    """Return a grammar-corrected version of English ``text`` using the T5 model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: Raw input text. Blank (empty or whitespace-only) input
            short-circuits without touching the model.
        max_new_tokens: Upper bound on the number of generated tokens,
            forwarded to ``model.generate``.
        prefix: Task prefix prepended to the input before tokenization. The
            ``vennify/t5-base-grammar-correction`` model card shows inputs
            prefixed with ``"grammar: "``; pass ``""`` to send the text as-is.

    Returns:
        The decoded model output with surrounding whitespace stripped, or
        ``""`` when ``text`` is blank.
    """
    if not text.strip():
        return ""

    # The input is truncated to the tokenizer's maximum length; longer text is cut off.
    inputs = tokenizer([prefix + text], return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            num_beams=4,
            early_stopping=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
46
+
47
+
48
def lt_find_issues(text: str):
    """Run LanguageTool on ``text`` and describe every match as a dict.

    Each dict carries the offending substring ("Error"), the checker's
    message ("Message"), the rule id ("Rule"), up to five suggested
    replacements joined by ", " ("Suggestions"), and the match position
    ("Start", "Length") within ``text``.
    """

    def _as_row(match):
        begin = match.offset
        span = match.errorLength
        candidates = match.replacements
        return {
            "Error": text[begin : begin + span],
            "Message": match.message,
            "Rule": match.ruleId,
            # Only the first five suggestions are reported.
            "Suggestions": ", ".join(candidates[:5]) if candidates else "",
            "Start": begin,
            "Length": span,
        }

    return [_as_row(found) for found in tool.check(text)]
68
+
69
+
70
def highlight_html(text: str, rows: List[dict]) -> str:
    """Render ``text`` as HTML with each reported issue wrapped in ``<mark>``.

    The text is walked once from left to right. Every piece (plain text,
    highlighted fragment, tooltip) is HTML-escaped exactly once, so offsets
    always refer to the raw ``text`` and no markup is escaped twice.

    Args:
        text: The original, unescaped text.
        rows: Issue dicts with ``"Start"``, ``"Length"`` and ``"Message"``
            keys and an optional ``"Suggestions"`` key.

    Returns:
        A ``<div style='white-space:pre-wrap'>`` element containing the
        escaped text. Each issue is wrapped in a ``<mark>`` whose ``title``
        holds the message and suggestions.
    """
    pieces = []
    cursor = 0  # index in ``text`` up to which output has been emitted
    for row in sorted(rows, key=lambda r: r["Start"]):
        start = row["Start"]
        end = start + row["Length"]
        # Skip spans that are out of range, empty, or overlap a span that
        # was already highlighted (nested <mark> tags would be ambiguous).
        if start < cursor or end <= start or end > len(text):
            continue
        tip = html.escape(f'{row["Message"]} | Suggestions: {row.get("Suggestions", "")}')
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(f"<mark title='{tip}'>{html.escape(text[start:end])}</mark>")
        cursor = end
    pieces.append(html.escape(text[cursor:]))

    body = "".join(pieces)
    return f"<div style='white-space:pre-wrap'>{body}</div>"
98
+
99
+
100
def diff_html(a: str, b: str) -> str:
    """Build a word-level HTML diff between the original ``a`` and corrected ``b``.

    Both strings are HTML-escaped and split on whitespace, then compared
    with ``difflib.ndiff``. Words only in ``b`` are wrapped in ``<ins>``,
    words only in ``a`` in ``<del>``, and unchanged words are emitted as-is.
    The result is joined with single spaces inside a styled ``<div>``.
    """
    wrappers = {"+ ": "ins", "- ": "del"}
    old_words = html.escape(a).split()
    new_words = html.escape(b).split()

    rendered = []
    for entry in difflib.ndiff(old_words, new_words):
        marker, word = entry[:2], entry[2:]
        if marker == "? ":
            # ndiff hint line, not a word from either text
            continue
        tag = wrappers.get(marker)
        rendered.append(word if tag is None else f"<{tag}>{word}</{tag}>")

    body = " ".join(rendered)
    return f"<div style='line-height:1.9; word-wrap:break-word'>{body}</div>"
119
+
120
+
121
def pipeline(text: str):
    """Run the full check for one button click and return five UI values.

    Returns, in order: the T5-corrected text, the LanguageTool issue table
    (one ``[Error, Message, Rule, Suggestions]`` list per issue), the
    highlighted-issues HTML, the original-vs-corrected diff HTML, and a
    one-line summary. Blank input yields empty values for all five.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return "", [], "", "", ""

    # Rule-based pass: issue table plus highlighted view.
    issues = lt_find_issues(cleaned)
    columns = ("Error", "Message", "Rule", "Suggestions")
    table = [[issue[column] for column in columns] for issue in issues]
    marked = highlight_html(cleaned, issues)

    # Model-based pass: corrected text and its diff against the input.
    fixed = t5_correct(cleaned)
    changes = diff_html(cleaned, fixed)

    summary = f"Found {len(issues)} potential issue(s)."
    return fixed, table, marked, changes, summary
146
+
147
+
148
+ with gr.Blocks(title="Grammar Classroom (HF Space)") as demo:  # UI definition; `demo` is launched at the bottom of the file
149
+ gr.Markdown(
150
+ """
151
+ # 🧑‍🏫 English Grammar Classroom
152
+ - **Rule-based check** (LanguageTool): errors, explanations, suggestions
153
+ - **AI correction** (T5): corrected version
154
+ - **Diff view**: see changes compared to your original
155
+ """
156
+ )
157
+
158
+ with gr.Row():  # NOTE(review): indentation is lost in this view; which components sit inside the Row is unconfirmed
159
+ inp = gr.Textbox(
160
+ label="Enter English text",
161
+ placeholder="Paste or type your sentence/paragraph here…",
162
+ lines=7,
163
+ )
164
+ run_btn = gr.Button("Check & Correct", variant="primary")
165
+
166
+ gr.Markdown("### ✅ AI-corrected Text")
167
+ corrected_out = gr.Textbox(label="Corrected", lines=6)
168
+
169
+ gr.Markdown("### 📋 Grammar Issues (LanguageTool)")
170
+ issues_df = gr.Dataframe(
171
+ headers=["Error", "Message", "Rule", "Suggestions"],  # same four columns pipeline() puts in each table row
172
+ datatype=["str", "str", "str", "str"],
173
+ wrap=True,
174
+ interactive=False,
175
+ row_count=(0, "dynamic"),
176
+ )
177
+ issues_summary = gr.Markdown()
178
+
179
+ gr.Markdown("### ✨ Highlighted Issues")
180
+ highlighted_out = gr.HTML()
181
+
182
+ gr.Markdown("### 🔍 Diff (Original vs Corrected)")
183
+ diff_out = gr.HTML()
184
+
185
+ run_btn.click(  # clicking the button runs pipeline() on the textbox content
186
+ fn=pipeline,
187
+ inputs=[inp],
188
+ outputs=[corrected_out, issues_df, highlighted_out, diff_out, issues_summary],  # same order as pipeline()'s 5-tuple return
189
+ )
190
+
191
+ if __name__ == "__main__":
192
+ demo.launch()  # launch the app only when this file is run as a script