cstr committed on
Commit
8cfd6f3
·
verified ·
1 Parent(s): a556454

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +253 -0
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ import subprocess
5
+ import urllib.request
6
+ import socket
7
+ import gradio as gr
8
+ import http.client
9
+ import urllib.parse
10
+ import xml.etree.ElementTree as ET
11
+
12
+ # =============================================================================
13
+ # CONFIGURATION
14
+ # =============================================================================
15
# Upstream server sources and the local folder they are cloned into.
REPO_URL = "https://github.com/Automattic/atd-server-next.git"
SERVER_DIR = "atd-server-next"

# Model files are fetched from MODEL_BASE_URL into MODELS_DIR.
MODELS_DIR = os.path.join(SERVER_DIR, "models")
MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"

# Address on which the Java backend is probed and queried.
HOST = "127.0.0.1"
PORT = 1049

# Model files to download (file name relative to MODEL_BASE_URL).
MODEL_FILES = [
    "cnetwork.bin",
    "cnetwork2.bin",
    "dictionary.txt",
    "edits.bin",
    "endings.bin",
    "hnetwork.bin",
    "hnetwork2.bin",
    "hnetwork4.bin",
    "lexicon.bin",
    "model.bin",
    "model.zip",
    "network3f.bin",
    "network3p.bin",
    "not_misspelled.txt",
    "stringpool.bin",
    "trigrams.bin",
]
29
+
30
+ # =============================================================================
31
+ # PHASE 1: AUTO-INSTALLATION & SETUP
32
+ # =============================================================================
33
def setup_server():
    """Prepare the AtD server checkout so the backend can be started.

    Steps, in order:
      1. Shallow-clone REPO_URL into SERVER_DIR if that folder is missing.
      2. Download every entry of MODEL_FILES that is not yet in MODELS_DIR.
      3. Run the Sleep rule compiler (utils/rules/rules.sl) inside SERVER_DIR.

    Exits the process with status 1 if cloning or a download fails, or if
    the ``git``/``java`` executable cannot be launched. A non-zero exit code
    from the rule compiler is only reported, because rules compiled by a
    previous run may still be usable.
    """
    print("--- [PHASE 0] CHECKING REPOSITORY ---")

    # 1. Clone repository if missing
    if not os.path.exists(SERVER_DIR):
        print(f"Repository not found. Cloning from {REPO_URL}...")
        try:
            # Shallow clone (--depth 1) to save space and time
            subprocess.run(
                ["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or unlaunchable ``git`` binary.
            print(f" -> FAILED to clone repository: {e}")
            sys.exit(1)
        print(" -> Clone successful.")
    else:
        print(f" -> Repository found at {SERVER_DIR}")

    print("\n--- [PHASE 1] CHECKING MODELS ---")

    if not os.path.exists(MODELS_DIR):
        print(f"Creating directory: {MODELS_DIR}")
        os.makedirs(MODELS_DIR, exist_ok=True)

    # 2. Download models
    print("Checking model files...")
    for filename in MODEL_FILES:
        filepath = os.path.join(MODELS_DIR, filename)
        if os.path.exists(filepath):
            print(f" -> Found {filename}")
            continue

        url = MODEL_BASE_URL + filename
        # Download to a temporary name and rename on success, so that an
        # interrupted transfer never leaves a truncated file that the
        # existence check above would accept on the next run.
        partial = filepath + ".part"
        print(f"Downloading {filename}...")
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, filepath)
        except Exception as e:
            try:
                os.remove(partial)
            except OSError:
                pass  # nothing was written, or it is already gone
            print(f" -> FAILED: {e}")
            sys.exit(1)
        print(" -> Saved.")

    # 3. Compile rules
    print("\n--- [PHASE 2] COMPILING RULES ---")
    print("Running Java Rule Compiler (this takes a moment)...")

    try:
        subprocess.run(
            [
                "java",
                "-Datd.lowmem=true",
                "-Xmx1024M",
                "-jar", "lib/sleep.jar",
                "utils/rules/rules.sl",
            ],
            cwd=SERVER_DIR,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Not fatal: sometimes it works anyway if a previous run succeeded.
        print(f"Error compiling rules: {e}")
    except OSError as e:
        # The compiler could not even be launched (e.g. ``java`` is not
        # installed); the server cannot start either, so stop here.
        print(f"Could not run the rule compiler: {e}")
        sys.exit(1)
    else:
        print("Rules compiled successfully.")
95
+
96
+ # =============================================================================
97
+ # PHASE 2: SERVER MANAGEMENT
98
+ # =============================================================================
99
def start_backend():
    """Start the Java AtD server in the background.

    The process is launched with SERVER_DIR as its working directory and is
    told to listen on HOST:PORT, the same address the port probe and the
    HTTP client use.

    Returns:
        The ``subprocess.Popen`` handle of the running server.
    """
    print("\n--- [PHASE 3] STARTING SERVER ---")

    # Classpath separator is ':' on Linux (Hugging Face), ';' on Windows
    cp_sep = ";" if os.name == 'nt' else ":"

    # Linux: lib/sleep.jar:lib/moconti.jar:lib/spellutils.jar
    classpath = cp_sep.join(
        ["lib/sleep.jar", "lib/moconti.jar", "lib/spellutils.jar"]
    )
    sleep_cp = cp_sep.join(["lib", "service/code"])

    cmd = [
        "java",
        "-Dfile.encoding=UTF-8",
        "-XX:+AggressiveHeap",
        "-XX:+UseParallelGC",
        "-Datd.lowmem=true",
        # Bind to the same host the client connects to instead of repeating
        # the literal address here.
        f"-Dbind.interface={HOST}",
        f"-Dserver.port={PORT}",
        f"-Dsleep.classpath={sleep_cp}",
        "-Dsleep.debug=24",
        "-classpath", classpath,
        "httpd.Moconti",
        "atdconfig.sl",
    ]

    print(f"Executing: {' '.join(cmd)}")
    # stdout/stderr are not redirected: the child inherits them, so the
    # server log shows up in the same console as this script's output.
    return subprocess.Popen(cmd, cwd=SERVER_DIR)
128
+
129
def wait_for_port(timeout=60, host=None, port=None, interval=1):
    """Wait until a TCP connection to the backend succeeds.

    Args:
        timeout: Maximum number of seconds to keep trying.
        host: Host to probe; defaults to the module-level HOST.
        port: Port to probe; defaults to the module-level PORT.
        interval: Seconds to sleep after each failed attempt.

    Returns:
        True as soon as a connection is accepted, False if *timeout*
        elapses first.
    """
    host = HOST if host is None else host
    port = PORT if port is None else port

    print(f"Waiting for port {port}...")
    # A monotonic clock keeps the deadline correct even if the wall clock
    # is adjusted while we are waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1):
                print("Server is Online!")
                return True
        except OSError:
            # Covers ConnectionRefusedError and socket timeouts alike.
            time.sleep(interval)
    return False
140
+
141
+ # =============================================================================
142
+ # PHASE 3: CLIENT LOGIC (Embedded PyATD)
143
+ # =============================================================================
144
class AtDClient:
    """Minimal HTTP client for a running After the Deadline server."""

    def __init__(self, host=None, port=None):
        """Remember the server address.

        Args:
            host: Server host; defaults to the module-level HOST.
            port: Server port; defaults to the module-level PORT.
        """
        self.host = HOST if host is None else host
        self.port = PORT if port is None else port

    @staticmethod
    def _parse_errors(xml_payload):
        """Convert a /checkDocument XML response into a list of error dicts.

        Each dict has the keys 'string', 'description', 'type',
        'precontext' (all ``str``) and 'suggestions' (list of ``str``).
        Missing or empty child elements become empty strings, so a single
        incomplete entry cannot invalidate the whole response. Entries
        without a flagged string are skipped because they cannot be located
        in the text.

        Raises:
            xml.etree.ElementTree.ParseError: if the payload is not valid XML.
        """
        root = ET.fromstring(xml_payload)
        errors = []
        for node in root.findall('error'):
            flagged = node.findtext('string') or ""
            if not flagged:
                continue

            suggestions = []
            options = node.find('suggestions')
            if options is not None:
                suggestions = [o.text for o in options.findall('option') if o.text]

            errors.append({
                'string': flagged,
                'description': node.findtext('description') or "",
                'type': node.findtext('type') or "",
                'precontext': node.findtext('precontext') or "",
                'suggestions': suggestions,
            })
        return errors

    def check_document(self, text):
        """POST *text* to /checkDocument and return the reported errors.

        Returns:
            A list of error dicts (see ``_parse_errors``). An empty list is
            returned on any failure (connection error, non-200 status,
            malformed XML); the failure is printed, never raised.
        """
        conn = http.client.HTTPConnection(self.host, self.port, timeout=5)
        try:
            params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
            headers = {"Content-Type": "application/x-www-form-urlencoded"}
            conn.request("POST", "/checkDocument", params, headers)
            resp = conn.getresponse()
            if resp.status != 200:
                print(f"Client Error: unexpected HTTP status {resp.status}")
                return []
            return self._parse_errors(resp.read())
        except Exception as e:
            # Service boundary: stay best-effort so the UI never crashes.
            print(f"Client Error: {e}")
            return []
        finally:
            # Always release the socket, including on early return or error.
            conn.close()
176
+
177
+ # =============================================================================
178
+ # PHASE 4: GRADIO UI
179
+ # =============================================================================
180
# Shared AtD client used by analyze_text; created once at import time.
+ client = AtDClient()
181
+
182
+ def analyze_text(text):
183
+ if not text.strip(): return []
184
+
185
+ errors = client.check_document(text)
186
+
187
+ output = []
188
+ last_pos = 0
189
+
190
+ # We need to find the errors in the text.
191
+ for err in errors:
192
+ word = err['string']
193
+ # Search for the word starting from last_pos
194
+ # We try to use precontext to verify uniqueness
195
+ search_block = err['precontext'] + word
196
+
197
+ # Try finding with context first
198
+ idx = text.find(search_block, last_pos)
199
+
200
+ if idx != -1:
201
+ # Found with context
202
+ actual_word_start = idx + len(err['precontext'])
203
+ else:
204
+ # Fallback: Find just the word
205
+ actual_word_start = text.find(word, last_pos)
206
+
207
+ if actual_word_start != -1:
208
+ # Add clean text before error
209
+ if actual_word_start > last_pos:
210
+ output.append((text[last_pos:actual_word_start], None))
211
+
212
+ # Add error text
213
+ label = f"{err['type']}: {err['description']}"
214
+ if err['suggestions']:
215
+ label += f" -> {', '.join(err['suggestions'][:3])}"
216
+
217
+ output.append((text[actual_word_start:actual_word_start+len(word)], label))
218
+ last_pos = actual_word_start + len(word)
219
+
220
+ # Add remainder
221
+ if last_pos < len(text):
222
+ output.append((text[last_pos:], None))
223
+
224
+ return output
225
+
226
+ # =============================================================================
227
+ # MAIN EXECUTION
228
+ # =============================================================================
229
if __name__ == "__main__":
    # 1. Run setup (clone, download, compile)
    setup_server()

    # 2. Start the Java backend
    server_proc = start_backend()

    try:
        # 3. Wait until the backend accepts connections
        if not wait_for_port():
            print("FATAL: Server did not start.")
            # Report the failure to the caller instead of exiting with 0.
            sys.exit(1)

        # 4. Launch UI
        with gr.Blocks(title="AtD Self-Hosted") as demo:
            gr.Markdown("# 🛡️ After The Deadline (Self-Installing Server)")
            gr.Markdown("This Space automatically downloaded models, compiled Java rules, and launched the server.")

            with gr.Row():
                inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)
                out = gr.HighlightedText(label="Corrections", combine_adjacent=True)

            btn = gr.Button("Check Text", variant="primary")
            btn.click(analyze_text, inputs=inp, outputs=out)

        demo.launch(server_name="0.0.0.0", server_port=7860)
    finally:
        # Stop the backend on every exit path (startup failure, UI shutdown,
        # Ctrl-C, unexpected exception) so no Java process is left behind.
        if server_proc.poll() is None:
            server_proc.terminate()
            try:
                server_proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                server_proc.kill()
                server_proc.wait()