cstr committed on
Commit
0cd52de
·
verified ·
1 Parent(s): 0f28594

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -96
app.py CHANGED
@@ -8,18 +8,19 @@ import gradio as gr
8
  import http.client
9
  import urllib.parse
10
  import xml.etree.ElementTree as ET
 
 
11
 
12
  # =============================================================================
13
  # CONFIGURATION
14
  # =============================================================================
15
  REPO_URL = "https://github.com/Automattic/atd-server-next.git"
16
- SERVER_DIR = "atd-server-next" # Local folder name
17
  MODELS_DIR = os.path.join(SERVER_DIR, "models")
18
  MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"
19
  HOST = "127.0.0.1"
20
  PORT = 1049
21
 
22
- # List of binary models to download
23
  MODEL_FILES = [
24
  "cnetwork.bin", "cnetwork2.bin", "dictionary.txt", "edits.bin",
25
  "endings.bin", "hnetwork.bin", "hnetwork2.bin", "hnetwork4.bin",
@@ -30,31 +31,52 @@ MODEL_FILES = [
30
  # =============================================================================
31
  # PHASE 1: AUTO-INSTALLATION & SETUP
32
  # =============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def setup_server():
34
- """Clones repo, downloads models, and compiles rules."""
35
  print("--- [PHASE 0] CHECKING REPOSITORY ---")
36
-
37
- # 1. Clone Repository if missing
38
  if not os.path.exists(SERVER_DIR):
39
  print(f"Repository not found. Cloning from {REPO_URL}...")
40
- try:
41
- # Use shallow clone (--depth 1) to save space and time
42
- subprocess.run(["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR], check=True)
43
- print(" -> Clone successful.")
44
- except subprocess.CalledProcessError as e:
45
- print(f" -> FAILED to clone repository: {e}")
46
- sys.exit(1)
47
- else:
48
- print(f" -> Repository found at {SERVER_DIR}")
49
 
50
  print("\n--- [PHASE 1] CHECKING MODELS ---")
51
-
52
  if not os.path.exists(MODELS_DIR):
53
- print(f"Creating directory: {MODELS_DIR}")
54
  os.makedirs(MODELS_DIR, exist_ok=True)
55
 
56
- # 2. Download Models
57
- print("Checking model files...")
58
  for filename in MODEL_FILES:
59
  filepath = os.path.join(MODELS_DIR, filename)
60
  if not os.path.exists(filepath):
@@ -62,48 +84,33 @@ def setup_server():
62
  print(f"Downloading {filename}...")
63
  try:
64
  urllib.request.urlretrieve(url, filepath)
65
- print(f" -> Saved.")
66
  except Exception as e:
67
  print(f" -> FAILED: {e}")
68
- sys.exit(1)
69
- else:
70
- print(f" -> Found {filename}")
71
 
72
- # 3. Compile Rules
73
  print("\n--- [PHASE 2] COMPILING RULES ---")
74
- print("Running Java Rule Compiler (this takes a moment)...")
75
-
76
- # Note: Classpath separator is ':' for Linux (Hugging Face), ';' for Windows
77
- cp_sep = ";" if os.name == 'nt' else ":"
78
-
79
- try:
80
- subprocess.run(
81
- [
82
- "java",
83
- "-Datd.lowmem=true",
84
- "-Xmx1024M",
85
- "-jar", "lib/sleep.jar",
86
- "utils/rules/rules.sl"
87
- ],
88
- cwd=SERVER_DIR,
89
- check=True
90
- )
91
- print("Rules compiled successfully.")
92
- except subprocess.CalledProcessError as e:
93
- print(f"Error compiling rules: {e}")
94
- # We don't exit here because sometimes it works anyway if previous run succeeded
95
 
96
  # =============================================================================
97
  # PHASE 2: SERVER MANAGEMENT
98
  # =============================================================================
99
  def start_backend():
100
- """Starts the Java server in the background."""
101
  print("\n--- [PHASE 3] STARTING SERVER ---")
102
-
103
  cp_sep = ";" if os.name == 'nt' else ":"
104
 
105
- # Construct classpath
106
- # Linux: lib/sleep.jar:lib/moconti.jar...
107
  classpath = f"lib/sleep.jar{cp_sep}lib/moconti.jar{cp_sep}lib/spellutils.jar"
108
  sleep_cp = f"lib{cp_sep}service/code"
109
 
@@ -121,9 +128,8 @@ def start_backend():
121
  "httpd.Moconti",
122
  "atdconfig.sl"
123
  ]
124
-
125
- print(f"Executing: {' '.join(cmd)}")
126
- # Start process, redirect stdout/stderr so we can see logs in HF console
127
  return subprocess.Popen(cmd, cwd=SERVER_DIR)
128
 
129
  def wait_for_port(timeout=60):
@@ -139,23 +145,28 @@ def wait_for_port(timeout=60):
139
  return False
140
 
141
  # =============================================================================
142
- # PHASE 3: CLIENT LOGIC (Embedded PyATD)
143
  # =============================================================================
144
  class AtDClient:
145
- def __init__(self):
146
- self.host = HOST
147
- self.port = PORT
148
-
149
  def check_document(self, text):
150
  try:
151
- conn = http.client.HTTPConnection(self.host, self.port, timeout=5)
152
  params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
153
  headers = {"Content-Type": "application/x-www-form-urlencoded"}
154
  conn.request("POST", "/checkDocument", params, headers)
155
  resp = conn.getresponse()
156
- if resp.status != 200: return []
157
 
158
- root = ET.fromstring(resp.read())
 
 
 
 
 
 
 
 
 
 
159
  errors = []
160
  for e in root.findall('error'):
161
  err = {
@@ -174,71 +185,56 @@ class AtDClient:
174
  print(f"Client Error: {e}")
175
  return []
176
 
177
- # =============================================================================
178
- # PHASE 4: GRADIO UI
179
- # =============================================================================
180
  client = AtDClient()
181
 
182
  def analyze_text(text):
183
  if not text.strip(): return []
184
-
185
  errors = client.check_document(text)
186
-
187
  output = []
188
  last_pos = 0
189
 
190
- # We need to find the errors in the text.
191
  for err in errors:
192
  word = err['string']
193
- # Search for the word starting from last_pos
194
- # We try to use precontext to verify uniqueness
195
- search_block = err['precontext'] + word
196
-
197
- # Try finding with context first
198
- idx = text.find(search_block, last_pos)
199
 
 
 
 
 
 
 
 
200
  if idx != -1:
201
- # Found with context
202
- actual_word_start = idx + len(err['precontext'])
203
- else:
204
- # Fallback: Find just the word
205
- actual_word_start = text.find(word, last_pos)
206
 
207
- if actual_word_start != -1:
208
- # Add clean text before error
209
- if actual_word_start > last_pos:
210
- output.append((text[last_pos:actual_word_start], None))
211
-
212
- # Add error text
213
  label = f"{err['type']}: {err['description']}"
214
  if err['suggestions']:
215
  label += f" -> {', '.join(err['suggestions'][:3])}"
216
 
217
- output.append((text[actual_word_start:actual_word_start+len(word)], label))
218
- last_pos = actual_word_start + len(word)
219
 
220
- # Add remainder
221
  if last_pos < len(text):
222
  output.append((text[last_pos:], None))
223
-
224
  return output
225
 
226
- # =============================================================================
227
- # MAIN EXECUTION
228
- # =============================================================================
229
  if __name__ == "__main__":
230
- # 1. Run Setup (Clone, Download, Compile)
231
  setup_server()
232
-
233
- # 2. Start Server
234
  server_proc = start_backend()
235
 
236
- # 3. Wait for ready
 
 
 
 
 
 
 
237
  if wait_for_port():
238
- # 4. Launch UI
239
  with gr.Blocks(title="AtD Self-Hosted") as demo:
240
- gr.Markdown("# 🛡️ After The Deadline (Self-Installing Server)")
241
- gr.Markdown("This Space automatically downloaded models, compiled Java rules, and launched the server.")
242
 
243
  with gr.Row():
244
  inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)
 
8
  import http.client
9
  import urllib.parse
10
  import xml.etree.ElementTree as ET
11
+ import shutil
12
+ import re
13
 
14
  # =============================================================================
15
  # CONFIGURATION
16
  # =============================================================================
17
  REPO_URL = "https://github.com/Automattic/atd-server-next.git"
18
+ SERVER_DIR = "atd-server-next"
19
  MODELS_DIR = os.path.join(SERVER_DIR, "models")
20
  MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"
21
  HOST = "127.0.0.1"
22
  PORT = 1049
23
 
 
24
  MODEL_FILES = [
25
  "cnetwork.bin", "cnetwork2.bin", "dictionary.txt", "edits.bin",
26
  "endings.bin", "hnetwork.bin", "hnetwork2.bin", "hnetwork4.bin",
 
31
  # =============================================================================
32
  # PHASE 1: AUTO-INSTALLATION & SETUP
33
  # =============================================================================
34
def patch_server_code(server_dir=None):
    """Patch legacy Sleep sources so they load on modern Java (JDK 9+).

    Removes every ``from: lib/spellutils.jar`` qualifier from the known Sleep
    scripts. According to the original note on this function, that syntax
    causes a ClassCastException on newer JDKs. The statement's trailing
    semicolon is left in place.

    Files are patched as raw bytes so that any non-ASCII content and the
    original line endings are written back unchanged, regardless of the
    platform's default text encoding.

    Args:
        server_dir: Root directory of the server checkout. When omitted,
            the module-level ``SERVER_DIR`` is used.

    Returns:
        None. Progress and per-file failures are reported via ``print``;
        an unreadable or unwritable file does not abort the other patches.
    """
    if server_dir is None:
        # Resolved lazily so existing zero-argument callers keep working.
        server_dir = SERVER_DIR

    print("--- [PHASE 1.5] PATCHING CODE FOR JAVA 17+ ---")

    # Files that are known (or suspected) to contain the incompatible syntax.
    files_to_check = [
        os.path.join(server_dir, "lib", "spellcheck.sl"),
        os.path.join(server_dir, "utils", "spell", "trainspell.sl"),
    ]

    # Matches: from: [optional whitespace] lib/spellutils.jar
    pattern = re.compile(rb"from:\s*lib/spellutils\.jar")

    for file_path in files_to_check:
        if not os.path.exists(file_path):
            continue

        try:
            with open(file_path, "rb") as f:
                content = f.read()

            new_content = pattern.sub(b"", content)

            if new_content != content:
                print(f"Patching {file_path}...")
                with open(file_path, "wb") as f:
                    f.write(new_content)
                print("-> Success: Code patched for modern Java.")
            else:
                print(f"-> {file_path} already clean.")
        except OSError as e:
            # Best effort: report the I/O problem and move on to the next file.
            print(f"Error patching {file_path}: {e}")
69
+
70
  def setup_server():
 
71
  print("--- [PHASE 0] CHECKING REPOSITORY ---")
 
 
72
  if not os.path.exists(SERVER_DIR):
73
  print(f"Repository not found. Cloning from {REPO_URL}...")
74
+ subprocess.run(["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR], check=True)
 
 
 
 
 
 
 
 
75
 
76
  print("\n--- [PHASE 1] CHECKING MODELS ---")
 
77
  if not os.path.exists(MODELS_DIR):
 
78
  os.makedirs(MODELS_DIR, exist_ok=True)
79
 
 
 
80
  for filename in MODEL_FILES:
81
  filepath = os.path.join(MODELS_DIR, filename)
82
  if not os.path.exists(filepath):
 
84
  print(f"Downloading {filename}...")
85
  try:
86
  urllib.request.urlretrieve(url, filepath)
 
87
  except Exception as e:
88
  print(f" -> FAILED: {e}")
89
+
90
+ # RUN THE PATCH BEFORE COMPILING
91
+ patch_server_code()
92
 
 
93
  print("\n--- [PHASE 2] COMPILING RULES ---")
94
+ # Only compile if the output file doesn't exist to save startup time on reboots
95
+ if not os.path.exists(os.path.join(SERVER_DIR, "models", "grammar.bin")): # Heuristic check
96
+ try:
97
+ subprocess.run(
98
+ ["java", "-Datd.lowmem=true", "-Xmx1024M", "-jar", "lib/sleep.jar", "utils/rules/rules.sl"],
99
+ cwd=SERVER_DIR,
100
+ check=True
101
+ )
102
+ print("Rules compiled successfully.")
103
+ except subprocess.CalledProcessError:
104
+ print("Rule compilation warning (ignoring)...")
 
 
 
 
 
 
 
 
 
 
105
 
106
  # =============================================================================
107
  # PHASE 2: SERVER MANAGEMENT
108
  # =============================================================================
109
  def start_backend():
 
110
  print("\n--- [PHASE 3] STARTING SERVER ---")
 
111
  cp_sep = ";" if os.name == 'nt' else ":"
112
 
113
+ # We explicitly add spellutils.jar to classpath here, rendering the 'from:' syntax obsolete
 
114
  classpath = f"lib/sleep.jar{cp_sep}lib/moconti.jar{cp_sep}lib/spellutils.jar"
115
  sleep_cp = f"lib{cp_sep}service/code"
116
 
 
128
  "httpd.Moconti",
129
  "atdconfig.sl"
130
  ]
131
+
132
+ # Start in SERVER_DIR so relative paths (./models) work
 
133
  return subprocess.Popen(cmd, cwd=SERVER_DIR)
134
 
135
  def wait_for_port(timeout=60):
 
145
  return False
146
 
147
  # =============================================================================
148
+ # PHASE 3: CLIENT
149
  # =============================================================================
150
  class AtDClient:
 
 
 
 
151
  def check_document(self, text):
152
  try:
153
+ conn = http.client.HTTPConnection(HOST, PORT, timeout=5)
154
  params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
155
  headers = {"Content-Type": "application/x-www-form-urlencoded"}
156
  conn.request("POST", "/checkDocument", params, headers)
157
  resp = conn.getresponse()
 
158
 
159
+ # If server is still warming up, it might return 503 or connection drop
160
+ if resp.status != 200:
161
+ print(f"Server returned status: {resp.status}")
162
+ return []
163
+
164
+ data = resp.read()
165
+ # Basic check for valid XML
166
+ if not data.strip().startswith(b"<"):
167
+ return []
168
+
169
+ root = ET.fromstring(data)
170
  errors = []
171
  for e in root.findall('error'):
172
  err = {
 
185
  print(f"Client Error: {e}")
186
  return []
187
 
 
 
 
188
  client = AtDClient()
189
 
190
def analyze_text(text):
    """Check *text* with the AtD client and split it into labelled spans.

    Args:
        text: The user's input string. ``None`` and whitespace-only input
            are treated as empty.

    Returns:
        A list of ``(substring, label)`` tuples covering the input from left
        to right. ``label`` is ``None`` for unflagged text; for a flagged
        span it is ``"<type>: <description>"``, followed by
        ``" -> "`` and up to three comma-separated suggestions when the
        error carries any. Returns ``[]`` for empty input.
    """
    if not text or not text.strip():
        return []

    errors = client.check_document(text)
    output = []
    last_pos = 0

    for err in errors:
        word = err['string']
        if not word:
            # Nothing to locate; an empty match would add a zero-length span.
            continue

        # Context-aware search: prefer the occurrence that follows the
        # reported preceding context.
        idx = -1
        precontext = err['precontext']
        if precontext:
            context_idx = text.find(precontext, last_pos)
            if context_idx != -1:
                idx = text.find(word, context_idx + len(precontext))

        if idx == -1:
            # Context missing or misleading: fall back to the first
            # occurrence after the previously consumed span instead of
            # dropping the error.
            idx = text.find(word, last_pos)
        if idx == -1:
            continue

        # Unflagged text between the previous span and this error.
        if idx > last_pos:
            output.append((text[last_pos:idx], None))

        label = f"{err['type']}: {err['description']}"
        if err['suggestions']:
            label += f" -> {', '.join(err['suggestions'][:3])}"

        end = idx + len(word)
        output.append((text[idx:end], label))
        last_pos = end

    # Trailing unflagged text after the last error.
    if last_pos < len(text):
        output.append((text[last_pos:], None))
    return output
221
 
 
 
 
222
  if __name__ == "__main__":
 
223
  setup_server()
 
 
224
  server_proc = start_backend()
225
 
226
+ # Give it a moment to fail if java crashes immediately
227
+ time.sleep(2)
228
+ if server_proc.poll() is not None:
229
+ print("Java server crashed immediately. Checking logs...")
230
+ print(server_proc.stdout)
231
+ print(server_proc.stderr)
232
+ sys.exit(1)
233
+
234
  if wait_for_port():
 
235
  with gr.Blocks(title="AtD Self-Hosted") as demo:
236
+ gr.Markdown("# 🛡️ After The Deadline (Self-Installing)")
237
+ gr.Markdown("Java 17 Compatible | Auto-Patching | Auto-Models")
238
 
239
  with gr.Row():
240
  inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)