Update app.py
Browse files
app.py
CHANGED
|
@@ -8,18 +8,19 @@ import gradio as gr
|
|
| 8 |
import http.client
|
| 9 |
import urllib.parse
|
| 10 |
import xml.etree.ElementTree as ET
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# =============================================================================
|
| 13 |
# CONFIGURATION
|
| 14 |
# =============================================================================
|
| 15 |
REPO_URL = "https://github.com/Automattic/atd-server-next.git"
|
| 16 |
-
SERVER_DIR = "atd-server-next"
|
| 17 |
MODELS_DIR = os.path.join(SERVER_DIR, "models")
|
| 18 |
MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"
|
| 19 |
HOST = "127.0.0.1"
|
| 20 |
PORT = 1049
|
| 21 |
|
| 22 |
-
# List of binary models to download
|
| 23 |
MODEL_FILES = [
|
| 24 |
"cnetwork.bin", "cnetwork2.bin", "dictionary.txt", "edits.bin",
|
| 25 |
"endings.bin", "hnetwork.bin", "hnetwork2.bin", "hnetwork4.bin",
|
|
@@ -30,31 +31,52 @@ MODEL_FILES = [
|
|
| 30 |
# =============================================================================
|
| 31 |
# PHASE 1: AUTO-INSTALLATION & SETUP
|
| 32 |
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def setup_server():
|
| 34 |
-
"""Clones repo, downloads models, and compiles rules."""
|
| 35 |
print("--- [PHASE 0] CHECKING REPOSITORY ---")
|
| 36 |
-
|
| 37 |
-
# 1. Clone Repository if missing
|
| 38 |
if not os.path.exists(SERVER_DIR):
|
| 39 |
print(f"Repository not found. Cloning from {REPO_URL}...")
|
| 40 |
-
|
| 41 |
-
# Use shallow clone (--depth 1) to save space and time
|
| 42 |
-
subprocess.run(["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR], check=True)
|
| 43 |
-
print(" -> Clone successful.")
|
| 44 |
-
except subprocess.CalledProcessError as e:
|
| 45 |
-
print(f" -> FAILED to clone repository: {e}")
|
| 46 |
-
sys.exit(1)
|
| 47 |
-
else:
|
| 48 |
-
print(f" -> Repository found at {SERVER_DIR}")
|
| 49 |
|
| 50 |
print("\n--- [PHASE 1] CHECKING MODELS ---")
|
| 51 |
-
|
| 52 |
if not os.path.exists(MODELS_DIR):
|
| 53 |
-
print(f"Creating directory: {MODELS_DIR}")
|
| 54 |
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 55 |
|
| 56 |
-
# 2. Download Models
|
| 57 |
-
print("Checking model files...")
|
| 58 |
for filename in MODEL_FILES:
|
| 59 |
filepath = os.path.join(MODELS_DIR, filename)
|
| 60 |
if not os.path.exists(filepath):
|
|
@@ -62,48 +84,33 @@ def setup_server():
|
|
| 62 |
print(f"Downloading {filename}...")
|
| 63 |
try:
|
| 64 |
urllib.request.urlretrieve(url, filepath)
|
| 65 |
-
print(f" -> Saved.")
|
| 66 |
except Exception as e:
|
| 67 |
print(f" -> FAILED: {e}")
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
|
| 72 |
-
# 3. Compile Rules
|
| 73 |
print("\n--- [PHASE 2] COMPILING RULES ---")
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
"-jar", "lib/sleep.jar",
|
| 86 |
-
"utils/rules/rules.sl"
|
| 87 |
-
],
|
| 88 |
-
cwd=SERVER_DIR,
|
| 89 |
-
check=True
|
| 90 |
-
)
|
| 91 |
-
print("Rules compiled successfully.")
|
| 92 |
-
except subprocess.CalledProcessError as e:
|
| 93 |
-
print(f"Error compiling rules: {e}")
|
| 94 |
-
# We don't exit here because sometimes it works anyway if previous run succeeded
|
| 95 |
|
| 96 |
# =============================================================================
|
| 97 |
# PHASE 2: SERVER MANAGEMENT
|
| 98 |
# =============================================================================
|
| 99 |
def start_backend():
|
| 100 |
-
"""Starts the Java server in the background."""
|
| 101 |
print("\n--- [PHASE 3] STARTING SERVER ---")
|
| 102 |
-
|
| 103 |
cp_sep = ";" if os.name == 'nt' else ":"
|
| 104 |
|
| 105 |
-
#
|
| 106 |
-
# Linux: lib/sleep.jar:lib/moconti.jar...
|
| 107 |
classpath = f"lib/sleep.jar{cp_sep}lib/moconti.jar{cp_sep}lib/spellutils.jar"
|
| 108 |
sleep_cp = f"lib{cp_sep}service/code"
|
| 109 |
|
|
@@ -121,9 +128,8 @@ def start_backend():
|
|
| 121 |
"httpd.Moconti",
|
| 122 |
"atdconfig.sl"
|
| 123 |
]
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
# Start process, redirect stdout/stderr so we can see logs in HF console
|
| 127 |
return subprocess.Popen(cmd, cwd=SERVER_DIR)
|
| 128 |
|
| 129 |
def wait_for_port(timeout=60):
|
|
@@ -139,23 +145,28 @@ def wait_for_port(timeout=60):
|
|
| 139 |
return False
|
| 140 |
|
| 141 |
# =============================================================================
|
| 142 |
-
# PHASE 3: CLIENT
|
| 143 |
# =============================================================================
|
| 144 |
class AtDClient:
|
| 145 |
-
def __init__(self):
|
| 146 |
-
self.host = HOST
|
| 147 |
-
self.port = PORT
|
| 148 |
-
|
| 149 |
def check_document(self, text):
|
| 150 |
try:
|
| 151 |
-
conn = http.client.HTTPConnection(
|
| 152 |
params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
|
| 153 |
headers = {"Content-Type": "application/x-www-form-urlencoded"}
|
| 154 |
conn.request("POST", "/checkDocument", params, headers)
|
| 155 |
resp = conn.getresponse()
|
| 156 |
-
if resp.status != 200: return []
|
| 157 |
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
errors = []
|
| 160 |
for e in root.findall('error'):
|
| 161 |
err = {
|
|
@@ -174,71 +185,56 @@ class AtDClient:
|
|
| 174 |
print(f"Client Error: {e}")
|
| 175 |
return []
|
| 176 |
|
| 177 |
-
# =============================================================================
|
| 178 |
-
# PHASE 4: GRADIO UI
|
| 179 |
-
# =============================================================================
|
| 180 |
client = AtDClient()
|
| 181 |
|
| 182 |
def analyze_text(text):
|
| 183 |
if not text.strip(): return []
|
| 184 |
-
|
| 185 |
errors = client.check_document(text)
|
| 186 |
-
|
| 187 |
output = []
|
| 188 |
last_pos = 0
|
| 189 |
|
| 190 |
-
# We need to find the errors in the text.
|
| 191 |
for err in errors:
|
| 192 |
word = err['string']
|
| 193 |
-
|
| 194 |
-
# We try to use precontext to verify uniqueness
|
| 195 |
-
search_block = err['precontext'] + word
|
| 196 |
-
|
| 197 |
-
# Try finding with context first
|
| 198 |
-
idx = text.find(search_block, last_pos)
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
if idx != -1:
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
else:
|
| 204 |
-
# Fallback: Find just the word
|
| 205 |
-
actual_word_start = text.find(word, last_pos)
|
| 206 |
|
| 207 |
-
if actual_word_start != -1:
|
| 208 |
-
# Add clean text before error
|
| 209 |
-
if actual_word_start > last_pos:
|
| 210 |
-
output.append((text[last_pos:actual_word_start], None))
|
| 211 |
-
|
| 212 |
-
# Add error text
|
| 213 |
label = f"{err['type']}: {err['description']}"
|
| 214 |
if err['suggestions']:
|
| 215 |
label += f" -> {', '.join(err['suggestions'][:3])}"
|
| 216 |
|
| 217 |
-
output.append((text[
|
| 218 |
-
last_pos =
|
| 219 |
|
| 220 |
-
# Add remainder
|
| 221 |
if last_pos < len(text):
|
| 222 |
output.append((text[last_pos:], None))
|
| 223 |
-
|
| 224 |
return output
|
| 225 |
|
| 226 |
-
# =============================================================================
|
| 227 |
-
# MAIN EXECUTION
|
| 228 |
-
# =============================================================================
|
| 229 |
if __name__ == "__main__":
|
| 230 |
-
# 1. Run Setup (Clone, Download, Compile)
|
| 231 |
setup_server()
|
| 232 |
-
|
| 233 |
-
# 2. Start Server
|
| 234 |
server_proc = start_backend()
|
| 235 |
|
| 236 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
if wait_for_port():
|
| 238 |
-
# 4. Launch UI
|
| 239 |
with gr.Blocks(title="AtD Self-Hosted") as demo:
|
| 240 |
-
gr.Markdown("# 🛡️ After The Deadline (Self-Installing
|
| 241 |
-
gr.Markdown("
|
| 242 |
|
| 243 |
with gr.Row():
|
| 244 |
inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)
|
|
|
|
| 8 |
import http.client
|
| 9 |
import urllib.parse
|
| 10 |
import xml.etree.ElementTree as ET
|
| 11 |
+
import shutil
|
| 12 |
+
import re
|
| 13 |
|
| 14 |
# =============================================================================
|
| 15 |
# CONFIGURATION
|
| 16 |
# =============================================================================
|
| 17 |
REPO_URL = "https://github.com/Automattic/atd-server-next.git"
|
| 18 |
+
SERVER_DIR = "atd-server-next"
|
| 19 |
MODELS_DIR = os.path.join(SERVER_DIR, "models")
|
| 20 |
MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"
|
| 21 |
HOST = "127.0.0.1"
|
| 22 |
PORT = 1049
|
| 23 |
|
|
|
|
| 24 |
MODEL_FILES = [
|
| 25 |
"cnetwork.bin", "cnetwork2.bin", "dictionary.txt", "edits.bin",
|
| 26 |
"endings.bin", "hnetwork.bin", "hnetwork2.bin", "hnetwork4.bin",
|
|
|
|
| 31 |
# =============================================================================
|
| 32 |
# PHASE 1: AUTO-INSTALLATION & SETUP
|
| 33 |
# =============================================================================
|
| 34 |
+
def patch_server_code():
    """Strip the 'from: lib/spellutils.jar' clause from the bundled Sleep sources.

    Each candidate script under SERVER_DIR is read, every occurrence of
    ``from: lib/spellutils.jar`` (with optional whitespace after the colon)
    is removed, and the file is rewritten only if something changed. Missing
    files are skipped. The text that follows the removed clause (e.g. the
    terminating semicolon) is left in place.

    The files are processed as raw bytes so that the rewrite never depends on
    the platform's default text encoding and never alters line endings or any
    byte outside the removed clause.

    I/O failures are reported on stdout and do not abort the remaining files.
    """
    print("--- [PHASE 1.5] PATCHING CODE FOR JAVA 17+ ---")

    # Files that are known to contain the incompatible import syntax
    files_to_check = [
        os.path.join(SERVER_DIR, "lib", "spellcheck.sl"),
        os.path.join(SERVER_DIR, "utils", "spell", "trainspell.sl"),  # Potentially here too
    ]

    # Bytes pattern: matches 'from:' + optional ASCII whitespace + the jar path.
    import_clause = re.compile(rb'from:\s*lib/spellutils\.jar')

    for file_path in files_to_check:
        if not os.path.exists(file_path):
            continue

        try:
            with open(file_path, "rb") as f:
                content = f.read()

            new_content, replaced = import_clause.subn(b'', content)

            if replaced:
                print(f"Patching {file_path}...")
                with open(file_path, "wb") as f:
                    f.write(new_content)
                print("-> Success: Code patched for modern Java.")
            else:
                print(f"-> {file_path} already clean.")

        except OSError as e:
            # Best effort: report and carry on with the next file.
            print(f"Error patching {file_path}: {e}")
|
| 69 |
+
|
| 70 |
def setup_server():
|
|
|
|
| 71 |
print("--- [PHASE 0] CHECKING REPOSITORY ---")
|
|
|
|
|
|
|
| 72 |
if not os.path.exists(SERVER_DIR):
|
| 73 |
print(f"Repository not found. Cloning from {REPO_URL}...")
|
| 74 |
+
subprocess.run(["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR], check=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
print("\n--- [PHASE 1] CHECKING MODELS ---")
|
|
|
|
| 77 |
if not os.path.exists(MODELS_DIR):
|
|
|
|
| 78 |
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 79 |
|
|
|
|
|
|
|
| 80 |
for filename in MODEL_FILES:
|
| 81 |
filepath = os.path.join(MODELS_DIR, filename)
|
| 82 |
if not os.path.exists(filepath):
|
|
|
|
| 84 |
print(f"Downloading {filename}...")
|
| 85 |
try:
|
| 86 |
urllib.request.urlretrieve(url, filepath)
|
|
|
|
| 87 |
except Exception as e:
|
| 88 |
print(f" -> FAILED: {e}")
|
| 89 |
+
|
| 90 |
+
# RUN THE PATCH BEFORE COMPILING
|
| 91 |
+
patch_server_code()
|
| 92 |
|
|
|
|
| 93 |
print("\n--- [PHASE 2] COMPILING RULES ---")
|
| 94 |
+
# Only compile if the output file doesn't exist to save startup time on reboots
|
| 95 |
+
if not os.path.exists(os.path.join(SERVER_DIR, "models", "grammar.bin")): # Heuristic check
|
| 96 |
+
try:
|
| 97 |
+
subprocess.run(
|
| 98 |
+
["java", "-Datd.lowmem=true", "-Xmx1024M", "-jar", "lib/sleep.jar", "utils/rules/rules.sl"],
|
| 99 |
+
cwd=SERVER_DIR,
|
| 100 |
+
check=True
|
| 101 |
+
)
|
| 102 |
+
print("Rules compiled successfully.")
|
| 103 |
+
except subprocess.CalledProcessError:
|
| 104 |
+
print("Rule compilation warning (ignoring)...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# =============================================================================
|
| 107 |
# PHASE 2: SERVER MANAGEMENT
|
| 108 |
# =============================================================================
|
| 109 |
def start_backend():
|
|
|
|
| 110 |
print("\n--- [PHASE 3] STARTING SERVER ---")
|
|
|
|
| 111 |
cp_sep = ";" if os.name == 'nt' else ":"
|
| 112 |
|
| 113 |
+
# We explicitly add spellutils.jar to classpath here, rendering the 'from:' syntax obsolete
|
|
|
|
| 114 |
classpath = f"lib/sleep.jar{cp_sep}lib/moconti.jar{cp_sep}lib/spellutils.jar"
|
| 115 |
sleep_cp = f"lib{cp_sep}service/code"
|
| 116 |
|
|
|
|
| 128 |
"httpd.Moconti",
|
| 129 |
"atdconfig.sl"
|
| 130 |
]
|
| 131 |
+
|
| 132 |
+
# Start in SERVER_DIR so relative paths (./models) work
|
|
|
|
| 133 |
return subprocess.Popen(cmd, cwd=SERVER_DIR)
|
| 134 |
|
| 135 |
def wait_for_port(timeout=60):
|
|
|
|
| 145 |
return False
|
| 146 |
|
| 147 |
# =============================================================================
|
| 148 |
+
# PHASE 3: CLIENT
|
| 149 |
# =============================================================================
|
| 150 |
class AtDClient:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def check_document(self, text):
|
| 152 |
try:
|
| 153 |
+
conn = http.client.HTTPConnection(HOST, PORT, timeout=5)
|
| 154 |
params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
|
| 155 |
headers = {"Content-Type": "application/x-www-form-urlencoded"}
|
| 156 |
conn.request("POST", "/checkDocument", params, headers)
|
| 157 |
resp = conn.getresponse()
|
|
|
|
| 158 |
|
| 159 |
+
# If server is still warming up, it might return 503 or connection drop
|
| 160 |
+
if resp.status != 200:
|
| 161 |
+
print(f"Server returned status: {resp.status}")
|
| 162 |
+
return []
|
| 163 |
+
|
| 164 |
+
data = resp.read()
|
| 165 |
+
# Basic check for valid XML
|
| 166 |
+
if not data.strip().startswith(b"<"):
|
| 167 |
+
return []
|
| 168 |
+
|
| 169 |
+
root = ET.fromstring(data)
|
| 170 |
errors = []
|
| 171 |
for e in root.findall('error'):
|
| 172 |
err = {
|
|
|
|
| 185 |
print(f"Client Error: {e}")
|
| 186 |
return []
|
| 187 |
|
|
|
|
|
|
|
|
|
|
| 188 |
client = AtDClient()
|
| 189 |
|
| 190 |
def analyze_text(text):
    """Split *text* into (segment, label) pairs marking the reported errors.

    The errors come from ``client.check_document(text)``; each is a dict read
    via the keys 'string', 'precontext', 'type', 'description' and
    'suggestions'. Errors are located left to right: every match is searched
    for at or after the end of the previous match, so segments never overlap.

    Returns a list of tuples. Unflagged stretches carry ``None`` as label;
    flagged stretches carry ``"<type>: <description>"`` followed by up to
    three suggestions. Empty, whitespace-only or ``None`` input yields ``[]``.
    Errors whose string cannot be found in the remaining text are skipped.
    """
    if not text or not text.strip():
        return []

    errors = client.check_document(text)
    output = []
    last_pos = 0

    for err in errors:
        word = err['string']
        if not word:
            # An empty string "matches" everywhere and would emit a
            # zero-width labelled segment.
            continue

        search_start = last_pos

        # Context aware search: start looking right after the preceding context.
        precontext = err['precontext']
        if precontext:
            context_idx = text.find(precontext, last_pos)
            if context_idx != -1:
                search_start = context_idx + len(precontext)

        idx = text.find(word, search_start)
        if idx == -1 and search_start != last_pos:
            # The context anchor can overshoot, e.g. when the precontext is
            # the previously flagged word (already before last_pos) and a
            # later occurrence was picked up instead. Retry without it rather
            # than dropping the error.
            idx = text.find(word, last_pos)
        if idx == -1:
            continue

        if idx > last_pos:
            output.append((text[last_pos:idx], None))

        label = f"{err['type']}: {err['description']}"
        if err['suggestions']:
            label += f" -> {', '.join(err['suggestions'][:3])}"

        end = idx + len(word)
        output.append((text[idx:end], label))
        last_pos = end

    if last_pos < len(text):
        output.append((text[last_pos:], None))
    return output
|
| 221 |
|
|
|
|
|
|
|
|
|
|
| 222 |
if __name__ == "__main__":
|
|
|
|
| 223 |
setup_server()
|
|
|
|
|
|
|
| 224 |
server_proc = start_backend()
|
| 225 |
|
| 226 |
+
# Give it a moment to fail if java crashes immediately
|
| 227 |
+
time.sleep(2)
|
| 228 |
+
if server_proc.poll() is not None:
|
| 229 |
+
print("Java server crashed immediately. Checking logs...")
|
| 230 |
+
print(server_proc.stdout)
|
| 231 |
+
print(server_proc.stderr)
|
| 232 |
+
sys.exit(1)
|
| 233 |
+
|
| 234 |
if wait_for_port():
|
|
|
|
| 235 |
with gr.Blocks(title="AtD Self-Hosted") as demo:
|
| 236 |
+
gr.Markdown("# 🛡️ After The Deadline (Self-Installing)")
|
| 237 |
+
gr.Markdown("Java 17 Compatible | Auto-Patching | Auto-Models")
|
| 238 |
|
| 239 |
with gr.Row():
|
| 240 |
inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)
|