# atd / app.py
# cstr's picture
# Update app.py
# 0cd52de verified
# raw
# history blame
# 9.18 kB
import os
import sys
import time
import subprocess
import urllib.request
import socket
import gradio as gr
import http.client
import urllib.parse
import xml.etree.ElementTree as ET
import shutil
import re
# =============================================================================
# CONFIGURATION
# =============================================================================
# Git repository that setup_server() clones when SERVER_DIR is missing.
REPO_URL = "https://github.com/Automattic/atd-server-next.git"
# Local checkout directory; the Java processes are started with this as cwd.
SERVER_DIR = "atd-server-next"
# Directory inside the checkout that receives the downloaded model files.
MODELS_DIR = os.path.join(SERVER_DIR, "models")
# Each entry of MODEL_FILES is appended to this URL to form its download link.
MODEL_BASE_URL = "https://openatd.svn.wordpress.org/atd-server/models/"
# Address used to reach the backend server.
HOST = "127.0.0.1"
PORT = 1049
# Model files fetched one by one during setup.
MODEL_FILES = [
    "cnetwork.bin",
    "cnetwork2.bin",
    "dictionary.txt",
    "edits.bin",
    "endings.bin",
    "hnetwork.bin",
    "hnetwork2.bin",
    "hnetwork4.bin",
    "lexicon.bin",
    "model.bin",
    "model.zip",
    "network3f.bin",
    "network3p.bin",
    "not_misspelled.txt",
    "stringpool.bin",
    "trigrams.bin",
]
# =============================================================================
# PHASE 1: AUTO-INSTALLATION & SETUP
# =============================================================================
def patch_server_code(server_dir=None):
    """
    Patch legacy Sleep scripts so they run on modern Java (JDK 9+).

    Removes every ``from: lib/spellutils.jar`` clause (which causes a
    ClassCastException) from the known Sleep sources. Whatever follows the
    clause, such as the terminating semicolon, is left in place.

    Files are read and written as raw bytes, so their original encoding and
    line endings are preserved and an undecodable byte cannot abort the patch.
    Missing files are skipped; I/O failures are reported and do not stop the
    remaining files from being processed.

    Args:
        server_dir: Root of the server checkout. Defaults to the
            module-level ``SERVER_DIR``.
    """
    root = SERVER_DIR if server_dir is None else server_dir
    print("--- [PHASE 1.5] PATCHING CODE FOR JAVA 17+ ---")
    # Files that are known to contain the incompatible import syntax
    files_to_check = [
        os.path.join(root, "lib", "spellcheck.sl"),
        os.path.join(root, "utils", "spell", "trainspell.sl"),  # Potentially here too
    ]
    # Matches: from: [whitespace] lib/spellutils.jar (bytes pattern, compiled once)
    jar_clause = re.compile(rb'from:\s*lib/spellutils\.jar')
    for file_path in files_to_check:
        if not os.path.exists(file_path):
            continue
        try:
            with open(file_path, "rb") as f:
                content = f.read()
            new_content = jar_clause.sub(b'', content)
            if content != new_content:
                print(f"Patching {file_path}...")
                with open(file_path, "wb") as f:
                    f.write(new_content)
                print("-> Success: Code patched for modern Java.")
            else:
                print(f"-> {file_path} already clean.")
        except OSError as e:
            # Binary I/O can only fail at the OS level; report and move on.
            print(f"Error patching {file_path}: {e}")
def setup_server():
    """
    Prepare the server checkout: clone, download models, patch, compile rules.

    Steps, in order:
      0. Clone ``REPO_URL`` into ``SERVER_DIR`` if the directory is missing.
      1. Download every entry of ``MODEL_FILES`` that is not yet present in
         ``MODELS_DIR``. Each file is first written to ``<name>.part`` and
         renamed only after the download finished, so an interrupted
         transfer never leaves a truncated file that later runs would
         mistake for a complete model. Download failures are reported and
         skipped (best effort).
      1.5. Run ``patch_server_code()``.
      2. Compile the rules with Java unless ``grammar.bin`` already exists
         in ``MODELS_DIR``; a non-zero exit of the compiler is ignored.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` fails.
    """
    print("--- [PHASE 0] CHECKING REPOSITORY ---")
    if not os.path.exists(SERVER_DIR):
        print(f"Repository not found. Cloning from {REPO_URL}...")
        subprocess.run(["git", "clone", "--depth", "1", REPO_URL, SERVER_DIR], check=True)

    print("\n--- [PHASE 1] CHECKING MODELS ---")
    os.makedirs(MODELS_DIR, exist_ok=True)
    for filename in MODEL_FILES:
        filepath = os.path.join(MODELS_DIR, filename)
        if os.path.exists(filepath):
            continue
        url = MODEL_BASE_URL + filename
        partial_path = filepath + ".part"
        print(f"Downloading {filename}...")
        try:
            urllib.request.urlretrieve(url, partial_path)
            # Atomic rename: the final name only ever refers to a full file.
            os.replace(partial_path, filepath)
        except Exception as e:
            # Deliberate best effort: one failed model must not abort setup.
            print(f" -> FAILED: {e}")
            try:
                os.remove(partial_path)
            except OSError:
                pass  # Nothing was written, or it is already gone.

    # RUN THE PATCH BEFORE COMPILING
    patch_server_code()

    print("\n--- [PHASE 2] COMPILING RULES ---")
    # Only compile if the output file doesn't exist to save startup time on reboots
    if not os.path.exists(os.path.join(MODELS_DIR, "grammar.bin")):  # Heuristic check
        try:
            subprocess.run(
                ["java", "-Datd.lowmem=true", "-Xmx1024M", "-jar", "lib/sleep.jar", "utils/rules/rules.sl"],
                cwd=SERVER_DIR,
                check=True,
            )
            print("Rules compiled successfully.")
        except subprocess.CalledProcessError:
            print("Rule compilation warning (ignoring)...")
# =============================================================================
# PHASE 2: SERVER MANAGEMENT
# =============================================================================
def start_backend():
    """
    Launch the Java backend server as a child process.

    The process is started with ``SERVER_DIR`` as its working directory so the
    relative paths on the command line (and ./models) resolve. It binds to
    ``HOST`` and listens on ``PORT``, the same values the rest of this module
    uses to reach it. No pipes are requested, so the child's output goes to
    the parent's stdout/stderr.

    Returns:
        subprocess.Popen: Handle of the running Java process.
    """
    print("\n--- [PHASE 3] STARTING SERVER ---")
    cp_sep = os.pathsep  # ";" on Windows, ":" elsewhere
    # We explicitly add spellutils.jar to classpath here, rendering the 'from:' syntax obsolete
    classpath = cp_sep.join(["lib/sleep.jar", "lib/moconti.jar", "lib/spellutils.jar"])
    sleep_cp = cp_sep.join(["lib", "service/code"])
    cmd = [
        "java",
        "-Dfile.encoding=UTF-8",
        "-XX:+AggressiveHeap",
        "-XX:+UseParallelGC",
        "-Datd.lowmem=true",
        # Bind to the same address the health check and client connect to.
        f"-Dbind.interface={HOST}",
        f"-Dserver.port={PORT}",
        f"-Dsleep.classpath={sleep_cp}",
        "-Dsleep.debug=24",
        "-classpath", classpath,
        "httpd.Moconti",
        "atdconfig.sl",
    ]
    # Start in SERVER_DIR so relative paths (./models) work
    return subprocess.Popen(cmd, cwd=SERVER_DIR)
def wait_for_port(timeout=60, host=None, port=None):
    """
    Poll a TCP port until it accepts a connection or the timeout elapses.

    Args:
        timeout: Maximum number of seconds to keep trying. A value of zero
            or less returns ``False`` without attempting a connection.
        host: Address to connect to. Defaults to the module-level ``HOST``.
        port: Port to connect to. Defaults to the module-level ``PORT``.

    Returns:
        bool: ``True`` as soon as a connection succeeds, ``False`` if none
        succeeded before the deadline.
    """
    host = HOST if host is None else host
    port = PORT if port is None else port
    print(f"Waiting for port {port}...")
    # Monotonic clock: the deadline is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1):
                print("Server is Online!")
                return True
        except OSError:
            # Covers refused connections and connect timeouts; retry shortly.
            time.sleep(1)
    return False
# =============================================================================
# PHASE 3: CLIENT
# =============================================================================
class AtDClient:
    """Minimal HTTP client for the backend's ``/checkDocument`` endpoint."""

    def check_document(self, text):
        """
        Send ``text`` to the server and return the errors it reports.

        Args:
            text: The document to check.

        Returns:
            list[dict]: One dict per reported error with the keys ``string``,
            ``description``, ``type``, ``precontext`` and ``suggestions``.
            An empty list is returned when the server answers with a non-200
            status, when the request fails, or when the body is not XML.
        """
        conn = http.client.HTTPConnection(HOST, PORT, timeout=5)
        try:
            params = urllib.parse.urlencode({'key': 'gradio', 'data': text})
            headers = {"Content-Type": "application/x-www-form-urlencoded"}
            conn.request("POST", "/checkDocument", params, headers)
            resp = conn.getresponse()
            # If server is still warming up, it might return 503 or connection drop
            if resp.status != 200:
                print(f"Server returned status: {resp.status}")
                return []
            data = resp.read()
        except (OSError, http.client.HTTPException, ValueError) as e:
            # Socket failures/timeouts, protocol errors, unencodable input.
            print(f"Client Error: {e}")
            return []
        finally:
            # Always release the socket, including on the early returns above.
            conn.close()
        return self.parse_response(data)

    @staticmethod
    def parse_response(data):
        """
        Convert a ``/checkDocument`` response body into a list of error dicts.

        Args:
            data: Raw response body as ``bytes``.

        Returns:
            list[dict]: Parsed errors. ``<error>`` entries without a
            ``<string>`` text are skipped because they cannot be located in
            the input; other missing children fall back to ``""`` (or ``[]``
            for suggestions) instead of discarding the whole response.
            Returns ``[]`` for empty, non-XML or malformed bodies.
        """
        # Basic check for valid XML
        if not data or not data.lstrip().startswith(b"<"):
            return []
        try:
            root = ET.fromstring(data)
        except ET.ParseError as e:
            print(f"Client Error: {e}")
            return []
        errors = []
        for node in root.findall('error'):
            flagged = node.findtext('string')
            if not flagged:
                continue
            options = node.find('suggestions')
            if options is None:
                suggestions = []
            else:
                suggestions = [o.text for o in options.findall('option') if o.text]
            errors.append({
                'string': flagged,
                'description': node.findtext('description', default=""),
                'type': node.findtext('type', default=""),
                'precontext': node.findtext('precontext', default=""),
                'suggestions': suggestions,
            })
        return errors
# Module-level client instance; analyze_text() sends its requests through it.
client = AtDClient()
def _find_flagged(text, word, start):
    """Return the index of ``word`` in ``text`` at or after ``start``, preferring a whole-word hit; -1 if absent."""
    # Only demand a word boundary on a side where the flagged string itself
    # begins/ends with a word character (so "." or "'s" still match).
    head = r'(?<!\w)' if (word[0].isalnum() or word[0] == '_') else ''
    tail = r'(?!\w)' if (word[-1].isalnum() or word[-1] == '_') else ''
    match = re.compile(head + re.escape(word) + tail).search(text, start)
    if match is not None:
        return match.start()
    # Fall back to a plain substring search so nothing that used to be
    # highlighted is lost.
    return text.find(word, start)


def analyze_text(text, errors=None):
    """
    Split ``text`` into (segment, label) pairs for ``gr.HighlightedText``.

    Args:
        text: The user's input. ``None``, empty or whitespace-only input
            yields an empty list without contacting the server.
        errors: Optional list of error dicts (keys ``string``,
            ``description``, ``type``, ``precontext``, ``suggestions``).
            When omitted, the errors are fetched via
            ``client.check_document(text)``.

    Returns:
        list[tuple[str, str | None]]: Consecutive segments of ``text``.
        Unflagged segments carry ``None``; flagged ones carry
        ``"<type>: <description>"`` plus up to three suggestions. Errors are
        located left to right; one whose string cannot be found after the
        previous match is skipped.
    """
    if not text or not text.strip():
        return []
    if errors is None:
        errors = client.check_document(text)
    output = []
    last_pos = 0
    for err in errors:
        word = err['string']
        if not word:
            continue  # Nothing to locate.
        search_start = last_pos
        # Context aware search
        if err['precontext']:
            context_idx = text.find(err['precontext'], last_pos)
            if context_idx != -1:
                search_start = context_idx + len(err['precontext'])
        # Whole-word preference keeps a short string such as "a" from being
        # highlighted inside a longer word such as "has".
        idx = _find_flagged(text, word, search_start)
        if idx == -1:
            continue
        if idx > last_pos:
            output.append((text[last_pos:idx], None))
        label = f"{err['type']}: {err['description']}"
        if err['suggestions']:
            label += f" -> {', '.join(err['suggestions'][:3])}"
        output.append((text[idx:idx + len(word)], label))
        last_pos = idx + len(word)
    if last_pos < len(text):
        output.append((text[last_pos:], None))
    return output
# Script entry point: install/patch the server, start it, wait until it
# accepts connections, then serve the Gradio UI. The Java child process is
# always stopped when this script leaves the block, whether through a startup
# failure, an exception, or the UI shutting down.
if __name__ == "__main__":
    setup_server()
    server_proc = start_backend()
    try:
        # Give it a moment to fail if java crashes immediately
        time.sleep(2)
        exit_code = server_proc.poll()
        if exit_code is not None:
            # The child was started without pipes, so its stdout/stderr
            # attributes are None; its output already went to this console.
            print(f"Java server crashed immediately (exit code {exit_code}). See the Java output above.")
            sys.exit(1)
        if not wait_for_port():
            print("FATAL: Server did not start.")
            sys.exit(1)
        with gr.Blocks(title="AtD Self-Hosted") as demo:
            gr.Markdown("# 🛡️ After The Deadline (Self-Installing)")
            gr.Markdown("Java 17 Compatible | Auto-Patching | Auto-Models")
            with gr.Row():
                inp = gr.Textbox(label="Input", placeholder="Type here... e.g., I has a error.", lines=6)
                out = gr.HighlightedText(label="Corrections", combine_adjacent=True)
            btn = gr.Button("Check Text", variant="primary")
            btn.click(analyze_text, inputs=inp, outputs=out)
        demo.launch(server_name="0.0.0.0", server_port=7860)
    finally:
        # Do not leave an orphaned Java process behind.
        if server_proc.poll() is None:
            server_proc.terminate()
            try:
                server_proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                server_proc.kill()