Spaces:

WARAJA
/

Tzefa

Sleeping

App Files Files Community

WARAJA committed on Mar 7

Commit

35febfb

verified ·

1 Parent(s): 40e0d16

Update Tzefa Space (full pipeline)

Browse files

Files changed (9) hide show

app.py +434 -488
demo.png +2 -2
language/ErrorCorrection.py +474 -364
language/Number2Name.py +24 -15
language/__pycache__/topy.cpython-313.pyc +0 -0
language/createdpython.py +821 -596
language/dialects.py +175 -0
language/topy.py +281 -351
requirements.txt +18 -17

app.py CHANGED Viewed

@@ -1,488 +1,434 @@
-"""
-Tzefa - Complete Pipeline Demo Space
-Image -> Binarization -> Line Segmentation -> Word Segmentation -> OCR ->
-Error Correction -> Compilation -> Execution
-All models loaded from their HF repos. Modular: swap weights and this updates.
-Language files (ErrorCorrection, topy, createdpython, Number2Name) are bundled in language/
-"""
-import os
-import gc
-import sys
-import subprocess
-import importlib
-import traceback
-import cv2
-import torch
-import numpy as np
-from PIL import Image
-import gradio as gr
-from huggingface_hub import hf_hub_download
-import segmentation_models_pytorch as smp
-import torch.nn as nn
-import torch.nn.functional as F
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-from ultralytics import YOLO
-# Add language/ to path so ErrorCorrection can import Number2Name etc.
-SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.insert(0, SPACE_DIR)
-from language import ErrorCorrection, topy
-# ══════════════════════════════════════════════════════════════
-# CONFIG
-# ══════════════════════════════════════════════════════════════
-# Fetches the token from your Space's Secrets for downloading private models
-HF_TOKEN = os.environ.get("HF_TOKEN")
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-BIN_B3_REPO = "WARAJA/Model"
-BIN_B3_FILE = "b3_model.pth"
-BIN_B5_REPO = "WARAJA/b5_model"
-BIN_B5_FILE = "b5_model.pth"
-YOLO_REPO   = "WARAJA/Tzefa-Line-Segmentation-YOLO"
-YOLO_FILE   = "best.pt"
-TROCR_REPO  = "WARAJA/Tzefa-Word-OCR-TrOCR"
-TROCR_BASE_PROC = "microsoft/trocr-small-stage1"
-TILE_SIZE = 640
-YOLO_IMGSZ = 640
-TARGET_WORDS = 3
-MAX_DILATE_ITERS = 200
-# ══════════════════════════════════════════════════════════════
-# 1. BINARIZATION
-# ══════════════════════════════════════════════════════════════
-class HighResMAnet(nn.Module):
-    def __init__(self, encoder_name="mit_b5", classes=1):
-        super().__init__()
-        self.base_model = smp.MAnet(
-            encoder_name=encoder_name, encoder_weights=None,
-            in_channels=3, classes=classes, encoder_depth=5,
-            decoder_channels=(256, 128, 64, 32, 16),
-        )
-        self.high_res_stem = nn.Sequential(
-            nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(True),
-            nn.Conv2d(16, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(True),
-        )
-        self.final_fusion = nn.Sequential(
-            nn.Conv2d(48, 16, 3, padding=1), nn.ReLU(True),
-            nn.Conv2d(16, classes, 1),
-        )
-    def forward(self, x):
-        hr = self.high_res_stem(x)
-        feat = self.base_model.encoder(x)
-        dec = self.base_model.decoder(feat)
-        return self.final_fusion(torch.cat([dec, hr], dim=1))
-def _load_bin_models():
-    models = {}
-    b3_path = hf_hub_download(BIN_B3_REPO, BIN_B3_FILE, token=HF_TOKEN, repo_type="space")
-    m3 = smp.Unet(encoder_name="mit_b3", encoder_weights=None, in_channels=3, classes=1)
-    ckpt3 = torch.load(b3_path, map_location=DEVICE)
-    m3.load_state_dict(ckpt3.get("model_state_dict", ckpt3))
-    models["mit_b3 (Standard)"] = m3.to(DEVICE).eval()
-    b5_path = hf_hub_download(BIN_B5_REPO, BIN_B5_FILE, token=HF_TOKEN, repo_type="model")
-    m5 = HighResMAnet(encoder_name="mit_b5")
-    ckpt5 = torch.load(b5_path, map_location=DEVICE)
-    m5.load_state_dict(ckpt5.get("model_state_dict", ckpt5))
-    models["mit_b5 (HighRes)"] = m5.to(DEVICE).eval()
-    return models
-def _preprocess_tile(pil_img):
-    arr = np.array(pil_img).astype(np.float32) / 255.0
-    mean, std = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])
-    return torch.from_numpy(((arr - mean) / std).transpose(2, 0, 1))
-def binarize(pil_img, model):
-    orig_w, orig_h = pil_img.size
-    pad_w = (TILE_SIZE - orig_w % TILE_SIZE) % TILE_SIZE
-    pad_h = (TILE_SIZE - orig_h % TILE_SIZE) % TILE_SIZE
-    padded = Image.new("RGB", (orig_w + pad_w, orig_h + pad_h), (255, 255, 255))
-    padded.paste(pil_img, (0, 0))
-    nw, nh = padded.size
-    canvas = Image.new("L", (nw, nh), 255)
-    for y in range(0, nh, TILE_SIZE):
-        for x in range(0, nw, TILE_SIZE):
-            tile = padded.crop((x, y, x + TILE_SIZE, y + TILE_SIZE))
-            t = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
-            with torch.no_grad():
-                logits = model(t)
-                if logits.shape[-2:] != (TILE_SIZE, TILE_SIZE):
-                    logits = F.interpolate(logits, (TILE_SIZE, TILE_SIZE), mode="bilinear")
-                mask = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
-            canvas.paste(Image.fromarray(((1.0 - mask) * 255).astype(np.uint8)), (x, y))
-    return canvas.crop((0, 0, orig_w, orig_h))
-# ══════════════════════════════════════════════════════════════
-# 2. LINE SEGMENTATION
-# ══════════════════════════════════════════════════════════════
-def _load_yolo():
-    path = hf_hub_download(YOLO_REPO, YOLO_FILE, token=HF_TOKEN, repo_type="model")
-    return YOLO(path)
-def segment_lines(bin_arr, yolo_model):
-    img_rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
-    orig_h, orig_w = img_rgb.shape[:2]
-    results = yolo_model.predict(img_rgb, imgsz=YOLO_IMGSZ, conf=0.18, iou=0.18, verbose=False)
-    truelines = []
-    if len(results) > 0 and results[0].obb is not None:
-        obbs = sorted(results[0].obb.xyxyxyxy.cpu().numpy(), key=lambda p: np.min(p[:, 1]))
-        for pts in obbs:
-            rx0, rx1 = np.min(pts[:, 0]), np.max(pts[:, 0])
-            ry0, ry1 = np.min(pts[:, 1]), np.max(pts[:, 1])
-            pad = (rx1 - rx0) * 0.12
-            x0 = int(np.clip(rx0 - pad, 0, orig_w))
-            x1 = int(np.clip(rx1 + pad, 0, orig_w))
-            y0, y1 = int(np.clip(ry0, 0, orig_h)), int(np.clip(ry1, 0, orig_h))
-            if x1 - x0 > 0 and y1 - y0 > 0:
-                truelines.append((x0, y0, x1 - x0, y1 - y0))
-    return truelines
-# ══════════════════════════════════════════════════════════════
-# 3. WORD SEGMENTATION
-# ══════════════════════════════════════════════════════════════
-def _get_word_boxes(dilated, min_w, min_h):
-    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    boxes = sorted([b for b in [cv2.boundingRect(c) for c in contours] if b[2] >= min_w and b[3] >= min_h],
-                   key=lambda b: b[0])
-    return boxes
-def segment_words(bin_arr, lines):
-    words_dict = {}
-    for i, (lx, ly, lw, lh) in enumerate(lines):
-        ih, iw = bin_arr.shape[:2]
-        ly, lx = max(0, ly), max(0, lx)
-        lh, lw = min(lh, ih - ly), min(lw, iw - lx)
-        if lw <= 0 or lh <= 0:
-            continue
-        crop = bin_arr[ly:ly+lh, lx:lx+lw]
-        inv = cv2.bitwise_not(crop)
-        min_ww, min_wh = max(5, int(lw * 0.02)), max(5, int(lh * 0.25))
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
-        dilated, prev, found = inv.copy(), None, False
-        for _ in range(MAX_DILATE_ITERS):
-            dilated = cv2.dilate(dilated, kernel, iterations=1)
-            boxes = _get_word_boxes(dilated, min_ww, min_wh)
-            if len(boxes) == TARGET_WORDS:
-                prev = boxes; found = True; break
-            elif len(boxes) < TARGET_WORDS:
-                break
-            else:
-                prev = boxes
-        if not found and prev and len(prev) > TARGET_WORDS:
-            while len(prev) > TARGET_WORDS:
-                gaps = [(prev[j+1][0] - (prev[j][0]+prev[j][2]), j) for j in range(len(prev)-1)]
-                _, mi = min(gaps)
-                b1, b2 = prev[mi], prev[mi+1]
-                merged = (min(b1[0],b2[0]), min(b1[1],b2[1]),
-                          max(b1[0]+b1[2],b2[0]+b2[2])-min(b1[0],b2[0]),
-                          max(b1[1]+b1[3],b2[1]+b2[3])-min(b1[1],b2[1]))
-                prev = list(prev); prev[mi] = merged; prev.pop(mi+1)
-            found = True
-        if not found or not prev or len(prev) != TARGET_WORDS:
-            continue
-        line_words = {}
-        for wi, (wx, wy, ww, wh) in enumerate(prev):
-            line_words[wi+1] = (wx, wx+ww)
-        words_dict[i+1] = line_words
-    return words_dict
-# ══════════════════════════════════════════════════════════════
-# 4. OCR
-# ══════════════════════════════════════════════════════════════
-def _load_trocr():
-    proc = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
-    model = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN).to(DEVICE).eval()
-    return proc, model
-def _pad_aspect(img, max_ratio=4.0):
-    w, h = img.size
-    if w <= max_ratio * h:
-        return img
-    th = int(w / max_ratio)
-    pad = th - h
-    from PIL import ImageOps
-    return ImageOps.expand(img, (0, pad//2, 0, pad - pad//2), fill=(255,255,255))
-def ocr_word(img_pil, proc, model):
-    if img_pil.mode != "RGB":
-        img_pil = img_pil.convert("RGB")
-    img_pil = _pad_aspect(img_pil)
-    pv = proc(img_pil, return_tensors="pt").pixel_values.to(DEVICE)
-    with torch.no_grad():
-        ids = model.generate(pv)
-    txt = proc.batch_decode(ids, skip_special_tokens=True)[0]
-    parts = txt.split()
-    return max(parts, key=len) if parts else txt
-# ══════════════════════════════════════════════════════════════
-# 5. VISUALIZATION
-# ══════════════════════════════════════════════════════════════
-def draw_line_bboxes(img_arr, bboxes):
-    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
-    for i, (x, y, w, h) in enumerate(bboxes):
-        cv2.rectangle(vis, (x, y), (x+w, y+h), (255, 50, 50), 2)
-        cv2.putText(vis, str(i+1), (x, max(y-5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
-    return vis
-def draw_word_bboxes(img_arr, word_tuples):
-    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
-    colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50)]
-    for lt in word_tuples:
-        for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
-            c = colors[wi % len(colors)]
-            cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
-            cv2.putText(vis, text, (x1, max(y1-4, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
-    return vis
-# ══════════════════════════════════════════════════════════════
-# 6. CLEAR VRAM
-# ══════════════════════════════════════════════════════════════
-def clear_vram():
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-# ══════════════════════════════════════════════════════════════
-# 7. CODE EXECUTION
-# ══════════════════════════════════════════════════════════════
-def execute_code(compiled_code):
-    try:
-        result = subprocess.run(
-            [sys.executable, "-c", compiled_code],
-            capture_output=True, text=True, timeout=15,
-            cwd=SPACE_DIR,
-        )
-        output = result.stdout
-        if result.stderr:
-            output += "\n--- STDERR ---\n" + result.stderr
-        if result.returncode != 0:
-            output += f"\n[Process exited with code {result.returncode}]"
-        return output.strip() if output.strip() else "(no output)"
-    except subprocess.TimeoutExpired:
-        return "[Execution timed out after 15 seconds]"
-    except Exception as e:
-        return f"[Execution error: {e}]"
-# ══════════════════════════════════════════════════════════════
-# 8. FULL PIPELINE
-# ══════════════════════════════════════════════════════════════
-def run_full_pipeline(input_image, bin_model_choice):
-    """Returns: binarized, line_vis, word_vis, raw_ocr, corrected, compiled, execution, status"""
-    if input_image is None:
-        return None, None, None, "", "", "", "", "No image provided."
-    if isinstance(input_image, np.ndarray):
-        pil_img = Image.fromarray(input_image).convert("RGB")
-    else:
-        pil_img = input_image.convert("RGB")
-    status = []
-    # Reset language global state between runs
-    importlib.reload(ErrorCorrection)
-    importlib.reload(topy)
-    # ── Stage 1: Binarization ──
-    try:
-        status.append("[1/6] Binarization...")
-        bin_models = _load_bin_models()
-        model = bin_models[bin_model_choice]
-        bin_pil = binarize(pil_img, model)
-        bin_arr = np.array(bin_pil)
-        del bin_models; clear_vram()
-        status.append("  OK")
-    except Exception as e:
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        return None, None, None, "", "", "", "", "\n".join(status)
-    # ── Stage 2: Line Segmentation ──
-    try:
-        status.append("[2/6] Line Segmentation...")
-        yolo_model = _load_yolo()
-        truelines = segment_lines(bin_arr, yolo_model)
-        del yolo_model; clear_vram()
-        status.append(f"  Found {len(truelines)} lines")
-        line_vis = draw_line_bboxes(bin_arr, truelines)
-    except Exception as e:
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        return bin_arr, None, None, "", "", "", "", "\n".join(status)
-    # ── Stage 3: Word Seg + OCR ──
-    try:
-        status.append("[3/6] Word Segmentation + OCR...")
-        words = segment_words(bin_arr, truelines)
-        proc, trocr_model = _load_trocr()
-        all_line_tuples, raw_lines = [], []
-        for ln in sorted(words.keys()):
-            if ln - 1 >= len(truelines):
-                continue
-            lx, ly, lw, lh = truelines[ln - 1]
-            line_tuples = []
-            for wn in sorted(words[ln].keys()):
-                wx1, wx2 = words[ln][wn]
-                ax1, ax2 = max(0, int(lx + wx1)), min(bin_arr.shape[1], int(lx + wx2))
-                ay1, ay2 = max(0, ly - 20), min(bin_arr.shape[0], ly + lh + 20)
-                crop_pil = Image.fromarray(bin_arr[ay1:ay2, ax1:ax2])
-                text = ocr_word(crop_pil, proc, trocr_model)
-                line_tuples.append((text, (ax1, ay1, ax2, ay2)))
-            raw_lines.append(" ".join(t[0] for t in line_tuples))
-            all_line_tuples.append(line_tuples)
-        del proc, trocr_model; clear_vram()
-        word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
-        raw_text = "\n".join(raw_lines)
-        status.append(f"  {len(raw_lines)} lines recognized")
-    except Exception as e:
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        return bin_arr, line_vis, None, "", "", "", "", "\n".join(status)
-    # ── Stage 4: Error Correction ──
-    try:
-        status.append("[4/6] Error Correction...")
-        ErrorCorrection.sendlines(len(truelines))
-        index_list, corrected_lines = [], []
-        for line_entries in all_line_tuples:
-            if not line_entries:
-                corrected_lines.append(""); index_list.append(0); continue
-            raw_tokens = [t[0].upper() for t in line_entries]
-            while len(raw_tokens) < 3:
-                raw_tokens.append("")
-            raw_tokens = raw_tokens[:3]
-            cleaned_first, index, _ = ErrorCorrection.handelfirstword(raw_tokens[0])
-            index_list.append(index)
-            simpler = ErrorCorrection.listsimplefunc[index]
-            if simpler[1] == 0:
-                bucket_idx = simpler[2]
-                if isinstance(bucket_idx, int) and bucket_idx < len(ErrorCorrection.listall):
-                    bucket = ErrorCorrection.listall[bucket_idx]
-                    if raw_tokens[1] and raw_tokens[1] not in bucket:
-                        bucket.append(raw_tokens[1])
-            corrected_lines.append(f"{cleaned_first} {raw_tokens[1]} {raw_tokens[2]}")
-        corrected_text = "\n".join(corrected_lines)
-        status.append("  OK")
-    except Exception as e:
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        return bin_arr, line_vis, word_vis, raw_text, "", "", "", "\n".join(status)
-    # ── Stage 5: Compilation ──
-    try:
-        status.append("[5/6] Compilation...")
-        linelist = []
-        for i in range(len(corrected_lines)):
-            idx = index_list[i] if i < len(index_list) else 0
-            line_obj = ErrorCorrection.toline(corrected_lines[i], idx, ErrorCorrection.giveindents())
-            linelist.append(line_obj)
-        listfunctions_out, listezfunctions_out = ErrorCorrection.giveinstructions()
-        topy.getinstructions(listfunctions_out, listezfunctions_out)
-        compiled = ["from language.createdpython import *"]
-        counterindent = 0
-        for i in range(1, len(linelist) + 1):
-            counterindent += topy.listofindentchanges[i]
-            compiled.append("    " * counterindent + topy.makepredict(linelist[i - 1], i))
-        compiled.append("printvars()")
-        compiled_code = "\n".join(compiled)
-        status.append("  OK")
-    except Exception as e:
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        return bin_arr, line_vis, word_vis, raw_text, corrected_text, "", "", "\n".join(status)
-    # ── Stage 6: Execution ──
-    try:
-        status.append("[6/6] Execution...")
-        exec_output = execute_code(compiled_code)
-        # If the pipeline successfully made it here, overwrite the status logs
-        # with the actual output of the code so it displays in the main terminal box.
-        final_status = exec_output
-    except Exception as e:
-        exec_output = f"Execution error: {e}"
-        status.append(f"  Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
-        final_status = "\n".join(status)
-    return bin_arr, line_vis, word_vis, raw_text, corrected_text, compiled_code, exec_output, final_status
-# ══════════════════════════════════════════════════════════════
-# 9. GRADIO UI
-# ══════════════════════════════════════════════════════════════
-with gr.Blocks(title="Tzefa - Handwritten Code to Execution", theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        "# Tzefa - Handwritten Code to Execution\n"
-        "Upload a photo of handwritten Tzefa code. The pipeline runs binarization, "
-        "line detection, word OCR, error correction, compilation, and execution."
-    )
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_image = gr.Image(type="pil", label="Upload Image")
-            bin_choice = gr.Dropdown(
-                choices=["mit_b3 (Standard)", "mit_b5 (HighRes)"],
-                value="mit_b5 (HighRes)",
-                label="Binarization Model",
-            )
-            run_btn = gr.Button("Run Full Pipeline", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            status_box = gr.Textbox(label="Terminal / Pipeline Status", lines=12, interactive=False)
-    with gr.Tabs():
-        with gr.Tab("Binarized"):
-            bin_out = gr.Image(label="Binarized Image")
-        with gr.Tab("Line Detection"):
-            line_out = gr.Image(label="Line Bounding Boxes")
-        with gr.Tab("Word Detection + OCR"):
-            word_out = gr.Image(label="Word Bboxes with OCR Labels")
-        with gr.Tab("Raw OCR"):
-            raw_out = gr.Textbox(label="Raw OCR (before correction)", lines=15, interactive=False)
-        with gr.Tab("Error Corrected"):
-            corrected_out = gr.Textbox(label="After Error Correction", lines=15, interactive=False)
-        with gr.Tab("Compiled Python"):
-            compiled_out = gr.Code(language="python", label="Generated Python Code")
-        with gr.Tab("Execution Output"):
-            exec_out = gr.Textbox(label="Program Output", lines=10, interactive=False)
-    run_btn.click(
-        fn=run_full_pipeline,
-        inputs=[input_image, bin_choice],
-        outputs=[bin_out, line_out, word_out, raw_out, corrected_out, compiled_out, exec_out, status_box],
-        api_name="predict"
-    )
-    gr.Examples(
-        examples=[["demo.png", "mit_b5 (HighRes)"]],
-        inputs=[input_image, bin_choice],
-        label="Example Images"
-    )
-    gr.Markdown(
-        "### Resources\n"
-        "| Component | Link |\n"
-        "|-----------|------|\n"
-        "| Binarization Demo | [WARAJA/Tzefa-Binarization](https://huggingface.co/spaces/WARAJA/Tzefa-Binarization) |\n"
-        "| b5 Model | [WARAJA/b5_model](https://huggingface.co/WARAJA/b5_model) |\n"
-        "| YOLO Model | [WARAJA/Tzefa-Line-Segmentation-YOLO](https://huggingface.co/WARAJA/Tzefa-Line-Segmentation-YOLO) |\n"
-        "| TrOCR Model | [WARAJA/Tzefa-Word-OCR-TrOCR](https://huggingface.co/WARAJA/Tzefa-Word-OCR-TrOCR) |\n"
-        "| Binarization Dataset | [WARAJA/Tzefa-Binarization-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Binarization-Dataset) |\n"
-        "| Line Seg Dataset | [WARAJA/Tzefa-Line-Segmentation-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Line-Segmentation-Dataset) |\n"
-        "| Word OCR Dataset | [WARAJA/Tzefa-Word-OCR-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Word-OCR-Dataset) |"
-    )
-if __name__ == "__main__":
-    demo.queue().launch()

+"""
+Tzefa - Complete Pipeline Demo Space
+Image → Binarization → Line Segmentation → Word Segmentation → OCR →
+Error Correction → Compilation → Execution
+Supports:
+  - Dialect toggle: 3-word (classic) / 4-word (verbose)
+  - Line segmentation toggle: YOLO (trained model) / Surya (general detector)
+  - Binarization model toggle: mit_b3 / mit_b5
+"""
+import os
+import gc
+import sys
+import subprocess
+import importlib
+import traceback
+import cv2
+import torch
+import numpy as np
+from PIL import Image
+import gradio as gr
+from huggingface_hub import hf_hub_download
+import segmentation_models_pytorch as smp
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from ultralytics import YOLO
+SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, SPACE_DIR)
+from language.dialects import THREE_WORD, FOUR_WORD, CAPS_ONLY, MIXED_CASE
+from language.ErrorCorrection import TzefaParser
+from language import topy
+# ══════════════════════════════════════════════════════════════
+# CONFIG
+# ══════════════════════════════════════════════════════════════
+# Hugging Face access token taken from the environment; None when the variable is unset.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Torch device string: GPU when CUDA is available, CPU otherwise.
+DEVICE   = "cuda" if torch.cuda.is_available() else "cpu"
+# Hub repo / filename pairs for the two binarization checkpoints.
+BIN_B3_REPO     = "WARAJA/Model"
+BIN_B3_FILE     = "b3_model.pth"
+BIN_B5_REPO     = "WARAJA/b5_model"
+BIN_B5_FILE     = "b5_model.pth"
+# Hub repo / filename of the YOLO line-segmentation weights.
+YOLO_REPO       = "WARAJA/Tzefa-Line-Segmentation-YOLO"
+YOLO_FILE       = "best.pt"
+# Repo the TrOCR model weights are loaded from, and the repo its processor is loaded from.
+TROCR_REPO      = "WARAJA/Tzefa-Word-OCR-TrOCR"
+TROCR_BASE_PROC = "microsoft/trocr-small-stage1"
+# Side length, in pixels, of the square tiles fed to the binarization model.
+TILE_SIZE        = 640
+# Value passed as `imgsz` to the YOLO predict call.
+YOLO_IMGSZ       = 640
+# Maximum number of dilation passes tried per line during word segmentation.
+MAX_DILATE_ITERS = 200
+# Label -> constant lookups for the dialect and casing options imported from
+# language.dialects (presumably the labels offered in the UI; verify against the caller).
+_DIALECT_MAP = {"4-word (verbose)": FOUR_WORD, "3-word (classic)": THREE_WORD}
+_CASING_MAP  = {"CAPS only": CAPS_ONLY, "Mixed case": MIXED_CASE}
+# ══════════════════════════════════════════════════════════════
+# 1. BINARIZATION
+# ══════════════════════════════════════════════════════════════
+class HighResMAnet(nn.Module):
+    """MAnet encoder/decoder combined with an extra convolutional stem on the raw input.
+
+    `forward` runs the input through the `smp.MAnet` encoder and decoder and,
+    separately, through a two-layer 3x3 convolution stem. Both feature maps
+    are concatenated on the channel axis and reduced to `classes` channels.
+    The attribute names (`base_model`, `high_res_stem`, `final_fusion`) become
+    state_dict key prefixes, so renaming them would break checkpoint loading.
+    """
+    def __init__(self, encoder_name="mit_b5", classes=1):
+        """Build the MAnet backbone, the stem and the fusion head.
+
+        Args:
+            encoder_name: encoder identifier forwarded to `smp.MAnet`.
+            classes: number of output channels of the fused prediction.
+        """
+        super().__init__()
+        # No pretrained encoder weights are requested (encoder_weights=None).
+        self.base_model = smp.MAnet(
+            encoder_name=encoder_name, encoder_weights=None,
+            in_channels=3, classes=classes, encoder_depth=5,
+            decoder_channels=(256, 128, 64, 32, 16),
+        )
+        # Stem: 3 -> 16 -> 32 channels; padding=1 with 3x3 kernels keeps the spatial size.
+        self.high_res_stem = nn.Sequential(
+            nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(True),
+            nn.Conv2d(16, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(True),
+        )
+        # 48 input channels = 16 (last decoder_channels entry) + 32 (stem output).
+        self.final_fusion = nn.Sequential(
+            nn.Conv2d(48, 16, 3, padding=1), nn.ReLU(True),
+            nn.Conv2d(16, classes, 1),
+        )
+    def forward(self, x):
+        """Return the fused prediction for input batch `x` (raw values, no sigmoid applied here)."""
+        hr   = self.high_res_stem(x)
+        feat = self.base_model.encoder(x)
+        # NOTE(review): the encoder output is passed to the decoder as one argument;
+        # confirm this matches the decoder signature of the installed smp version.
+        dec  = self.base_model.decoder(feat)
+        # The concat assumes `dec` and `hr` share the same spatial size - TODO confirm.
+        return self.final_fusion(torch.cat([dec, hr], dim=1))
+def _load_bin_models():
+    """Download both binarization checkpoints and return the models keyed by display name.
+
+    Returns:
+        dict mapping "mit_b3 (Standard)" and "mit_b5 (HighRes)" to models that
+        have been moved to DEVICE and switched to eval mode.
+    """
+    def _restore(net, ckpt_path):
+        # NOTE(review): torch.load unpickles the file unless weights-only loading is in
+        # effect; the checkpoints come from the Hub repos named in the config - confirm
+        # they are trusted.
+        checkpoint = torch.load(ckpt_path, map_location=DEVICE)
+        # Accept either a wrapped checkpoint or a bare state dict.
+        net.load_state_dict(checkpoint.get("model_state_dict", checkpoint))
+        return net.to(DEVICE).eval()
+    loaded = {}
+    # The b3 checkpoint is fetched with repo_type="space", the b5 one with repo_type="model".
+    b3_ckpt = hf_hub_download(BIN_B3_REPO, BIN_B3_FILE, token=HF_TOKEN, repo_type="space")
+    unet_b3 = smp.Unet(encoder_name="mit_b3", encoder_weights=None, in_channels=3, classes=1)
+    loaded["mit_b3 (Standard)"] = _restore(unet_b3, b3_ckpt)
+    b5_ckpt = hf_hub_download(BIN_B5_REPO, BIN_B5_FILE, token=HF_TOKEN, repo_type="model")
+    manet_b5 = HighResMAnet(encoder_name="mit_b5")
+    loaded["mit_b5 (HighRes)"] = _restore(manet_b5, b5_ckpt)
+    return loaded
+def _preprocess_tile(pil_img):
+    """Convert an image tile to a normalised channels-first tensor.
+
+    Pixel values are scaled to [0, 1], then shifted and scaled per channel with
+    the mean/std constants below, and the axes are reordered from (H, W, C) to
+    (C, H, W). The constants are float64 arrays, so the result is float64.
+    """
+    scaled = np.array(pil_img).astype(np.float32) / 255.0
+    channel_mean = np.array([0.485, 0.456, 0.406])
+    channel_std = np.array([0.229, 0.224, 0.225])
+    normalised = (scaled - channel_mean) / channel_std
+    return torch.from_numpy(normalised.transpose(2, 0, 1))
+def binarize(pil_img, model):
+    """Binarize `pil_img` tile by tile and return a single-channel ("L") image.
+
+    The image is pasted onto a white canvas whose sides are rounded up to a
+    multiple of TILE_SIZE, each TILE_SIZE x TILE_SIZE tile is run through
+    `model`, and the result is cropped back to the original size. Pixels whose
+    sigmoid output exceeds 0.5 are written as 0, all others as 255.
+    """
+    width, height = pil_img.size
+    # -n % TILE_SIZE is the amount needed to reach the next multiple of TILE_SIZE.
+    full_w = width + (-width % TILE_SIZE)
+    full_h = height + (-height % TILE_SIZE)
+    padded = Image.new("RGB", (full_w, full_h), (255, 255, 255))
+    padded.paste(pil_img, (0, 0))
+    result = Image.new("L", (full_w, full_h), 255)
+    tile_shape = (TILE_SIZE, TILE_SIZE)
+    for top in range(0, full_h, TILE_SIZE):
+        for left in range(0, full_w, TILE_SIZE):
+            tile = padded.crop((left, top, left + TILE_SIZE, top + TILE_SIZE))
+            batch = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
+            with torch.no_grad():
+                logits = model(batch)
+                # Resize the prediction when the model output is not tile-sized.
+                if logits.shape[-2:] != tile_shape:
+                    logits = F.interpolate(logits, tile_shape, mode="bilinear")
+                foreground = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
+            tile_img = Image.fromarray(((1.0 - foreground) * 255).astype(np.uint8))
+            result.paste(tile_img, (left, top))
+    return result.crop((0, 0, width, height))
+# ══════════════════════════════════════════════════════════════
+# 2. LINE SEGMENTATION
+# ══════════════════════════════════════════════════════════════
+def _load_yolo():
+    """Download the line-segmentation weights from the Hub and wrap them in a YOLO model."""
+    return YOLO(hf_hub_download(YOLO_REPO, YOLO_FILE, token=HF_TOKEN, repo_type="model"))
+def segment_lines_yolo(bin_arr, yolo_model):
+    """Detect text lines with the YOLO model and return them top-to-bottom.
+
+    Args:
+        bin_arr: image array; a 2-D array is converted to RGB first.
+        yolo_model: object whose `predict` returns results exposing `.obb.xyxyxyxy`.
+
+    Returns:
+        list of (x, y, width, height) integer tuples, ordered by the topmost
+        corner of each detection. Every box is widened by 12% of its width on
+        both sides and clipped to the image; empty boxes are dropped.
+    """
+    if len(bin_arr.shape) == 2:
+        rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB)
+    else:
+        rgb = bin_arr
+    img_h, img_w = rgb.shape[:2]
+    detections = yolo_model.predict(rgb, imgsz=YOLO_IMGSZ, conf=0.2, iou=0.2, verbose=False)
+    if len(detections) == 0 or detections[0].obb is None:
+        return []
+    corners = detections[0].obb.xyxyxyxy.cpu().numpy()
+    line_boxes = []
+    # Each entry is indexed as [:, 0] for x and [:, 1] for y (one row per corner).
+    for quad in sorted(corners, key=lambda q: np.min(q[:, 1])):
+        xs, ys = quad[:, 0], quad[:, 1]
+        left_raw, right_raw = np.min(xs), np.max(xs)
+        margin = (right_raw - left_raw) * 0.12
+        left = int(np.clip(left_raw - margin, 0, img_w))
+        right = int(np.clip(right_raw + margin, 0, img_w))
+        top = int(np.clip(np.min(ys), 0, img_h))
+        bottom = int(np.clip(np.max(ys), 0, img_h))
+        if right - left > 0 and bottom - top > 0:
+            line_boxes.append((left, top, right - left, bottom - top))
+    return line_boxes
+# Lazily created surya DetectionPredictor, reused across calls.
+_surya_predictor = None
+def _merge_overlapping_rows(boxes):
+    """Merge [x1, y1, x2, y2] boxes whose vertical extents overlap into one box per row."""
+    rows = []
+    for box in boxes:
+        grown = list(box)
+        # Absorbing a row can enlarge `grown` enough to reach further rows, so
+        # repeat until a full pass absorbs nothing. Merging into only the first
+        # overlapping row would leave nested or overlapping rows behind.
+        changed = True
+        while changed:
+            changed = False
+            kept = []
+            for row in rows:
+                if grown[1] < row[3] and row[1] < grown[3]:
+                    grown = [
+                        min(grown[0], row[0]), min(grown[1], row[1]),
+                        max(grown[2], row[2]), max(grown[3], row[3]),
+                    ]
+                    changed = True
+                else:
+                    kept.append(row)
+            rows = kept
+        rows.append(grown)
+    return rows
+def segment_lines_surya(bin_arr):
+    """Detect text lines with surya's detector and return them top-to-bottom.
+
+    Detections below the confidence threshold or not larger than 5 px in
+    either dimension are discarded. The remaining boxes are merged whenever
+    their vertical extents overlap, so each returned box covers one row.
+
+    Args:
+        bin_arr: image array; a 2-D array is converted to RGB first.
+
+    Returns:
+        list of (x, y, width, height) integer tuples sorted by top edge.
+
+    Raises:
+        RuntimeError: if the `surya` package cannot be imported.
+    """
+    global _surya_predictor
+    # Set before the import below; presumably read by surya's settings at
+    # import time - TODO confirm. setdefault keeps any value already exported.
+    os.environ.setdefault("DETECTOR_TEXT_THRESHOLD", "0.75")
+    os.environ.setdefault("DETECTOR_BLANK_THRESHOLD", "0.45")
+    try:
+        from surya.detection import DetectionPredictor
+    except ImportError as err:
+        raise RuntimeError("surya-ocr not installed. Add 'surya-ocr' to requirements.txt.") from err
+    if _surya_predictor is None:
+        _surya_predictor = DetectionPredictor()
+    img_rgb   = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
+    pil_image = Image.fromarray(img_rgb)
+    predictions = _surya_predictor([pil_image])
+    min_confidence = 0.6
+    raw = []
+    if predictions and predictions[0].bboxes:
+        for bbox in predictions[0].bboxes:
+            # A missing or None confidence is treated as "keep" instead of
+            # failing on the comparison.
+            conf = getattr(bbox, "confidence", None)
+            if conf is not None and conf < min_confidence:
+                continue
+            x1, y1, x2, y2 = bbox.bbox
+            if (x2 - x1) > 5 and (y2 - y1) > 5:
+                raw.append([float(x1), float(y1), float(x2), float(y2)])
+    # Process boxes in order of vertical centre, then order the rows by top edge.
+    raw.sort(key=lambda b: (b[1] + b[3]) / 2)
+    rows = _merge_overlapping_rows(raw)
+    rows.sort(key=lambda b: b[1])
+    return [(int(b[0]), int(b[1]), int(b[2] - b[0]), int(b[3] - b[1])) for b in rows]
+# ══════════════════════════════════════════════════════════════
+# 3. WORD SEGMENTATION
+# ══════════════════════════════════════════════════════════════
+def _get_word_boxes(dilated, min_w, min_h):
+    """Return bounding rects of the external contours in `dilated`, left to right.
+
+    Rects narrower than `min_w` or shorter than `min_h` are discarded; the
+    rest are sorted by their x coordinate.
+    """
+    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    kept = []
+    for contour in contours:
+        rect = cv2.boundingRect(contour)
+        if rect[2] >= min_w and rect[3] >= min_h:
+            kept.append(rect)
+    kept.sort(key=lambda rect: rect[0])
+    return kept
+def segment_words(bin_arr, lines, target_words):
+    """Split every line box into exactly `target_words` horizontal word spans.
+
+    For each line the inverted crop is dilated repeatedly (at most
+    MAX_DILATE_ITERS passes) until the number of word boxes equals
+    `target_words`. If the count drops below the target first, the last result
+    that still had more boxes than needed is reduced by repeatedly merging the
+    two neighbours separated by the smallest horizontal gap.
+
+    Args:
+        bin_arr: image array the line boxes refer to.
+        lines: iterable of (x, y, width, height) line boxes.
+        target_words: number of words expected on every line.
+
+    Returns:
+        dict mapping 1-based line number -> {1-based word number: (x_start, x_end)},
+        with x values relative to the clamped line crop. Lines that cannot be
+        split into `target_words` boxes are left out.
+    """
+    words_by_line = {}
+    for line_no, (lx, ly, lw, lh) in enumerate(lines, start=1):
+        img_h, img_w = bin_arr.shape[:2]
+        # Clamp the line box to the image.
+        lx = max(0, lx)
+        ly = max(0, ly)
+        lw = min(lw, img_w - lx)
+        lh = min(lh, img_h - ly)
+        if lw <= 0 or lh <= 0:
+            continue
+        ink = cv2.bitwise_not(bin_arr[ly:ly + lh, lx:lx + lw])
+        min_box_w = max(5, int(lw * 0.02))
+        min_box_h = max(5, int(lh * 0.25))
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
+        grown = ink.copy()
+        candidate = None
+        settled = False
+        for _ in range(MAX_DILATE_ITERS):
+            grown = cv2.dilate(grown, kernel, iterations=1)
+            boxes = _get_word_boxes(grown, min_box_w, min_box_h)
+            if len(boxes) < target_words:
+                # Over-merged: fall back to the previous, larger candidate (if any).
+                break
+            candidate = boxes
+            if len(boxes) == target_words:
+                settled = True
+                break
+        if not settled and candidate and len(candidate) > target_words:
+            candidate = list(candidate)
+            while len(candidate) > target_words:
+                # Smallest gap wins; ties go to the leftmost pair.
+                _, left = min(
+                    (candidate[j + 1][0] - (candidate[j][0] + candidate[j][2]), j)
+                    for j in range(len(candidate) - 1)
+                )
+                first, second = candidate[left], candidate[left + 1]
+                x0 = min(first[0], second[0])
+                y0 = min(first[1], second[1])
+                x1 = max(first[0] + first[2], second[0] + second[2])
+                y1 = max(first[1] + first[3], second[1] + second[3])
+                candidate[left:left + 2] = [(x0, y0, x1 - x0, y1 - y0)]
+            settled = True
+        if not settled or not candidate or len(candidate) != target_words:
+            continue
+        words_by_line[line_no] = {
+            word_no: (box[0], box[0] + box[2])
+            for word_no, box in enumerate(candidate, start=1)
+        }
+    return words_by_line
+# ══════════════════════════════════════════════════════════════
+# 4. OCR
+# ══════════════════════════════════════════════════════════════
+def _load_trocr():
+    """Load the TrOCR processor and the recognition model.
+
+    Returns:
+        (processor, model) - the processor comes from TROCR_BASE_PROC, the
+        model from TROCR_REPO, moved to DEVICE and set to eval mode.
+    """
+    processor = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
+    recogniser = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN)
+    recogniser = recogniser.to(DEVICE).eval()
+    return processor, recogniser
+def _pad_aspect(img, max_ratio=4.0):
+    """Pad a very wide PIL image vertically to limit its width/height ratio.
+
+    Images with ``w <= max_ratio * h`` are returned unchanged (same object).
+    Otherwise borders are added above and below so the new height becomes
+    ``int(w / max_ratio)``; the width is not changed.
+    """
+    w, h = img.size
+    if w <= max_ratio * h:
+        return img
+    # Target height for the requested ratio, and the total rows to add.
+    th  = int(w / max_ratio)
+    pad = th - h
+    from PIL import ImageOps
+    # Border is (left, top, right, bottom): split the padding between top and
+    # bottom, giving the extra row to the bottom when `pad` is odd.
+    # The fill is an RGB triple; ocr_word converts to RGB before calling this.
+    return ImageOps.expand(img, (0, pad//2, 0, pad - pad//2), fill=(255, 255, 255))
+def ocr_word(img_pil, proc, model):
+    """Run TrOCR on one word crop and return a single recognised token.
+
+    Args:
+        img_pil: PIL image of the word; converted to RGB if it is in another mode.
+        proc: processor used to build ``pixel_values`` and to decode the output ids.
+        model: model whose ``generate`` method produces the output ids.
+
+    Returns:
+        The longest whitespace-separated piece of the decoded text (the first
+        one on ties), or the decoded text itself when it contains no
+        non-whitespace characters.
+    """
+    if img_pil.mode != "RGB":
+        img_pil = img_pil.convert("RGB")
+    img_pil = _pad_aspect(img_pil)
+    pv = proc(img_pil, return_tensors="pt").pixel_values.to(DEVICE)
+    # Inference only: no gradients are needed for generation.
+    with torch.no_grad():
+        ids = model.generate(pv)
+    txt   = proc.batch_decode(ids, skip_special_tokens=True)[0]
+    # The crop is expected to hold one word, so keep only the longest piece
+    # if the decoder emitted several.
+    parts = txt.split()
+    return max(parts, key=len) if parts else txt
+# ══════════════════════════════════════════════════════════════
+# 5. VISUALISATION
+# ══════════════════════════════════════════════════════════════
+def draw_line_bboxes(img_arr, bboxes):
+    """Return a copy of ``img_arr`` with numbered line boxes drawn on it.
+
+    Args:
+        img_arr: image array; a 2-D (single-channel) array is converted to
+            three channels, anything else is copied as-is.
+        bboxes: iterable of ``(x, y, w, h)`` boxes, labelled 1, 2, ... in order.
+
+    Returns:
+        The annotated image; ``img_arr`` itself is not modified.
+    """
+    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
+    for i, (x, y, w, h) in enumerate(bboxes):
+        cv2.rectangle(vis, (x, y), (x+w, y+h), (255, 50, 50), 2)
+        # Put the label just above the box, clamped so it stays inside the image.
+        cv2.putText(vis, str(i+1), (x, max(y-5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
+    return vis
+def draw_word_bboxes(img_arr, word_tuples):
+    """Return a copy of ``img_arr`` with word boxes and their text drawn on it.
+
+    Args:
+        img_arr: image array; a 2-D (single-channel) array is converted to
+            three channels, anything else is copied as-is.
+        word_tuples: iterable of lines, each an iterable of
+            ``(text, (x1, y1, x2, y2))`` entries with corner coordinates.
+
+    Returns:
+        The annotated image; ``img_arr`` itself is not modified.
+    """
+    vis    = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
+    colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50), (220, 50, 220)]
+    for lt in word_tuples:
+        for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
+            # Colour depends on the word's position within its line, cycling
+            # through the palette.
+            c = colors[wi % len(colors)]
+            cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
+            cv2.putText(vis, text, (x1, max(y1-4, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
+    return vis
+# ══════════════════════════════════════════════════════════════
+# 6. UTILITIES
+# ══════════════════════════════════════════════════════════════
+def clear_vram():
+    """Run the garbage collector and, if CUDA is available, empty torch's CUDA cache."""
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+def execute_code(compiled_code):
+    """Run a Python source string in a child interpreter and return its output as text.
+
+    The code is passed to ``sys.executable -c`` with ``SPACE_DIR`` as the working
+    directory and a 15-second timeout. This function never raises: failures are
+    reported in the returned string.
+
+    Args:
+        compiled_code: Python source text to execute.
+
+    Returns:
+        The captured stdout, followed by a ``--- STDERR ---`` section when stderr
+        is non-empty and an exit-code note when the return code is non-zero;
+        ``"(no output)"`` if that is blank; or a bracketed message on timeout or
+        any other error.
+    """
+    # NOTE(review): no sandboxing is applied in this function; the child runs
+    # whatever code it is given.
+    try:
+        result = subprocess.run(
+            [sys.executable, "-c", compiled_code],
+            capture_output=True, text=True, timeout=15,
+            cwd=SPACE_DIR,
+        )
+        output = result.stdout
+        if result.stderr:
+            output += "\n--- STDERR ---\n" + result.stderr
+        if result.returncode != 0:
+            output += f"\n[Process exited with code {result.returncode}]"
+        return output.strip() if output.strip() else "(no output)"
+    except subprocess.TimeoutExpired:
+        return "[Execution timed out after 15 seconds]"
+    except Exception as e:
+        # Catch-all so the caller always gets a displayable string.
+        return f"[Execution error: {e}]"
+# ══════════════════════════════════════════════════════════════
+# 7. FULL PIPELINE
+# ══════════════════════════════════════════════════════════════
+def run_full_pipeline(input_image, bin_model_choice, dialect_choice, casing_choice, seg_method):
+    if input_image is None:
+        return None, None, None, "", "", "", "", "No image provided."
+    if isinstance(input_image, np.ndarray):
+        pil_img = Image.fromarray(input_image).convert("RGB")
+    else:
+        pil_img = input_image.convert("RGB")
+    dialect = _DIALECT_MAP.get(dialect_choice, FOUR_WORD)
+    casing  = _CASING_MAP.get(casing_choice, CAPS_ONLY)
+    status  = []
+    # Fresh language state for every run
+    importlib.reload(topy)
+    parser       = TzefaParser(dialect=dialect, casing=casing)
+    target_words = parser.expected_words_per_line
+    # ── Stage 1: Binarization ──
+    try:
+        status.append("[1/6] Binarization...")
+        bin_models = _load_bin_models()
+        bin_pil    = binarize(pil_img, bin_models[bin_model_choice])
+        bin_arr    = np.array(bin_pil)
+        del bin_models; clear_vram()
+        status.append("  OK")
+    except Exception as e:
+        return None, None, None, "", "", "", "", f"Binarization failed: {e}"
+    # ── Stage 2: Line Segmentation ──
+    try:
+        status.append(f"[2/6] Line Segmentation ({seg_method})...")
+        if seg_method == "Surya":
+            truelines = segment_lines_surya(bin_arr)
+        else:
+            yolo_model = _load_yolo()
+            truelines  = segment_lines_yolo(bin_arr, yolo_model)
+            del yolo_model; clear_vram()
+        status.append(f"  OK  {len(truelines)} lines")
+        line_vis = draw_line_bboxes(bin_arr, truelines)
+    except Exception as e:
+        return bin_arr, None, None, "", "", "", "", f"Line Seg failed: {e}\n{traceback.format_exc()}"
+    # ── Stage 3: Word Seg + OCR ──
+    try:
+        status.append("[3/6] Word Segmentation + OCR...")
+        words             = segment_words(bin_arr, truelines, target_words)
+        proc, trocr_model = _load_trocr()
+        all_line_tuples, raw_lines = [], []
+        for ln in sorted(words.keys()):
+            if ln - 1 >= len(truelines):
+                continue
+            lx, ly, lw, lh = truelines[ln - 1]
+            line_tuples = []
+            for wn in sorted(words[ln].keys()):
+                wx1, wx2 = words[ln][wn]
+                ax1 = max(0, int(lx + wx1))
+                ax2 = min(bin_arr.shape[1], int(lx + wx2))
+                ay1 = max(0, ly - 20)
+                ay2 = min(bin_arr.shape[0], ly + lh + 20)
+                text = ocr_word(Image.fromarray(bin_arr[ay1:ay2, ax1:ax2]), proc, trocr_model)
+                line_tuples.append((text, (ax1, ay1, ax2, ay2)))
+            raw_lines.append(" ".join(t[0] for t in line_tuples))
+            all_line_tuples.append(line_tuples)
+        del proc, trocr_model; clear_vram()
+        word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
+        raw_text = "\n".join(raw_lines)
+        status.append(f"  OK  {len(raw_lines)} lines recognised")
+    except Exception as e:
+        return bin_arr, line_vis, None, "", "", "", "", f"OCR failed: {e}\n{traceback.format_exc()}"
+    # ── Stage 4: Error Correction ──
+    try:
+        status.append("[4/6] Error Correction...")
+        parser.init_indent_table(len(truelines))
+        corrected_lines, bytecode_list = [], []
+        for line_entries in all_line_tuples:
+            if not line_entries:
+                corrected_lines.append("")
+                bytecode_list.append(["MAKE", "INTEGER", "TEMPORARY", "0"])
+                continue
+            raw_tokens = [t[0] for t in line_entries]
+            while len(raw_tokens) < target_words:
+                raw_tokens.append("")
+            raw_tokens  = raw_tokens[:target_words]
+            normalised  = parser.normalize_source_line(raw_tokens)
+            bytecode    = parser.parse_line(normalised)
+            bytecode_list.append(bytecode)
+            corrected_lines.append(" ".join(bytecode))   # post-correction output
+        corrected_text = "\n".join(corrected_lines)
+        status.append("  OK")
+    except Exception as e:
+        return bin_arr, line_vis, word_vis, raw_text, "", "", "", \
+               f"Error Correction failed: {e}\n{traceback.format_exc()}"

demo.png CHANGED Viewed

Git LFS Details

SHA256: 96bb166ee0d15a28d3815b9548335479a48be821dad26a6f9dbfa2c4903d2d87
Pointer size: 132 Bytes
Size of remote file: 8.64 MB

Git LFS Details

SHA256: b36d53a25224741dffa9e1cc009c6282995c9fbf900efaf2bdfaf682918f1298
Pointer size: 132 Bytes
Size of remote file: 8.57 MB

language/ErrorCorrection.py CHANGED Viewed

@@ -1,373 +1,483 @@
 from language import Number2Name
 from fast_edit_distance import edit_distance
-def giveinstructions():
-    ### returns instructions for each function in the language for topy
-    return listfunctions, listezfunc
-listofindents = []
-def updatesizelistofindnets(size):
-    global listofindents
-    listofindents = [0] * (size + 1)
-def tosimple(func):
-    simpler = ["a", "b", "c", "d"]
-    simpler[0] = func[0]
-    if func[1].startswith("NEW"):
-        simpler[1] = 0
-    else:
-        simpler[1] = 1
-    i = 1
-    j = 2
-    if func[i].endswith("INT"):
-        simpler[j] = 0
-    elif func[i].endswith("STR"):
-        simpler[j] = 1
-    elif func[i].endswith("LIST"):
-        simpler[j] = 2
-    elif func[i].endswith("BOOL"):
-        simpler[j] = 3
-    elif func[i].endswith("COND"):
-        simpler[j] = 4
-    elif func[i].endswith("STATE"):
-        simpler[j] = 5
-    elif func[i].endswith("TYPE"):
-        simpler[j] = 6
-    elif func[i].endswith("FUNC"):
-        simpler[j] = 7
-    elif func[i].endswith("TRUTH"):
-        simpler[j] = 8
-    elif func[i].endswith("COMPARE"):
-        simpler[j] = 9
-    elif func[i].endswith("NUMNAME"):
-        simpler[j] = 10
-    elif func[i].endswith("TEXT"):
-        simpler[j] = 11
-    i = 2
-    j = 3
-    if func[i].endswith("INT"):
-        simpler[j] = 0
-    elif func[i].endswith("STR"):
-        simpler[j] = 1
-    elif func[i].endswith("LIST"):
-        simpler[j] = 2
-    elif func[i].endswith("BOOL"):
-        simpler[j] = 3
-    elif func[i].endswith("COND"):
-        simpler[j] = 4
-    elif func[i].endswith("STATE"):
-        simpler[j] = 5
-    elif func[i].endswith("TYPE"):
-        simpler[j] = 6
-    elif func[i].endswith("FUNC"):
-        simpler[j] = 7
-    elif func[i].endswith("TRUTH"):
-        simpler[j] = 8
-    elif func[i].endswith("COMPARE"):
-        simpler[j] = 9
-    elif func[i].endswith("NUMNAME"):
-        simpler[j] = 10
-    elif func[i].endswith("TEXT"):
-        simpler[j] = 11
-    simpler.append(0)
-    return simpler
-# CHANGED: Replaced NUM/INT with NUMNAME for immediate value reading (Index 10)
-listfunctions = [
-    ["MAKEINTEGER", "NEWINT", "NUMNAME"],
-    ["MAKEBOOLEAN", "NEWBOOL", "TRUTH"],
-    ["MAKESTR", "NEWSTR", "TEXT"],
-    ["NEWLIST", "NEWLIST", "NUMNAME"],
-    ["BASICCONDITION", "NEWCOND", "COMPARE"],
-    ["LEFTSIDE", "COND", "INT"],
-    ["RIGHTSIDE", "COND", "INT"],
-    ["CHANGECOMPARE", "COND", "COMPARE"],
-    ["WHILE", "COND", "NUMNAME"],
-    ["COMPARE", "COND", "NUMNAME"],
-    ["ELSECOMPARE", "COND", "NUMNAME"],
-    ["ITERATE", "LIST", "NUMNAME"],
-    ["WHILETRUE", "BOOL", "NUMNAME"],
-    ["IFTRUE", "BOOL", "NUMNAME"],
-    ["ELSEIF", "BOOL", "NUMNAME"],
-    ["INTEGERFUNCTION", "NEWFUNC", "TYPE"],
-    ["STRINGFUNCTION", "NEWFUNC", "TYPE"],
-    ["LISTFUNCTION", "NEWFUNC", "TYPE"],
-    ["RETURN", "VALUE", "STATE"],
-    ["PRINTSTRING", "STR", "STATE"],
-    ["PRINTINTEGER", "INT", "STATE"],
-    ["SETINDEX", "LIST", "INT"],
-    ["TYPETOINT", "STR", "INT"],
-    ["GETSTRING", "LIST", "STR"],
-    ["GETINTEGER", "LIST", "INT"],
-    ["WRITEINTEGER", "LIST", "INT"],
-    ["WRITESTRING", "LIST", "STR"],
-    ["WRITEBOOL", "LIST", "BOOL"],
-    ["WRITELIST", "LIST", "LIST"],
-    ["GETLIST", "LIST", "LIST"],
-    ["GETBOOL", "LIST", "BOOL"],
-    ["GETTYPE", "LIST", "STR"],
-    ["LENGTH", "LIST", "INT"],
-    ["ADDVALUES", "INT", "INT"],
-    ["MULTIPLY", "INT", "INT"],
-    ["MATHPOW", "INT", "INT"],
-    ["DIVIDE", "INT", "INT"],
-    ["SIMPLEDIVIDE", "INT", "INT"],
-    ["SUBTRACT", "INT", "INT"],
-    ["MODULO", "INT", "INT"],
-    ["COMBINE", "STR", "STR"],
-    ["BLANKSPACES", "STR", "NUMNAME"],
-    ["ADDSIZE", "LIST", "INT"],
-    ["ASSSIGNINT", "INT", "INT"],
-    ["STRINGASSIGN", "STR", "STR"],
-    ["COPYLIST", "LIST", "LIST"],
-]
-listsimplefunc = [tosimple(i) for i in listfunctions]
-listofindents = []
-listezfunc = [i[0] for i in listfunctions]
-# Variable Names only
-listintegers = ["TEMPORARY", "LOCALINT", "LOOPINTEGER"]
-# Immediate Number Names (Index 10)
-listnumnames = []
-word_to_num = {}
-for i in range(101):
-    name = Number2Name.get_name(i)
-    listnumnames.append(name)
-    word_to_num[name] = str(i)
-liststrings = ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING", "INTEGER", "STRING", "LIST", "BOOLEAN"]
-listlists = ["GLOBALLIST", "LOOPLIST"]
-listconds = ["THETRUTH"]
-listbools = ["LOOPBOOL"]
-liststate = ["STAY", "BREAK"]
-listype = ["INTEGER", "STRING", "LIST", "BOOLEAN"]
-lookuptype = {"INTEGER": "INT", "STRING": "STR", "LIST": "LIST", "BOOLEAN": "BOOL"}
-listtruth = ["TRUE", "FALSE"]
-listcompare = ["EQUALS", "BIGEQUALS", "BIGGER"]
-listtext = []  # Placeholder for raw text arguments (Index 11)
-listall = [
-    listintegers,
-    liststrings,
-    listlists,
-    listbools,
-    listconds,
-    liststate,
-    listype,
-    listezfunc,
-    listtruth,
-    listcompare,
-    listnumnames,  # Index 10: Immediates
-    listtext,  # Index 11: Text
 ]
-thetype = []
-insidefunction = False
-counter = 0
-def getsimples():
-    return listsimplefunc
-def sendlines(i):
-    global listofindents
-    listofindents = [0] * max(i + 1, 1000)
-def giveindents():
-    return listofindents
-def ocr_edit_distance(word1, word2):
-    """
-    Custom Levenshtein distance tailored for OCR.
-    Heavily penalizes distant letter swaps (H vs Q), but forgives common OCR shapes.
-    """
-    word1, word2 = word1.upper(), word2.upper()
-    # Common OCR confusions get a low penalty (0.5).
-    # Add more to this dictionary as you find specific model confusions!
-    low_cost_subs = {
-        ('O', '0'): 0.5, ('0', 'O'): 0.5,
-        ('I', '1'): 0.5, ('1', 'I'): 0.5,
-        ('I', 'L'): 0.5, ('L', 'I'): 0.5,
-        ('S', '5'): 0.5, ('5', 'S'): 0.5,
-        ('Z', '2'): 0.5, ('2', 'Z'): 0.5,
-        ('C', 'O'): 0.5, ('O', 'C'): 0.5,
-        ('C', 'G'): 0.5, ('G', 'C'): 0.5,
-        ('B', '8'): 0.5, ('8', 'B'): 0.5,
-        ('D', 'O'): 0.5, ('O', 'D'): 0.5,
-        ('E', 'F'): 0.5, ('F', 'E'): 0.5,
-        ('A', '4'): 0.5, ('4', 'A'): 0.5,
-    }
-    m, n = len(word1), len(word2)
-    dp = [[0.0] * (n + 1) for _ in range(m + 1)]
-    for i in range(m + 1):
-        dp[i][0] = i * 1.0  # Cost of deletion
-    for j in range(n + 1):
-        dp[0][j] = j * 1.0  # Cost of insertion
-    for i in range(1, m + 1):
-        for j in range(1, n + 1):
-            if word1[i-1] == word2[j-1]:
-                cost = 0.0
             else:
-                sub_pair = (word1[i-1], word2[j-1])
-                # Generic substitution is penalized heavily (2.0)
-                cost = low_cost_subs.get(sub_pair, 2.0)
-            dp[i][j] = min(
-                dp[i-1][j] + 1.0,      # deletion
-                dp[i][j-1] + 1.0,      # insertion
-                dp[i-1][j-1] + cost    # substitution
-            )
-    return dp[m][n]
-def findword(somelist, word, use_ocr_weights=False):
-    """
-    Find the closest match to `word` in `somelist`.
-    use_ocr_weights=True  → ocr_edit_distance (custom weighted, no cap)
-                            used for function/command name lookups where OCR
-                            can produce arbitrarily garbled prefixes/suffixes.
-    use_ocr_weights=False → standard edit_distance with a generous cap (32)
-                            used for argument vocab lookups (short words, small lists).
-    """
-    min_dist = 999
-    tobereturned = [word, 0]
-    lentobereturned = 16
-    for b in range(len(somelist)):
-        lenword = len(word)
-        i = somelist[b]
-        lenofi = len(i)
-        if i == word:
-            return [i, b]
         else:
             if use_ocr_weights:
-                distance = ocr_edit_distance(word, i)
             else:
-                distance = edit_distance(word, i, 32)
-            if distance < min_dist:
-                min_dist = distance
-                tobereturned = [i, b]
-                lentobereturned = len(tobereturned[0])
-            elif distance == min_dist:
-                if abs(lenword - lenofi) < abs(lenword - lentobereturned):
-                    tobereturned = [i, b]
-                    lentobereturned = len(tobereturned[0])
-    return tobereturned
-def handelfirstword(firstword):
-    func, index = findword(listezfunc, firstword, use_ocr_weights=True)
-    # Check if Arg2 (Index 2 in definition) is NUMNAME (Index 10 in listall)
-    # We use listfunctions directly to check the string type
-    if listfunctions[index][2] == "NUMNAME":
-        return (func, index, 1)
-    else:
-        return (func, index, 0)
-def toline(line, index, listofindents):
-    global counter
-    global thetype
-    global insidefunction
-    disthreeline = line.split(" ")
-    threeline = ["", "", ""]
-    problem = False
-    threeline[0] = listezfunc[index]
-    simpler = listsimplefunc[index]
-    if threeline[0] == "STRINGFUNCTION":
-        if insidefunction:
-            problem = True
-        else:
-            insidefunction = True
-            threeline[1] = disthreeline[1]
-            threeline[2] = findword(listype, disthreeline[2])[0]
-            thetype.append(threeline[2])
-            newsomething = [threeline[1], "STR", lookuptype[threeline[2]]]
-            listezfunc.append(newsomething[0])
-            listfunctions.append(newsomething)
-            simplerer = tosimple(newsomething)
-            listsimplefunc.append(simplerer)
-    elif threeline[0] == "INTEGERFUNCTION":
-        if insidefunction:
-            problem = True
-        else:
-            insidefunction = True
-            threeline[1] = disthreeline[1]
-            threeline[2] = findword(listype, disthreeline[2])[0]
-            thetype.append(threeline[2])
-            newsomething = [threeline[1], "INT", lookuptype[threeline[2]]]
-            listezfunc.append(newsomething[0])
-            listfunctions.append(newsomething)
-            simplerer = tosimple(newsomething)
-            listsimplefunc.append(simplerer)
-    elif threeline[0] == "LISTFUNCTION":
-        if insidefunction:
-            problem = True
-        else:
-            insidefunction = True
-            threeline[1] = disthreeline[1]
-            threeline[2] = findword(listype, disthreeline[2])[0]
-            thetype.append(threeline[2])
-            newsomething = [threeline[1], "LIST", lookuptype[threeline[2]]]
-            listezfunc.append(newsomething[0])
-            listfunctions.append(newsomething)
-            simplerer = tosimple(newsomething)
-            listsimplefunc.append(simplerer)
-    elif simpler[0] == "RETURN":
-        if len(thetype) == 0:
-            problem = True
-        else:
-            threeline[0] = "RETURN"
-            threeline[1] = findword(listall[listype.index(thetype[-1])], disthreeline[1])[0]
-            threeline[2] = findword(liststate, disthreeline[2])[0]
-            if threeline[2] == "BREAK":
-                insidefunction = False
-                thetype.pop()
-                listofindents[counter] = -1
-    else:
-        # Arg 1
-        if simpler[1] == 0:
-            listall[simpler[2]].append(disthreeline[1])
-            threeline[1] = disthreeline[1]
-        else:
-            threeline[1] = findword(listall[simpler[2]], disthreeline[1])[0]
-            # If Arg 1 is a NUMNAME, replace with actual value
-            if simpler[2] == 10:
-                threeline[1] = word_to_num[threeline[1]]
-        # Arg 2
-        if simpler[3] < len(listall):
-            threeline[2] = findword(listall[simpler[3]], disthreeline[2])[0]
-            # If Arg 2 is a NUMNAME, replace with actual value
-            if simpler[3] == 10:
-                threeline[2] = word_to_num[threeline[2]]
-        else:
-            threeline[2] = disthreeline[2]
-    # Use the now-numeric value in threeline[2] for indents
-    # Only actual control flow: WHILE, ITERATE, COMPARE, ELSECOMPARE, WHILETRUE, IFTRUE, ELSEIF
-    # NOT BASICCONDITION, CHANGECOMPARE (these aren't control flow, their arg2 isn't a line number)
-    control_flow = {"WHILE", "ITERATE", "COMPARE", "ELSECOMPARE", "WHILETRUE", "IFTRUE", "ELSEIF"}
-    if threeline[0] in control_flow:
-        listofindents[counter] = 1
-        listofindents[int(threeline[2])] = -1
-    elif threeline[0] == "DEFINE":
-        listfunctions.append(threeline[0])
-        listofindents[int(counter)] = 1
-    counter += 1
-    if len(threeline[1]) == 0 or len(threeline[2]) == 0:
-        problem = True
-    return threeline

+"""
+ErrorCorrection.py – Tzefa source-text parser and error-correcting compiler front-end.
+TzefaParser converts raw text lines (e.g. from OCR) into validated 4-word
+bytecode tuples consumed by topy.make_instruction().
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Tuple
 from language import Number2Name
+from language.dialects import (
+    THREE_WORD, CAPS_ONLY,
+    normalize_line, words_per_line, ALU_VERBS,
+)
+from language import topy
 from fast_edit_distance import edit_distance
+# ---------------------------------------------------------------------------
+# Instruction definitions — now in 4-word form
+# ---------------------------------------------------------------------------
+# Each entry: [VERB, TYPE, ARG1_KIND, ARG2_KIND]
+#
+# ARG_KIND values:
+#   "NEWINT"    – declares a new integer name
+#   "NEWSTR"    – declares a new string name
+#   "NEWBOOL"   – declares a new boolean name
+#   "NEWLIST"   – declares a new list name
+#   "NEWCOND"   – declares a new condition name
+#   "NEWFUNC"   – declares a new function name
+#   "INT"       – existing integer var
+#   "STR"       – existing string var
+#   "LIST"      – existing list var
+#   "BOOL"      – existing boolean var
+#   "COND"      – existing condition
+#   "STATE"     – STAY / BREAK
+#   "TYPE"      – INTEGER / STRING / LIST / BOOLEAN
+#   "TRUTH"     – TRUE / FALSE
+#   "COMPARE"   – EQUALS / BIGEQUALS / BIGGER
+#   "NUMNAME"   – numeric name (ZERO … ONEHUNDRED)
+#   "TEXT"      – free text (no correction)
+#   "VALUE"     – context-dependent (return var, resolved at parse time)
+_BUILTIN_INSTRUCTIONS: List[List[str]] = [
+    # Variable declarations
+    ["MAKE",      "INTEGER",   "NEWINT",   "NUMNAME"],
+    ["MAKE",      "BOOLEAN",   "NEWBOOL",  "TRUTH"],
+    ["MAKE",      "STRING",    "NEWSTR",   "TEXT"],
+    ["MAKE",      "LIST",      "NEWLIST",  "NUMNAME"],
+    ["MAKE",      "CONDITION", "NEWCOND",  "COMPARE"],
+    # Condition manipulation
+    ["SET",       "LEFT",      "COND",     "INT"],
+    ["SET",       "RIGHT",     "COND",     "INT"],
+    ["CHANGE",    "COMPARE",   "COND",     "COMPARE"],
+    # Control flow
+    ["WHILE",     "CONDITION", "COND",     "NUMNAME"],
+    ["IF",        "CONDITION", "COND",     "NUMNAME"],
+    ["ELIF",      "CONDITION", "COND",     "NUMNAME"],
+    ["ITERATE",   "LIST",      "LIST",     "NUMNAME"],
+    ["WHILE",     "BOOLEAN",   "BOOL",     "NUMNAME"],
+    ["IF",        "BOOLEAN",   "BOOL",     "NUMNAME"],
+    ["ELIF",      "BOOLEAN",   "BOOL",     "NUMNAME"],
+    # Function definition
+    ["FUNCTION",  "INTEGER",   "NEWFUNC",  "TYPE"],
+    ["FUNCTION",  "STRING",    "NEWFUNC",  "TYPE"],
+    ["FUNCTION",  "LIST",      "NEWFUNC",  "TYPE"],
+    # Return
+    ["RETURN",    "VALUE",     "VALUE",    "STATE"],
+    # Print
+    ["PRINT",     "STRING",    "STR",      "STATE"],
+    ["PRINT",     "INTEGER",   "INT",      "STATE"],
+    # Assignment / copy
+    ["SET",       "INTEGER",   "INT",      "INT"],
+    ["SET",       "STRING",    "STR",      "STR"],
+    ["SET",       "LIST",      "LIST",     "LIST"],
+    ["SET",       "INDEX",     "LIST",     "NUMNAME"],
+    # Type introspection
+    ["TYPE",      "TOINT",     "STR",      "INT"],
+    # List read
+    ["GET",       "STRING",    "LIST",     "STR"],
+    ["GET",       "INTEGER",   "LIST",     "INT"],
+    ["GET",       "BOOLEAN",   "LIST",     "BOOL"],
+    ["GET",       "LIST",      "LIST",     "LIST"],
+    ["GET",       "TYPE",      "LIST",     "STR"],
+    ["GET",       "LENGTH",    "LIST",     "INT"],
+    # List write
+    ["WRITE",     "INTEGER",   "LIST",     "INT"],
+    ["WRITE",     "STRING",    "LIST",     "STR"],
+    ["WRITE",     "BOOLEAN",   "LIST",     "BOOL"],
+    ["WRITE",     "LIST",      "LIST",     "LIST"],
+    # Arithmetic — layout: [VERB, DEST, SRC1, SRC2]
+    # The second column (DEST) holds the destination kind (existing or new variable); the third and fourth columns are the two source kinds.
+    ["ADD",       "INT",       "INT",      "INT"],
+    ["MULTIPLY",  "INT",       "INT",      "INT"],
+    ["POWER",     "INT",       "INT",      "INT"],
+    ["DIVIDE",    "INT",       "INT",      "INT"],
+    ["SIMPLEDIVIDE","INT",     "INT",      "INT"],
+    ["SUBTRACT",  "INT",       "INT",      "INT"],
+    ["MODULO",    "INT",       "INT",      "INT"],
+    # String ops — COMBINE layout: [COMBINE, DEST, SRC1, SRC2]
+    ["COMBINE",   "STR",       "STR",      "STR"],
+    ["PAD",       "STRING",    "STR",      "NUMNAME"],
+    # List resize — ADD SIZE layout: [ADD, SIZE, listname, int_amount]
+    ["ADD",       "SIZE",      "LIST",     "INT"],
 ]
+# Which kinds declare new names (start with "NEW")
+_NEW_KINDS = {"NEWINT", "NEWSTR", "NEWBOOL", "NEWLIST", "NEWCOND", "NEWFUNC"}
+# Kind → bucket index in the all_names list
+_KIND_TO_BUCKET: Dict[str, int] = {
+    "INT": 0, "NEWINT": 0,
+    "STR": 1, "NEWSTR": 1,
+    "LIST": 2, "NEWLIST": 2,
+    "BOOL": 3, "NEWBOOL": 3,
+    "COND": 4, "NEWCOND": 4,
+    "STATE": 5,
+    "TYPE": 6,
+    "NEWFUNC": 7,
+    "TRUTH": 8,
+    "COMPARE": 9,
+    "NUMNAME": 10,
+    "TEXT": 11,
+    "VALUE": -1,  # resolved dynamically
+}
+# The lookup type for function return types
+_FUNC_TYPE_MAP: Dict[str, str] = {
+    "INTEGER": "INT", "STRING": "STR", "LIST": "LIST", "BOOLEAN": "BOOL",
+}
+class TzefaParser:
+    """Parse and error-correct Tzefa source lines into 4-word bytecode."""
+    def __init__(
+        self,
+        dialect: str = THREE_WORD,
+        casing: str = CAPS_ONLY,
+    ) -> None:
+        """Create a parser with fresh instruction tables and name buckets.
+
+        Args:
+            dialect: dialect identifier; stored and later passed to
+                ``words_per_line`` and ``normalize_line``.
+            casing: casing identifier; stored and later passed to
+                ``normalize_line``.
+
+        All tables are per-instance copies seeded from the module-level
+        definitions.
+        """
+        self.dialect = dialect
+        self.casing = casing
+        # Build instruction table from the static definitions
+        # (rows are copied so the module-level table is never mutated).
+        self.instructions: List[List[str]] = [row[:] for row in _BUILTIN_INSTRUCTIONS]
+        # Opcode keys: (VERB, TYPE) tuples for lookup
+        self.opcode_keys: List[Tuple[str, str]] = [(r[0], r[1]) for r in self.instructions]
+        # Name buckets for fuzzy-matching (index-aligned with _KIND_TO_BUCKET)
+        self.all_names: List[List[str]] = [
+            # 0: INT names
+            ["TEMPORARY", "LOCALINT", "LOOPINTEGER"],
+            # 1: STR names
+            ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING",
+             "INTEGER", "STRING", "LIST", "BOOLEAN"],
+            # 2: LIST names
+            ["GLOBALLIST", "LOOPLIST"],
+            # 3: BOOL names
+            ["LOOPBOOL"],
+            # 4: COND names
+            ["THETRUTH"],
+            # 5: STATE
+            ["STAY", "BREAK"],
+            # 6: TYPE
+            ["INTEGER", "STRING", "LIST", "BOOLEAN"],
+            # 7: opcode verbs (populated below)
+            [],
+            # 8: TRUTH
+            ["TRUE", "FALSE"],
+            # 9: COMPARE
+            ["EQUALS", "BIGEQUALS", "BIGGER"],
+            # 10: NUMNAME
+            [],
+            # 11: TEXT (free, no correction)
+            [],
+        ]
+        # Populate bucket 7 (opcode verbs) from instruction table,
+        # one "VERB_TYPE" label per distinct (verb, type) pair.
+        seen_verbs: set = set()
+        for row in self.instructions:
+            # NOTE(review): `key` is assigned but not used within this loop.
+            key = (row[0], row[1])
+            label = f"{row[0]}_{row[1]}"
+            if label not in seen_verbs:
+                seen_verbs.add(label)
+                self.all_names[7].append(label)
+        # Numeric name immediates: names for 0..100 and their decimal strings.
+        self.word_to_num: Dict[str, str] = {}
+        for i in range(101):
+            name = Number2Name.get_name(i)
+            self.all_names[10].append(name)
+            self.word_to_num[name] = str(i)
+        # Build verb→[valid types] lookup for sequential word matching
+        self._verb_to_types: Dict[str, List[str]] = {}
+        for row in self.instructions:
+            v, t = row[0], row[1]
+            if v not in self._verb_to_types:
+                self._verb_to_types[v] = []
+            if t not in self._verb_to_types[v]:
+                self._verb_to_types[v].append(t)
+        # Deduplicated verb list (order preserved, for fuzzy matching)
+        # Always include CALL even before functions are registered
+        self._verb_list: List[str] = ["CALL"]
+        for row in self.instructions:
+            if row[0] not in self._verb_list:
+                self._verb_list.append(row[0])
+        # Indent tracking (empty until init_indent_table is called)
+        self.indent_table: List[int] = []
+        # Function definition state
+        self.function_type_stack: List[str] = []
+        self.inside_function: bool = False
+        self.line_counter: int = 0
+    # ------------------------------------------------------------------
+    # Public interface
+    # ------------------------------------------------------------------
+    @property
+    def expected_words_per_line(self) -> int:
+        """Number of words per source line, as given by ``words_per_line`` for this parser's dialect."""
+        return words_per_line(self.dialect)
+    def normalize_source_line(self, raw_tokens: List[str]) -> List[str]:
+        """Normalize raw tokens into a canonical 4-word CAPS tuple.
+
+        Delegates to ``normalize_line`` with this parser's dialect and casing.
+        """
+        return normalize_line(raw_tokens, self.dialect, self.casing)
+    def init_indent_table(self, line_count: int) -> None:
+        """Allocate the indent-change table for *line_count* lines.
+
+        Replaces ``self.indent_table`` with a zero-filled list of
+        ``max(line_count + 2, 1002)`` entries, so the table always has at
+        least 1002 slots.
+        """
+        self.indent_table = [0] * max(line_count + 2, 1002)
+    def get_indent_table(self) -> List[int]:
+        """Return the indent-change table (the parser's own list, not a copy)."""
+        return self.indent_table
+    def match_opcode(self, verb: str, type_word: str) -> Tuple[int, List[str]]:
+        """Exact lookup of (verb, type_word) → instruction row.
+
+        Returns:
+            ``(index, row)`` for the first entry of ``self.opcode_keys`` equal
+            to ``(verb, type_word)``. If there is no match, returns
+            ``(0, self.instructions[0])``: a miss silently falls back to the
+            first instruction rather than signalling an error.
+        """
+        key = (verb, type_word)
+        for i, k in enumerate(self.opcode_keys):
+            if k == key:
+                return i, self.instructions[i]
+        # No exact match: fall back to the first instruction row.
+        return 0, self.instructions[0]
+    def parse_line(self, quad: List[str]) -> List[str]:
+        """
+        Sequential error-correction:
+          W1 → fuzzy match against verb list
+          W2 → fuzzy match against valid types for that verb
+               (ALU: dest var auto-registered; CALL: known function names)
+          W3,W4 → resolved by the spec (arg1_kind, arg2_kind)
+        """
+        while len(quad) < 4:
+            quad.append("")
+        # ── W1: verb ─────────────────────────────────────────────────────────
+        verb = self.find_word(self._verb_list, quad[0], use_ocr_weights=True)[0]
+        # ── ALU fast path (W2 = dest var, W3/W4 = sources) ──────────────────
+        if verb in ALU_VERBS:
+            # ADD SIZE is the non-ALU outlier — treat normally
+            if verb == "ADD":
+                size_types = self._verb_to_types.get("ADD", [])
+                w2 = self.find_word(size_types, quad[1], use_ocr_weights=True)[0]
+                if w2 == "SIZE":
+                    # fall through to standard path
+                    type_word = "SIZE"
+                    verb = "ADD"
+                    _, spec = self.match_opcode(verb, type_word)
+                    result = [verb, type_word,
+                              self._resolve_arg(spec[2], quad[2]),
+                              self._resolve_arg(spec[3], quad[3])]
+                    self.line_counter += 1
+                    return result
+            if verb == "COMBINE":
+                dest = self._resolve_arg("STR", quad[1])
+                src1 = self._resolve_arg("STR", quad[2])
+                src2 = self._resolve_arg("STR", quad[3])
+            else:
+                dest = self._resolve_arg("INT", quad[1])
+                src1 = self._resolve_arg("INT", quad[2])
+                src2 = self._resolve_arg("INT", quad[3])
+            self.line_counter += 1
+            return [verb, dest, src1, src2]
+        # ── CALL (W2 = function name, W3 = input var, W4 = output var) ───────
+        if verb == "CALL":
+            known_funcs = [k[1] for k in self.opcode_keys if k[0] == "CALL"]
+            func_name = self.find_word(known_funcs, quad[1], use_ocr_weights=True)[0] if known_funcs else quad[1]
+            func_spec = next((r for r in self.instructions if r[0] == "CALL" and r[1] == func_name), None)
+            arg1 = self._resolve_arg(func_spec[2] if func_spec else "INT", quad[2])
+            arg2 = self._resolve_arg("INT", quad[3])
+            self.line_counter += 1
+            return ["CALL", func_name, arg1, arg2]
+        # ── W2: type keyword for this verb ───────────────────────────────────
+        valid_types = self._verb_to_types.get(verb, [])
+        type_word = self.find_word(valid_types, quad[1], use_ocr_weights=True)[0] if valid_types else quad[1]
+        # ── Look up full spec ─────────────────────────────────────────────────
+        _, spec = self.match_opcode(verb, type_word)
+        arg1_kind, arg2_kind = spec[2], spec[3]
+        result = [verb, type_word, "", ""]
+        # ── FUNCTION ─────────────────────────────────────────────────────────
+        if verb == "FUNCTION":
+            if not self.inside_function:
+                self.inside_function = True
+                func_name = quad[2]   # new name, register as-is
+                param_type = self.find_word(self.all_names[6], quad[3], use_ocr_weights=True)[0]
+                result[2] = func_name
+                result[3] = param_type
+                self.function_type_stack.append(type_word)
+                topy.register_user_function(
+                    func_name,
+                    _FUNC_TYPE_MAP.get(type_word, "INT"),
+                    _FUNC_TYPE_MAP.get(param_type, "INT"),
+                )
+                self.opcode_keys.append(("CALL", func_name))
+                self.instructions.append(["CALL", func_name, "INT", "INT"])
+                if "CALL" not in self._verb_to_types:
+                    self._verb_to_types["CALL"] = []
+                if func_name not in self._verb_to_types["CALL"]:
+                    self._verb_to_types["CALL"].append(func_name)
+                label = f"CALL_{func_name}"
+                if label not in self.all_names[7]:
+                    self.all_names[7].append(label)
+        # ── RETURN ────────────────────────────────────────────────────────────
+        elif verb == "RETURN":
+            if self.function_type_stack:
+                ret_kind = _FUNC_TYPE_MAP.get(self.function_type_stack[-1], "INT")
+                bucket = _KIND_TO_BUCKET.get(ret_kind, 0)
+                result[2] = self.find_word(self.all_names[bucket], quad[2], use_ocr_weights=True)[0]
             else:
+                result[2] = quad[2]
+            result[3] = self.find_word(self.all_names[5], quad[3], use_ocr_weights=True)[0]
+            if result[3] == "BREAK" and self.function_type_stack:
+                self.inside_function = False
+                self.function_type_stack.pop()
+                self.indent_table[self.line_counter] = -1
+        # ── Everything else ───────────────────────────────────────────────────
         else:
+            result[2] = self._resolve_arg(arg1_kind, quad[2])
+            result[3] = self._resolve_arg(arg2_kind, quad[3])
+        # Control-flow indent tracking
+        if verb in {"WHILE", "IF", "ELIF", "ITERATE"}:
+            self.indent_table[self.line_counter] = 1
+            try:
+                self.indent_table[int(result[3])] = -1
+            except (ValueError, IndexError):
+                pass
+        self.line_counter += 1
+        return result
+    # ------------------------------------------------------------------
+    # Argument resolution
+    # ------------------------------------------------------------------
+    def _resolve_arg(self, kind: str, raw: str) -> str:
+        """Turn one raw argument token into its resolved form for *kind*.
+
+        The token is returned untouched when *kind* is empty or ``"VALUE"``,
+        when *kind* has no usable bucket in ``self.all_names``, when *kind*
+        is a new-name kind (the token is also recorded in the bucket), or
+        when *kind* is ``"NUMNAME"`` and the token is all digits.  Otherwise
+        the token is replaced by the closest bucket entry found by
+        ``find_word`` with OCR weighting; for ``"NUMNAME"`` that entry is
+        additionally looked up in ``self.word_to_num``.
+        """
+        # Untyped and literal-value arguments are never corrected.
+        if not kind or kind == "VALUE":
+            return raw
+        slot = _KIND_TO_BUCKET.get(kind, -1)
+        if slot < 0 or slot >= len(self.all_names):
+            return raw
+        candidates = self.all_names[slot]
+        # Kinds that introduce a name: remember it verbatim, skip matching.
+        if kind in _NEW_KINDS:
+            if raw and raw not in candidates:
+                candidates.append(raw)
+            return raw
+        numeric = kind == "NUMNAME"
+        # A NUMNAME written as digits needs no matching or conversion.
+        if numeric and raw.isdigit():
+            return raw
+        # Every remaining token is matched against the bucket's known names.
+        closest = self.find_word(candidates, raw, use_ocr_weights=True)[0]
+        # Number words go through word_to_num; unknown words stay as matched.
+        return self.word_to_num.get(closest, closest) if numeric else closest
+    # ------------------------------------------------------------------
+    # Edit distance helpers
+    # ------------------------------------------------------------------
+    @staticmethod
+    def ocr_edit_distance(word1: str, word2: str) -> float:
+        """Case-insensitive edit distance that is lenient on OCR look-alikes.
+
+        Both words are upper-cased first.  Inserting or deleting a character
+        costs 1.0, substituting one of the listed look-alike pairs (in either
+        direction) costs 0.5, and any other substitution costs 2.0.
+        """
+        left, right = word1.upper(), word2.upper()
+        # Glyph pairs treated as cheap substitutions, stored symmetrically.
+        lookalikes = (
+            ('O', '0'), ('I', '1'), ('I', 'L'), ('S', '5'), ('Z', '2'),
+            ('C', 'O'), ('C', 'G'), ('B', '8'), ('D', 'O'), ('E', 'F'),
+            ('A', '4'),
+        )
+        cheap: Dict[Tuple[str, str], float] = {}
+        for first, second in lookalikes:
+            cheap[(first, second)] = 0.5
+            cheap[(second, first)] = 0.5
+        # Row 0 of the DP table: turning "" into each prefix of `right`.
+        above = [float(col) for col in range(len(right) + 1)]
+        for row, ch_left in enumerate(left, start=1):
+            # Column 0: deleting the first `row` characters of `left`.
+            current = [float(row)]
+            for col, ch_right in enumerate(right, start=1):
+                if ch_left == ch_right:
+                    swap = 0.0
+                else:
+                    swap = cheap.get((ch_left, ch_right), 2.0)
+                current.append(min(
+                    above[col] + 1.0,
+                    current[col - 1] + 1.0,
+                    above[col - 1] + swap,
+                ))
+            above = current
+        return above[-1]
+    @staticmethod
+    def find_word(
+        name_list: List[str],
+        word: str,
+        use_ocr_weights: bool = False,
+    ) -> Tuple[str, int]:
+        """Return the closest match to *word* in *name_list* and its index.
+
+        An empty *name_list* yields ``(word, 0)``.  An entry equal to *word*
+        is returned as soon as the scan reaches it.  Otherwise the entry with
+        the smallest distance wins; among equal distances the entry whose
+        length is closest to ``len(word)`` is preferred, and the earlier
+        entry is kept when the lengths are equally close.
+
+        With *use_ocr_weights* the distance is
+        ``TzefaParser.ocr_edit_distance``; otherwise it is
+        ``edit_distance(word, item, 32)``.
+        """
+        if not name_list:
+            return word, 0
+        # Start values for the running best candidate.
+        # NOTE(review): 999.0 presumably exceeds any real distance, so the
+        # first entry always replaces these defaults — confirm.
+        min_dist = 999.0
+        best: List[Any] = [word, 0]
+        best_len = 16
+        word_len = len(word)
+        for idx, item in enumerate(name_list):
+            # Exact hit: stop scanning immediately.
+            if item == word:
+                return item, idx
             if use_ocr_weights:
+                dist = TzefaParser.ocr_edit_distance(word, item)
             else:
+                # NOTE(review): meaning of the third argument (32) is defined
+                # by the external edit_distance helper — confirm.
+                dist = float(edit_distance(word, item, 32))
+            item_len = len(item)
+            if dist < min_dist:
+                min_dist = dist
+                best = [item, idx]
+                best_len = item_len
+            elif dist == min_dist:
+                # Tie on distance: prefer the candidate closer in length.
+                if abs(word_len - item_len) < abs(word_len - best_len):
+                    best = [item, idx]
+                    best_len = item_len
+        return tuple(best)

language/Number2Name.py CHANGED Viewed

@@ -1,15 +1,24 @@
-def get_name(number):
-    namearray = ['ZERO', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE', 'TEN', 'ELEVEN',
-                 'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN', 'SEVENTEEN', 'EIGHTEEN', 'NINETEEN', 'TWENTY',
-                 'TWENTYONE', 'TWENTYTWO', 'TWENTYTHREE', 'TWENTYFOUR', 'TWENTYFIVE', 'TWENTYSIX', 'TWENTYSEVEN',
-                 'TWENTYEIGHT', 'TWENTYNINE', 'THIRTY', 'THIRTYONE', 'THIRTYTWO', 'THIRTYTHREE', 'THIRTYFOUR',
-                 'THIRTYFIVE', 'THIRTYSIX', 'THIRTYSEVEN', 'THIRTYEIGHT', 'THIRTYNINE', 'FORTY', 'FORTYONE', 'FORTYTWO',
-                 'FORTYTHREE', 'FORTYFOUR', 'FORTYFIVE', 'FORTYSIX', 'FORTYSEVEN', 'FORTYEIGHT', 'FORTYNINE', 'FIFTY',
-                 'FIFTYONE', 'FIFTYTWO', 'FIFTYTHREE', 'FIFTYFOUR', 'FIFTYFIVE', 'FIFTYSIX', 'FIFTYSEVEN', 'FIFTYEIGHT',
-                 'FIFTYNINE', 'SIXTY', 'SIXTYONE', 'SIXTYTWO', 'SIXTYTHREE', 'SIXTYFOUR', 'SIXTYFIVE', 'SIXTYSIX',
-                 'SIXTYSEVEN', 'SIXTYEIGHT', 'SIXTYNINE', 'SEVENTY', 'SEVENTYONE', 'SEVENTYTWO', 'SEVENTYTHREE',
-                 'SEVENTYFOUR', 'SEVENTYFIVE', 'SEVENTYSIX', 'SEVENTYSEVEN', 'SEVENTYEIGHT', 'SEVENTYNINE', 'EIGHTY',
-                 'EIGHTYONE', 'EIGHTYTWO', 'EIGHTYTHREE', 'EIGHTYFOUR', 'EIGHTYFIVE', 'EIGHTYSIX', 'EIGHTYSEVEN',
-                 'EIGHTYEIGHT', 'EIGHTYNINE', 'NINETY', 'NINETYONE', 'NINETYTWO', 'NINETYTHREE', 'NINETYFOUR',
-                 'NINETYFIVE', 'NINETYSIX', 'NINETYSEVEN', 'NINETYEIGHT', 'NINETYNINE', 'ONEHUNDRED']
-    return namearray[number]

+# python
+from typing import Tuple
+# Uppercase English number names without separators, ordered so that the
+# entry at index i is the name of the integer i (ZERO through ONEHUNDRED).
+NUMBER_NAMES: Tuple[str, ...] = (
+    'ZERO', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE', 'TEN', 'ELEVEN',
+    'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN', 'SEVENTEEN', 'EIGHTEEN', 'NINETEEN', 'TWENTY',
+    'TWENTYONE', 'TWENTYTWO', 'TWENTYTHREE', 'TWENTYFOUR', 'TWENTYFIVE', 'TWENTYSIX', 'TWENTYSEVEN',
+    'TWENTYEIGHT', 'TWENTYNINE', 'THIRTY', 'THIRTYONE', 'THIRTYTWO', 'THIRTYTHREE', 'THIRTYFOUR',
+    'THIRTYFIVE', 'THIRTYSIX', 'THIRTYSEVEN', 'THIRTYEIGHT', 'THIRTYNINE', 'FORTY', 'FORTYONE', 'FORTYTWO',
+    'FORTYTHREE', 'FORTYFOUR', 'FORTYFIVE', 'FORTYSIX', 'FORTYSEVEN', 'FORTYEIGHT', 'FORTYNINE', 'FIFTY',
+    'FIFTYONE', 'FIFTYTWO', 'FIFTYTHREE', 'FIFTYFOUR', 'FIFTYFIVE', 'FIFTYSIX', 'FIFTYSEVEN', 'FIFTYEIGHT',
+    'FIFTYNINE', 'SIXTY', 'SIXTYONE', 'SIXTYTWO', 'SIXTYTHREE', 'SIXTYFOUR', 'SIXTYFIVE', 'SIXTYSIX',
+    'SIXTYSEVEN', 'SIXTYEIGHT', 'SIXTYNINE', 'SEVENTY', 'SEVENTYONE', 'SEVENTYTWO', 'SEVENTYTHREE',
+    'SEVENTYFOUR', 'SEVENTYFIVE', 'SEVENTYSIX', 'SEVENTYSEVEN', 'SEVENTYEIGHT', 'SEVENTYNINE', 'EIGHTY',
+    'EIGHTYONE', 'EIGHTYTWO', 'EIGHTYTHREE', 'EIGHTYFOUR', 'EIGHTYFIVE', 'EIGHTYSIX', 'EIGHTYSEVEN',
+    'EIGHTYEIGHT', 'EIGHTYNINE', 'NINETY', 'NINETYONE', 'NINETYTWO', 'NINETYTHREE', 'NINETYFOUR',
+    'NINETYFIVE', 'NINETYSIX', 'NINETYSEVEN', 'NINETYEIGHT', 'NINETYNINE', 'ONEHUNDRED'
+)
+def get_name(number: int) -> str:
+    """Look up the uppercase English name of *number* in ``NUMBER_NAMES``.
+
+    Raises:
+        ValueError: if *number* is negative or not below ``len(NUMBER_NAMES)``.
+    """
+    # Happy path first: a valid index maps straight onto the table.
+    if 0 <= number < len(NUMBER_NAMES):
+        return NUMBER_NAMES[number]
+    raise ValueError(f'number out of range: {number}')

language/__pycache__/topy.cpython-313.pyc ADDED Viewed

Binary file (22.1 kB). View file

language/createdpython.py CHANGED Viewed

@@ -1,760 +1,985 @@
-### finally found the bug
-### here it is:
-### lets say function f calls a new instance of itself
-### the new instance creates cond b and a new instance of itself
-### the new instance closes and returns to the instance with  cond b that also closes itself
-### now the original instance still owns cond b since no function was called to clean the stack or dict between the exiting calls
-dicte = {"EQUALS": 0, "BIGEQUALS": 1, "BIGGER": 2}
-listfunctions = [(lambda x, y: x == y), (lambda x, y: x >= y), (lambda x, y: x > y)]
 import sys
-import language.Number2Name as Number2Name
-linecount = 0
-currentline = 0
-linelimit = 1000
-functionlimit = 25
-functioncount = 0
-printed = ""
-alltheconds = {}
-reserveconds = {}
-reserveloc = {}
-reserveglob = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
-def line(linenum):
-    global currentline
-    currentline = linenum
-    # if("LISTOFTWO" in allthevars["LIST"]):
-    # print(linenum,getvar("LIST","LISTOFTWO").values)
     return True
 class Node:
-    def __init__(self, value):
         self.value = value
-        self.next = None
-    def setnext(self, nextvalue):
-        self.next = Node(value=nextvalue)
-    def setnextnode(self, next):
-        self.next = next
-    def getnext(self):
         return self.next
-    def getvalue(self):
         return self.value
 class Stack:
     def __init__(self):
-        self.top = None
-        self.list = []
-    def isempty(self):
         return len(self.list) == 0
-    def push(self, value):
         self.list.append(value)
-    def pop(self):
         return self.list.pop()
-functioncalls = Stack()
-def addcond(name, compare):
-    global alltheconds
-    if (name in alltheconds):
-        errore.varexistserror(name)
-    else:
-        alltheconds[name] = COND(compare)
-def addlocalcond(name, compare):
-    global alltheconds, allthelocalconds
-    if (name in alltheconds):
-        errore.varexistserror(name)
-    else:
-        alltheconds[name] = COND(compare)
-        allthelocalconds[name] = alltheconds[name]
-    return allthelocalconds
-# import dis
-# print(dis.dis(addlocalcond))
-def movetonewconds(localconds):
-    global stackoflocalconds
-    global alltheconds
-    global reserveconds
-    for i in localconds:
-        del alltheconds[i]
-    stackoflocalconds.push(localconds)
-def backtooldconds():
-    global stackoflocalconds
-    popped = stackoflocalconds.pop()
-    global alltheconds
-    global reserveconds
-    for i in reserveconds:
-        if i in alltheconds:
-            del alltheconds[i]
-    reserveconds = {}
-    for i in popped:
-        alltheconds[i] = popped[i]
-        reserveconds[i] = popped[i]
-    return popped
-def getcond(name):
-    global alltheconds
-    if (name in alltheconds):
-        return alltheconds[name]
-    else:
-        errore.doesntexisterror(name)
-def printvars():
-    global allthevars, printed
-    print("END OF PROGRAM")
-    print()
-    for i in allthevars:
-        print("All the vars used from type " + i)
-        for j in allthevars[i]:
-            if (allthevars[i][j].iswritable()):
-                print(j + " : " + allthevars[i][j].tostring())
-            else:
-                pass
-        print("")
-    print("All that was printed during the program")
-    print(printed)
-def addvar(type, name, value):
-    global allthevars
-    if name in allthevars[type]:
-        errore.varexistserror(name)
-    if (type == "LIST"):
-        allthevars[type][name] = LIST(name, value, True, True, type)
-    else:
-        allthevars[type][name] = VALUE(name, value, True, True, type)
-def getvar(type, name):
-    global allthevars
-    if (name in allthevars[type]):
-        return allthevars[type][name]
-    else:
-        errore.doesntexisterror(name)
-def addlocalvar(type, name, value):
-    global dictlocalvars, allthevars
-    allthelocalrvars = dictlocalvars
-    if name in allthevars[type]:
-        errore.varexistserror(name)
-    if (type == "LIST"):
-        allthevars[type][name] = LIST(name, value, True, True, type)
-        allthelocalrvars[type][name] = allthevars[type][name]
-    else:
-        allthevars[type][name] = VALUE(name, value, True, True, type)
-        allthelocalrvars[type][name] = allthevars[type][name]
-def switchtonewcall(dict1: dict, dict2: dict, stack: Stack):
-    for i in dict2:
-        for j in dict2[i]:
-            del dict1[i][j]
-    stack.push(dict2)
-    return {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
-def returntoolastcall(dict1: dict):
-    stack = localsvars
-    lastcall = stack.pop()
-    global reserveloc
-    for i in reserveloc:
-        for j in reserveloc[i]:
-            if j in dict1[i]:
-                del dict1[i][j]
-    reserveloc = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
-    for i in lastcall:
-        for j in lastcall[i]:
-            dict1[i][j] = lastcall[i][j]
-            reserveloc[i][j] = lastcall[i][j]
-    return lastcall
-def Print(var, newline):
-    global printed
-    toprint = var.tostring() + newline * '\n' + ' ' * (1 - newline)
-    print(toprint, end='')
-    printed = printed + toprint
-class VALUE:
-    def __init__(self, name, value, readable, writable, TYPE):
         self.name = name
         self.value = value
         self.readable = readable
         self.writable = writable
-        self.type = TYPE
-    def write(self, value):
-        if (self.writable == True):
             self.value = value
         else:
-            errore.writeerror(self.name, value)
-    def forcewrite(self, value):
         self.value = value
-    def read(self):
-        if (self.readable == True):
             return self.value
         else:
-            errore.readerror(self.name)
-    def forceread(self):
         return self.value
-    def changeread(self, readstatus):
-        self.readable = readstatus
-    def changewrite(self, writestatus):
-        self.writable = writestatus
-    def getname(self):
         return self.name
-    def iswritable(self):
         return self.writable
-    def isreadable(self):
         return self.readable
-    def tostring(self):
         return str(self.value)
-    def givetype(self):
         return self.type
-    def override(self, value):
         self.value = value
-    def makecopy(self):
-        return VALUE(self.name, self.value, self.readable, True, self.type)
-    def copyvar(self, vari):
-        if (self.type != vari.type):
-            errore.typeerror(self.name, self.type, vari.type)
         else:
-            if (vari.isreadable() == False):
-                errore.readerror(vari.getname())
             else:
-                self.value = vari.value
-def add(Vali1, vali2):
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() + getvar("INT", vali2).read())
-def dec(Vali1, vali2):
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() - getvar("INT", vali2).read())
-def mult(Vali1, vali2):
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() * getvar("INT", vali2).read())
-def div(Vali1, vali2):
-    if (getvar("INT", vali2).read() == 0):
-        errore.DIVZEROERROR(vali2)
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() // getvar("INT", vali2).read())
-def betterdiv(Vali1, vali2):
-    if (getvar("INT", vali2).read() == 0):
-        errore.DIVZEROERROR(vali2)
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() / getvar("INT", vali2).read())
-def pow(Vali1, vali2):
-    getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() ** getvar("INT", vali2).read())
-def mod(Vali1, vali2):
-    if (getvar("INT", vali2).read() == 0):
-        errore.DIVZEROERROR(vali2)
-    getvar("INT", "TEMPORARY").forcewrite(int(getvar("INT", Vali1).read() % getvar("INT", vali2).read()))
-def comb(Vali1, vali2):
-    getvar("STR", "TEMPSTRING").forcewrite(getvar("STR", Vali1).read() + getvar("STR", vali2).read())
-def addsize(Vali1, vali2):
-    getvar("LIST", Vali1).addsize(getvar("INT", vali2).read())
-def assignlist(Vali1, vali2):
-    getvar("LIST", Vali1).copyvar(getvar("LIST", vali2))
-def assignstr(Vali1, vali2):
-    getvar("STR", Vali1).copyvar(getvar("STR", vali2))
-def assignint(Vali1, vali2):
-    getvar("INT", Vali1).copyvar(getvar("INT", vali2))
-def blankspaces(Vali1, Vali2):
-    getvar("STR", Vali1).write(getvar("STR", Vali1).read() + ' ' * Vali2)
-def typetoint(vali1, vali2):
-    lookuptable = {"INT": 0, "STR": 1, "BOOLEAN": 2, "LIST": 3}
-    if (getvar('STR', vali1).read() in lookuptable):
-        getvar('INT', vali2).write(lookuptable[getvar('STR', vali1).read()])
-    else:
-        errore.typetointerror(getvar('STR', vali1).read())
-class COND:
-    def __init__(self, compare):
-        self.index = dicte[compare]
-        self.left = VALUE("0", 0, False, False, "INT")
-        self.right = VALUE("0", 0, False, False, "INT")
-    def changecompare(self, compare):
-        self.index = dicte[compare]
-    def changeleft(self, left):
         self.left = left
-    def changeright(self, right):
         self.right = right
-    def giveresult(self):
-        return listfunctions[self.index](self.left.read(), self.right.read())
-    def givetype(self):
         return self.type
-class EERROR(Exception):
-    global sizeoflistinuse, wantedindex
-    def __init__(self):
-        pass
-    def nameerror(self, type, name):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Var of name " + name + " doesn't exist as type " + type)
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def makeeindexrror(self, sizeoflistinuse, wantedindex, name, name2):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Tried to change index of list " + name + " with size of " + str(sizeoflistinuse) + " to value " + str(
-            wantedindex) + " placed in " + str(name2))
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def DIVZEROERROR(self, name):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Cant divide by zero and var " + name + " has value of zero")
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def doesntexisterror(self, name):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("No object with name " + str(name) + " exists")
-        printvars()
-        sys.exit(0)
-    def writeerror(self, name, value):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Tried to write value of " + str(value) + " to unwritable variable " + name)
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def typetointerror(self, value):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("No type such as " + str(value))
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def readerror(self, name):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Tried to read from unreadable variable " + name)
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def linelimiterror(self):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(currentline))
-        print("Program ran for too long")
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def overflowerror(self, functioncalls):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Executing too many  function calls ")
-        print("List of function calls")
-        for i in functioncalls:
-            pass
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def cantchangeindexerror(self, name, value):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Tried to change indexes of list " + name + " and add size  " + str(value) + " but list is unwritable")
-    def varexistserror(self, name):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Tried to create object with name " + name + " but var already exists")
-        print(" ")
-        printvars()
-        sys.exit(0)
-    def typeerror(self, name, type1, type2):
-        global currentline
-        globalline = currentline
-        print("Error Line: " + str(globalline))
-        print("Mismatch of types  " + type1 + " and " + type2 + " in list " + name)
-        print(" ")
-        printvars()
-        sys.exit(0)
-class LIST:
-    def __init__(self, name, size, readable, writable, TYPE):
-        self.size = size
-        self.index = 0
-        self.values = [VALUE(name=(str(name) + " " + str(i)), value=0, writable=True, readable=True, TYPE="INT") for i
-                       in range(size)]
-        self.types = ["INT" for i in range(size)]
-        self.readable = readable
-        self.writable = writable
-        self.name = name
-        self.type = TYPE
-    def addsize(self, added):
-        if (self.writable):
-            self.size = self.size + added
-            self.values = [
-                self.values[i] if listfunctions[2](self.size, i) else VALUE(name=(str(i) + " " + str(i)), value=0,
-                                                                            writable=True, readable=True, TYPE="INT")
-                for i in range(self.size + added)]
-        else:
-            errore.cantchangeindexerror(self.name, added)
-    def forceaddsize(self, added):
-        self.size = self.size + added
-        self.values = [
-            self.values[i] if listfunctions[2](self.size, i) else VALUE(name=(str(i) + " " + str(i)), value=0,
-                                                                        writable=True, readable=True, TYPE="INT") for i
-            in range(self.size + added)]
-    def changeindex(self, newindex):
-        if (self.readable):
-            if (newindex >= self.size):
-                errore.makeeindexrror(newindex, self.size, self.name)
-            else:
-                self.index = newindex
-        else:
-            errore.readerror(self.name)
-    def forcechangeindex(self, newindex):
-        if (newindex >= self.size):
-            errore.makeeindexrror(newindex, self.size, self.name)
-        else:
-            self.index = newindex
-    def placevalue(self, name, type):
-        if (self.writable):
-            thevar = getvar(type, name)
-            if (thevar.isreadable()):
-                self.types[self.index] = type
-                self.values[self.index] = thevar.makecopy()
-            else:
-                errore.readerror(name)
-        else:
-            errore.writeerror(self.name, name)
-    def forceplacevalue(self, name, type):
-        thevar = getvar(type, name)
-        if (thevar.isreadable()):
-            self.types[self.index] = type
-            self.values[self.index] = thevar.makecopy()
-        else:
-            errore.readerror()
-    def returnvalue(self):
-        if (self.readable):
-            return self.values[self.index]
-        else:
-            errore.readerror(self.name)
-    def read(self):
-        if (self.readable):
-            return self.values[self.index]
-        else:
-            errore.readerror(self.name)
-    def forcereturnvalue(self):
-        return self.values[self.index]
-    def copybyvalue(self, newvalue: VALUE):
-        if (self.types[self.index] == newvalue.givetype()):
-            newvalue.write(self.values[self.index].read())
-        else:
-            errore.typeerror(name=self.name, type1=self.types[self.index], type2=newvalue.givetype())
-    def returntype(self):
-        if (self.readable):
-            return self.types[self.index]
-        else:
-            errore.readerror(self.name)
-    def forcereturntype(self):
-        return self.types[self.index]
-    def tostring(self):
-        strei = ""
-        for i in self.values:
-            strei = strei + str(i.tostring()) + " "
-        return "[ " + strei + " ]"
-    def tostringoftypes(self):
-        if (self.readable):
-            stre = ""
-            for i in self.types:
-                stre = stre + i[0]
-            return stre
-        else:
-            errore.readerror(self.name)
-    def forcetostringoftypes(self):
-        stre = ""
-        for i in self.types:
-            stre = stre + i[0]
-        return stre
-    def changeread(self, readstatus):
-        self.readable = readstatus
-    def changewrite(self, writestatus):
-        self.writable = writestatus
-    def getname(self):
-        return self.name
-    def iswritable(self):
-        return self.writable
-    def isreadable(self):
-        return self.readable
-    def getvalues(self):
-        return self.values
-    def gettypes(self):
-        return self.types
-    def getsize(self):
-        return self.size
-    def makecopy(self):
-        thelist = LIST(self.name, self.size, self.readable, True, self.type)
-        thelist.types = self.types.copy()
-        thelist.values = [val.makecopy() for val in self.values]
-        return thelist
-    def override(self, values, types, size):
-        self.values = values
-        self.types = types
-        self.size = size
-    def givetype(self):
-        return self.type
-    def copyvar(self, listi):
-        if (self.type != listi.type):
-            errore.typeerror(self.name, self.type, listi.type)
-        else:
-            if (listi.isreadable() == False):
-                errore.readerror(listi.getname())
-            else:
-                self.type = 'LIST'
-                self.types = listi.types.copy()
-                self.size = listi.size
-                self.values = [var.makecopy() for var in listi.values]
-def join(listi: LIST, linee):
-    if (listi.isreadable() == False):
-        errore.readerror(listi.name)
-    for i in range(listi.size):
-        listi.index = i
-        value = listi.values[i]
-        typi = listi.types[i]
-        goodloopvars[typi].copyvar(value)
-        getvar('STR', 'TEMPSTRING').forcewrite(typi)
-        line(linee)
-        endline()
-        yield value
-def returntooldlocals(dictofglobals, dictoflocals):
-    global reserveglob
-    for i in reserveglob:
-        for j in reserveglob[i]:
-            del dictofglobals[i][j]
-    reserveglob = {"INT": {}, "STR": {}, "LIST": {}}
-    for i in dictoflocals:
-        for j in dictoflocals[i]:
-            dictofglobals[i][j] = dictoflocals[i][j]
-            reserveglob[i][j] = dictoflocals[i][j]
-def endline():
-    global errore, linecount
-    linecount += 1
-    if (linecount == linelimit):
-        errore.linelimiterror()
-    else:
-        return True
-def updateline():
-    global errore, linecount
-    linecount += 1
-    if (linecount == linelimit):
-        errore.linelimiterror()
     else:
         return True
-def updatelinewithcall(type, namevarinput, function, typeoutput, nameoutput, lini):
-    line(lini)
-    global allthevars, functioncount, localsstack, programlocals, nameofprogramlocals, dictlocalvars, functionlimit, allthelocalconds, nameofprogramlocals
-    localsstack.push(programlocals)
-    varinput = getvar(type, namevarinput)
-    for i in nameofprogramlocals:
-        if (i == 'STR'):
-            allthevars[i][nameofprogramlocals[i]] = VALUE(nameofprogramlocals[i], '', False, False, 'STR')
-        elif (i == 'INT'):
-            allthevars[i][nameofprogramlocals[i]] = VALUE(nameofprogramlocals[i], 0, False, False, 'INT')
         else:
-            allthevars[i][nameofprogramlocals[i]] = LIST(nameofprogramlocals[i], 8, False, False, 'LIST')
-    programlocals = {"INT": {"LOCALINT": allthevars["INT"]["LOCALINT"]},
-                     "STR": {"LOCALSTR": allthevars["STR"]["LOCALSTR"]},
-                     "LIST": {"LOCALLIST": allthevars["LIST"]["LOCALLIST"]}}
-    allthevars[type][nameofprogramlocals[type]].copyvar(varinput)
-    vartosend = allthevars[type][nameofprogramlocals[type]]
-    functioncount += 1
-    if (functioncount == functionlimit):
-        errore.overflowerror(functioncalls)
     else:
-        dictlocalvars = switchtonewcall(allthevars, dictlocalvars, localsvars)
-        vartosend.changeread(True)
-        vartosend.changewrite(True)
-        movetonewconds(allthelocalconds)
-        allthelocalconds = {}
-        output = getvar(typeoutput, nameoutput)
-        outi = function()
-        output.copyvar(outi)
-        endline()
-def updatelineexitingcall(type, namevaroutput):
-    global allthevars, localsstack, functioncount, allthelocalconds, dictlocalvars, localsvars
-    thevar = getvar(type, namevaroutput)
-    functioncount = functioncount - 1
-    popped = localsstack.pop()
-    ### because i can't remember
-    ### get old "global"
-    returntooldlocals(allthevars, popped)
-    ### get old locals
-    dictlocalvars = returntoolastcall(allthevars)
-    ### get old conds
-    allthelocalconds = backtooldconds()
-    endline()
-    return thevar
-localsvars = Stack()  #### the stack for locals created in function
-localsvars.push({"INT": {}, "STR": {}, "LIST": {}})
-localsstack = Stack()  ###### the stack of the program locals
-LOOPINTEGER = VALUE(name="LOOPINTEGER", value=0, readable=True, writable=False, TYPE="INT")
-LOOPSTRING = VALUE(name="LOOPSTRING", value="", readable=True, writable=False, TYPE="STR")
-LOOPBOOL = VALUE(name="LOOPBOOL", value=True, readable=True, writable=False, TYPE="BOOL")
-LOOPLIST = LIST(name="LOOPLIST", size=8, readable=True, writable=False, TYPE="LIST")
-TEMPORARY = VALUE(name="TEMPORARY", value=0, readable=True, writable=True, TYPE="INT")
-LOCALINT = VALUE(name="LOCALINT", value=0, readable=0, writable=0, TYPE="INT")
-loopvars = {"INT": {"LOOPINTEGER": LOOPINTEGER}, "STR": {"LOOPSTRING": LOOPSTRING}, "LIST": {"LOOPLIST": LOOPLIST},
-            "BOOLEAN": {"LOOPBOOL": LOOPBOOL}}
-goodloopvars = {"INT": LOOPINTEGER, "STR": LOOPSTRING, "LIST": LOOPLIST, "BOOLEAN": LOOPBOOL}
-TEMPSTRING = VALUE(name="TEMPSTRING", value="", readable=True, writable=False, TYPE="STR")
-LOCALSTR = VALUE(name="LOCALSTR", value="", readable=0, writable=0, TYPE="STR")
-INTEGER = VALUE(name="INTEGER", value="INT", readable=True, writable=False, TYPE="STR")
-STRING = VALUE(name="STRING", value="STR", readable=True, writable=False, TYPE="STR")
-LISTI = VALUE(name="LIST", value="LIST", readable=True, writable=False, TYPE="STR")
-BOOLEAN = VALUE(name="BOOLEAN", value="BOOLEAN", readable=True, writable=False, TYPE="STR")
-LOCALLIST = LIST(name="LOCALLIST", size=8, readable=0, writable=0, TYPE="LIST")
-THETRUTH = COND('EQUALS')
-THETRUTH.changeleft(TEMPORARY)
-THETRUTH.changeright(TEMPORARY)
-allthevars = {"INT": {"LOOPINTEGER": LOOPINTEGER, "TEMPORARY": TEMPORARY, "LOCALINT": LOCALINT},
-              "STR": {"LOOPSTRING": LOOPSTRING, "TEMPSTRING": TEMPORARY, "LOCALSTR": LOCALSTR, "INTEGER": INTEGER,
-                      "STRING": STRING, "LIST": LISTI, "BOOLEAN": BOOLEAN},
-              "LIST": {"LOOPLIST": LOOPLIST, "LOCALLIST": LOCALLIST}, "BOOLEAN": {"LOOPBOOL": LOOPBOOL}}
 # Number names (ZERO..ONEHUNDRED) are compile-time constants only.
 # They are resolved to plain integer literals by the compiler (toline/word_to_num)
-# and must NOT live in allthevars["INT"] — that would prevent users from naming
 # their own variables ONE, ZERO, etc.
-examplelocalvars = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
-dictlocalvars = examplelocalvars.copy()
-programlocals = {"INT": {"LOCALINT": allthevars["INT"]["LOCALINT"]}, "STR": {"LOCALSTR": allthevars["STR"]["LOCALSTR"]},
-                 "LIST": {"LOCALLIST": allthevars["LIST"]["LOCALLIST"]}}
-nameofprogramlocals = {"INT": "LOCALINT", "STR": "LOCALSTR", "LIST": "LOCALLIST"}
-stackoflocalconds = Stack()
-localsstack.push(programlocals)
-dictlocalvars = examplelocalvars.copy()
-allthelocalconds = {}
-errore = EERROR()
-alltheconds['THETRUTH'] = THETRUTH

 import sys
+from typing import Dict, List, Any, Callable, Optional
# --- Comparison operators ---
# Maps each comparison keyword to the position of its predicate in COMPARE_OPS.
COMPARE_OP_INDEX: Dict[str, int] = {
    "EQUALS": 0,
    "BIGEQUALS": 1,
    "BIGGER": 2,
}
# Two-argument predicates, ordered to line up with COMPARE_OP_INDEX.
COMPARE_OPS: List[Callable[[Any, Any], bool]] = [
    lambda left, right: left == right,
    lambda left, right: left >= right,
    lambda left, right: left > right,
]
# --- Execution counters and their limits ---
line_count: int = 0
current_line: int = 0
line_limit: int = 1000
function_limit: int = 25
function_count: int = 0
# Everything the running program has printed so far.
printed_output: str = ""
# --- Placeholder classes so the annotations below resolve; the real
# --- definitions appear further down the module and rebind these names.
class Value:
    pass
class VmList:
    pass
class Condition:
    pass
# --- Condition bookkeeping ---
cond_registry: Dict[str, Condition] = {}
saved_conds: Dict[str, Condition] = {}
# --- Slots that remember which locals were last restored ---
saved_locals: Dict[str, Any] = {}
saved_globals: Dict[str, Dict[str, Any]] = {
    type_name: {} for type_name in ("INT", "STR", "LIST", "BOOLEAN")
}
+# ---------------------------------------------------------------------------
+# Line tracking
+# ---------------------------------------------------------------------------
def set_current_line(line_num: int) -> bool:
    """Store line_num as the line currently executing.

    The stored number is what the error reporters print as "Error Line".
    Always returns True.
    """
    global current_line
    current_line = line_num
    return True
+# ---------------------------------------------------------------------------
+# Node / Stack
+# ---------------------------------------------------------------------------
 class Node:
+    """Singly-linked list node used internally by Stack."""
+    def __init__(self, value: Any):
         self.value = value
+        self.next: Optional['Node'] = None
+    def set_next(self, next_value: Any) -> None:
+        """Create and link a new node with the given value."""
+        self.next = Node(value=next_value)
+    def set_next_node(self, next_node: 'Node') -> None:
+        """Directly link to an existing node."""
+        self.next = next_node
+    def get_next(self) -> Optional['Node']:
+        """Return the next node."""
         return self.next
+    def get_value(self) -> Any:
+        """Return the value stored in this node."""
         return self.value
 class Stack:
+    """Simple LIFO stack backed by a Python list."""
     def __init__(self):
+        self.top: Optional[Node] = None
+        self.list: List[Any] = []
+    def is_empty(self) -> bool:
+        """Return True if the stack holds no elements."""
         return len(self.list) == 0
+    def push(self, value: Any) -> None:
+        """Push a value onto the top of the stack."""
         self.list.append(value)
+    def pop(self) -> Any:
+        """Pop and return the top value of the stack."""
         return self.list.pop()
+function_call_stack = Stack()
+# ---------------------------------------------------------------------------
+# Error Handler
+# ---------------------------------------------------------------------------
class ErrorHandler(Exception):
    """Centralised VM error reporter.

    Every reporting method prints the line number held in ``current_line``
    and a diagnostic message, dumps the VM state through ``print_vars()`` and
    then ends the process with ``sys.exit(0)``; none of them returns to its
    caller.
    """
    def _abort(self, message: str, spacer: bool = True) -> None:
        """Print the failing line and message, dump the VM state and exit.

        Args:
            message: Diagnostic text printed below the "Error Line" header.
            spacer: When True, print a single-space separator line before
                the variable dump.
        """
        print(f"Error Line: {current_line}")
        print(message)
        if spacer:
            print(" ")
        print_vars()
        sys.exit(0)
    def name_error(self, var_type: str, name: str) -> None:
        """Report that a variable of the given type and name does not exist."""
        self._abort(f"Var of name {name} doesn't exist as type {var_type}")
    def index_error(self, list_name: str, bad_index: int, list_size: int) -> None:
        """Report an out-of-bounds list index access."""
        self._abort(
            f"Tried to change index of list {list_name} with size of {list_size} to out-of-bounds index {bad_index}"
        )
    def div_zero_error(self, var_name: str) -> None:
        """Report a division-by-zero attempt."""
        self._abort(f"Cant divide by zero and var {var_name} has value of zero")
    def doesnt_exist_error(self, name: str) -> None:
        """Report that no object with the given name exists."""
        # This report has never printed the separator line; keep its output as-is.
        self._abort(f"No object with name {name} exists", spacer=False)
    def write_error(self, name: str, value: Any) -> None:
        """Report an attempt to write to an unwritable variable."""
        self._abort(f"Tried to write value of {value} to unwritable variable {name}")
    def type_to_int_error(self, value: str) -> None:
        """Report that a string does not correspond to a known type name."""
        self._abort(f"No type such as {value}")
    def read_error(self, name: str) -> None:
        """Report an attempt to read from an unreadable variable."""
        self._abort(f"Tried to read from unreadable variable {name}")
    def line_limit_error(self) -> None:
        """Report that the program ran for too many lines."""
        self._abort("Program ran for too long")
    def overflow_error(self, call_stack: Stack) -> None:
        """Report too many nested function calls (call_stack is accepted but not inspected)."""
        self._abort("Executing too many function calls")
    def cant_change_index_error(self, name: str, added: int) -> None:
        """Report an attempt to resize an unwritable list.

        Previously this only printed the message and returned, letting the
        program carry on after the failed resize; it now dumps state and
        exits like every other report.
        """
        self._abort(f"Tried to change indexes of list {name} and add size {added} but list is unwritable")
    def var_exists_error(self, name: str) -> None:
        """Report an attempt to create a variable whose name is already taken."""
        self._abort(f"Tried to create object with name {name} but var already exists")
    def type_error(self, name: str, type1: str, type2: str) -> None:
        """Report a type mismatch during a variable assignment."""
        self._abort(f"Mismatch of types {type1} and {type2} in variable {name}")
+# ---------------------------------------------------------------------------
+# Value
+# ---------------------------------------------------------------------------
class Value:
    """Named VM variable carrying a value, a type name and read/write permission flags."""
    def __init__(self, name: str, value: Any, readable: bool, writable: bool, type_name: str):
        self.name = name
        self.type = type_name
        self.value = value
        self.readable = readable
        self.writable = writable
    def write(self, value: Any) -> None:
        """Store value when the writable flag is set; otherwise report a write error."""
        if not self.writable:
            error_handler.write_error(self.name, value)
            return
        self.value = value
    def force_write(self, value: Any) -> None:
        """Store value without consulting the writable flag."""
        self.value = value
    def read(self) -> Any:
        """Return the stored value when the readable flag is set; otherwise report a read error."""
        if not self.readable:
            error_handler.read_error(self.name)
            return None
        return self.value
    def force_read(self) -> Any:
        """Return the stored value without consulting the readable flag."""
        return self.value
    def set_readable(self, readable: bool) -> None:
        """Replace the readable flag."""
        self.readable = readable
    def set_writable(self, writable: bool) -> None:
        """Replace the writable flag."""
        self.writable = writable
    def get_name(self) -> str:
        """Return the variable's name."""
        return self.name
    def is_writable(self) -> bool:
        """Return the writable flag."""
        return self.writable
    def is_readable(self) -> bool:
        """Return the readable flag."""
        return self.readable
    def to_string(self) -> str:
        """Return str() of the stored value."""
        return str(self.value)
    def give_type(self) -> str:
        """Return the variable's type name."""
        return self.type
    def override(self, value: Any) -> None:
        """Replace the stored value with no permission checks."""
        self.value = value
    def make_copy(self) -> 'Value':
        """Return a new writable Value with the same name, value, type and readable flag."""
        return Value(
            name=self.name,
            value=self.value,
            readable=self.readable,
            writable=True,
            type_name=self.type,
        )
    def copy_var(self, source: 'Value') -> None:
        """Take over source's value.

        Reports a type error when the type names differ, or a read error when
        source is not readable; this variable's own writable flag is not
        consulted.
        """
        if self.type != source.type:
            error_handler.type_error(self.name, self.type, source.type)
            return
        if not source.is_readable():
            error_handler.read_error(source.get_name())
            return
        self.value = source.value
+# ---------------------------------------------------------------------------
+# VmList
+# ---------------------------------------------------------------------------
class VmList:
    """A typed VM list: parallel sequences of Value slots and type names, plus a cursor.

    ``values[i]`` holds an element and ``types[i]`` its type name.  Every new
    slot starts as a readable, writable INT Value of 0 named
    "<list name> <position>".  ``index`` is the cursor used by the read and
    place operations.  Methods prefixed ``force_`` skip the list's own
    readable/writable permission checks.
    """
    def __init__(self, name: str, size: int, readable: bool, writable: bool, type_name: str):
        self.name = name
        self.size = size
        self.index = 0
        self.readable = readable
        self.writable = writable
        self.type = type_name
        self.values: List[Value] = [self._blank_slot(i) for i in range(size)]
        self.types: List[str] = ["INT"] * size
    def _blank_slot(self, position: int) -> Value:
        """Build the default element for a position: a readable, writable INT 0."""
        return Value(name=(str(self.name) + " " + str(position)), value=0, writable=True, readable=True, type_name="INT")
    def _in_bounds(self, candidate: int) -> bool:
        """Return True when candidate is a valid cursor position for the current size."""
        return 0 <= candidate < self.size
    def add_size(self, added: int) -> None:
        """Grow the list by `added` elements if writable, else report the failure."""
        if self.writable:
            self.force_add_size(added)
        else:
            error_handler.cant_change_index_error(self.name, added)
    def force_add_size(self, added: int) -> None:
        """Grow the list by `added` elements unconditionally.

        Existing elements keep their position; new positions get default slots.
        """
        old_size = self.size
        self.size = old_size + added
        self.values = [
            self.values[i] if i < old_size else self._blank_slot(i)
            for i in range(self.size)
        ]
        self.types = self.types + ["INT"] * added
    def change_index(self, new_index: int) -> None:
        """Move the cursor if the list is readable and new_index is in bounds.

        Negative indices are rejected as out of bounds; otherwise Python's
        negative indexing would silently address the list from its end.
        """
        if not self.readable:
            error_handler.read_error(self.name)
        elif not self._in_bounds(new_index):
            error_handler.index_error(self.name, new_index, self.size)
        else:
            self.index = new_index
    def force_change_index(self, new_index: int) -> None:
        """Move the cursor without the readable check; bounds (including negatives) still apply."""
        if not self._in_bounds(new_index):
            error_handler.index_error(self.name, new_index, self.size)
        else:
            self.index = new_index
    def place_value(self, name: str, var_type: str) -> None:
        """Copy the named variable into the cursor slot if this list is writable."""
        if self.writable:
            self.force_place_value(name, var_type)
        else:
            error_handler.write_error(self.name, name)
    def force_place_value(self, name: str, var_type: str) -> None:
        """Copy the named variable into the cursor slot, skipping the writable check.

        The source variable must still be readable.
        """
        source = get_var(var_type, name)
        if source.is_readable():
            self.types[self.index] = var_type
            self.values[self.index] = source.make_copy()
        else:
            error_handler.read_error(name)
    def read_value(self) -> Value:
        """Return the Value at the cursor if the list is readable."""
        if self.readable:
            return self.values[self.index]
        error_handler.read_error(self.name)
        return self.values[0]  # only reached if the error handler returns
    def read(self) -> Value:
        """Alias for read_value()."""
        return self.read_value()
    def force_read_value(self) -> Value:
        """Return the Value at the cursor without the readable check."""
        return self.values[self.index]
    def copy_element_to(self, dest_value: Value) -> None:
        """Write the cursor element's value into dest_value when their type names match."""
        if self.types[self.index] == dest_value.give_type():
            dest_value.write(self.values[self.index].read())
        else:
            error_handler.type_error(
                name=self.name,
                type1=self.types[self.index],
                type2=dest_value.give_type()
            )
    def read_type(self) -> str:
        """Return the type name of the cursor element if the list is readable."""
        if self.readable:
            return self.types[self.index]
        error_handler.read_error(self.name)
        return ""
    def force_read_type(self) -> str:
        """Return the type name of the cursor element without the readable check."""
        return self.types[self.index]
    def to_string(self) -> str:
        """Return the element values, each followed by a space, wrapped in "[ " and " ]"."""
        parts = "".join(str(val.to_string()) + " " for val in self.values)
        return "[ " + parts + " ]"
    def to_type_string(self) -> str:
        """Return the first character of every element's type name if the list is readable."""
        if self.readable:
            return self.force_to_type_string()
        error_handler.read_error(self.name)
        return ""
    def force_to_type_string(self) -> str:
        """Return the first character of every element's type name without the readable check."""
        return "".join(t[0] for t in self.types)
    def set_readable(self, readable: bool) -> None:
        """Set the readable permission flag."""
        self.readable = readable
    def set_writable(self, writable: bool) -> None:
        """Set the writable permission flag."""
        self.writable = writable
    def get_name(self) -> str:
        """Return the list's name."""
        return self.name
    def is_writable(self) -> bool:
        """Return the writable flag."""
        return self.writable
    def is_readable(self) -> bool:
        """Return the readable flag."""
        return self.readable
    def get_values(self) -> List[Value]:
        """Return the underlying list of Value elements (not a copy)."""
        return self.values
    def get_types(self) -> List[str]:
        """Return the underlying list of element type names (not a copy)."""
        return self.types
    def get_size(self) -> int:
        """Return the current size of the list."""
        return self.size
    def make_copy(self) -> 'VmList':
        """Return a writable copy with copied elements and type names; its cursor starts at 0."""
        copy = VmList(self.name, self.size, self.readable, True, self.type)
        copy.types = self.types.copy()
        copy.values = [val.make_copy() for val in self.values]
        return copy
    def override(self, values: List[Value], types: List[str], size: int) -> None:
        """Replace the list's contents directly, with no checks of any kind."""
        self.values = values
        self.types = types
        self.size = size
    def give_type(self) -> str:
        """Return the type name of this list object."""
        return self.type
    def copy_var(self, source_list: 'VmList') -> None:
        """Replace this list's contents with copies of source_list's elements.

        Reports a type error when the type names differ, or a read error when
        source_list is not readable.
        """
        if self.type != source_list.type:
            error_handler.type_error(self.name, self.type, source_list.type)
        elif not source_list.is_readable():
            error_handler.read_error(source_list.get_name())
        else:
            self.type = 'LIST'
            self.types = source_list.types.copy()
            self.size = source_list.size
            self.values = [var.make_copy() for var in source_list.values]
            # The source may be shorter than this list was; a cursor left past
            # the new end would make the next read raise IndexError.
            if not self._in_bounds(self.index):
                self.index = 0
+# ---------------------------------------------------------------------------
+# Condition
+# ---------------------------------------------------------------------------
class Condition:
    """Comparison between two Value operands, chosen by an operator keyword."""
    def __init__(self, compare: str):
        self.compare_index = COMPARE_OP_INDEX[compare]
        # Both operands start out as unreadable, unwritable INT placeholders holding 0.
        self.left = Value("0", 0, False, False, "INT")
        self.right = Value("0", 0, False, False, "INT")
        self.type = "COND"
    def set_compare(self, compare: str) -> None:
        """Switch to the operator named by compare (a key of COMPARE_OP_INDEX)."""
        self.compare_index = COMPARE_OP_INDEX[compare]
    def set_left(self, left: Value) -> None:
        """Replace the left-hand operand."""
        self.left = left
    def set_right(self, right: Value) -> None:
        """Replace the right-hand operand."""
        self.right = right
    def evaluate(self) -> bool:
        """Apply the selected operator to both operands' read() results and return the outcome."""
        predicate = COMPARE_OPS[self.compare_index]
        return predicate(self.left.read(), self.right.read())
    def give_type(self) -> str:
        """Return this object's type name ("COND" as set by the constructor)."""
        return self.type
+# ---------------------------------------------------------------------------
+# Condition helpers
+# ---------------------------------------------------------------------------
def add_cond(name, compare):
    """Register a new Condition under name, built from the compare keyword.

    A name already present in cond_registry is reported through
    error_handler.var_exists_error instead.
    """
    global cond_registry
    if name not in cond_registry:
        cond_registry[name] = Condition(compare)
    else:
        error_handler.var_exists_error(name)
def add_local_cond(name, compare):
    """Register a new Condition in both cond_registry and current_local_conds.

    A name already present in cond_registry is reported through
    error_handler.var_exists_error.  Returns current_local_conds either way.
    """
    global cond_registry, current_local_conds
    if name in cond_registry:
        error_handler.var_exists_error(name)
        return current_local_conds
    new_cond = Condition(compare)
    cond_registry[name] = new_cond
    current_local_conds[name] = new_cond
    return current_local_conds
def push_local_conds(local_conds):
    """Remove every name in local_conds from cond_registry, then push local_conds onto local_conds_stack."""
    global local_conds_stack, cond_registry, saved_conds
    for cond_name in local_conds:
        # No default: a name missing from the registry raises KeyError.
        cond_registry.pop(cond_name)
    local_conds_stack.push(local_conds)
def pop_local_conds():
    """Pop the top mapping off local_conds_stack and reinstate it in cond_registry.

    Names recorded in saved_conds by the previous restore are first dropped
    from the registry; saved_conds is then rebuilt from the popped mapping.
    Returns the popped mapping.
    """
    global local_conds_stack, cond_registry, saved_conds
    restored = local_conds_stack.pop()
    for stale_name in saved_conds:
        cond_registry.pop(stale_name, None)
    saved_conds = {}
    for cond_name, cond in restored.items():
        cond_registry[cond_name] = cond
        saved_conds[cond_name] = cond
    return restored
def get_cond(name):
    """Return the condition registered under name.

    An unknown name is reported through error_handler.doesnt_exist_error.
    """
    global cond_registry
    if name not in cond_registry:
        error_handler.doesnt_exist_error(name)
        return None
    return cond_registry[name]
+# ---------------------------------------------------------------------------
+# Debug / print helpers
+# ---------------------------------------------------------------------------
def print_vars():
    """Print the end-of-program report.

    For each type in var_registry, lists the variables whose is_writable()
    is true, then prints the text accumulated in printed_output.
    """
    global var_registry, printed_output
    print("END OF PROGRAM")
    print()
    for var_type, variables in var_registry.items():
        print("All the vars used from type " + var_type)
        for var_name, variable in variables.items():
            if variable.is_writable():
                print(var_name + " : " + variable.to_string())
        print("")
    print("All that was printed during the program")
    print(printed_output)
+# ---------------------------------------------------------------------------
+# Variable helpers
+# ---------------------------------------------------------------------------
def add_var(var_type, name, value):
    """Create a readable, writable variable in var_registry[var_type].

    For var_type "LIST" a VmList is built and value is passed as its size;
    any other type gets a Value holding value.  A name already used for that
    type is reported through error_handler.var_exists_error.
    """
    global var_registry
    if name in var_registry[var_type]:
        error_handler.var_exists_error(name)
    factory = VmList if var_type == "LIST" else Value
    var_registry[var_type][name] = factory(name, value, True, True, var_type)
def get_var(var_type, name):
    """Return the object stored at var_registry[var_type][name].

    A name missing for that type is reported through
    error_handler.doesnt_exist_error.
    """
    global var_registry
    if name not in var_registry[var_type]:
        error_handler.doesnt_exist_error(name)
        return None
    return var_registry[var_type][name]
def add_local_var(var_type, name, value):
    """Create a readable, writable variable and record it as a local.

    The same object is stored in both var_registry and current_local_vars.
    For var_type "LIST" a VmList is built with value as its size; otherwise a
    Value holding value.  A name already used for that type is reported
    through error_handler.var_exists_error.
    """
    global current_local_vars, var_registry
    if name in var_registry[var_type]:
        error_handler.var_exists_error(name)
    if var_type == "LIST":
        created = VmList(name, value, True, True, var_type)
    else:
        created = Value(name, value, True, True, var_type)
    var_registry[var_type][name] = created
    current_local_vars[var_type][name] = created
def push_local_vars(var_registry_ref: dict, local_vars: dict, stack: Stack):
    """Remove every variable listed in local_vars from the registry and push local_vars on stack.

    Returns a fresh mapping with empty INT, STR, LIST and BOOLEAN slots.
    """
    for var_type, names in local_vars.items():
        for name in names:
            del var_registry_ref[var_type][name]
    stack.push(local_vars)
    return {type_name: {} for type_name in ("INT", "STR", "LIST", "BOOLEAN")}
def pop_local_vars(var_registry_ref: dict):
    """Pop the top entry of local_vars_stack and reinstate its variables in the registry.

    Variables recorded in saved_locals by the previous restore are first
    removed from the registry (when still present); saved_locals is then
    rebuilt from the popped entry.  Returns the popped entry.
    """
    global saved_locals
    restored = local_vars_stack.pop()
    for var_type, names in saved_locals.items():
        for name in names:
            if name in var_registry_ref[var_type]:
                del var_registry_ref[var_type][name]
    saved_locals = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
    for var_type, variables in restored.items():
        for name, variable in variables.items():
            var_registry_ref[var_type][name] = variable
            saved_locals[var_type][name] = variable
    return restored
+# ---------------------------------------------------------------------------
+# VM print
+# ---------------------------------------------------------------------------
def vm_print(var, newline):
    """Print var.to_string() and append the same text to printed_output.

    The text is followed by `newline` line breaks and `1 - newline` spaces,
    so newline=1 ends the line and newline=0 appends a single space.
    """
    global printed_output
    body = var.to_string()
    rendered = body + '\n' * newline + ' ' * (1 - newline)
    print(rendered, end='')
    printed_output += rendered
+# ---------------------------------------------------------------------------
+# Arithmetic / string operations
+# ---------------------------------------------------------------------------
def vm_add(var1, var2):
    """Sum the INT variables var1 and var2 into the INT variable TEMPORARY (force-written)."""
    target = get_var("INT", "TEMPORARY")
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left + right)
def vm_sub(var1, var2):
    """Store var1 minus var2 (both INT variables) into the INT variable TEMPORARY (force-written)."""
    target = get_var("INT", "TEMPORARY")
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left - right)
def vm_mul(var1, var2):
    """Store the product of the INT variables var1 and var2 into TEMPORARY (force-written)."""
    target = get_var("INT", "TEMPORARY")
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left * right)
def vm_div(var1, var2):
    """Store the floor division var1 // var2 into TEMPORARY (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    numerator = get_var("INT", var1).read()
    denominator = get_var("INT", var2).read()
    target.force_write(numerator // denominator)
def vm_float_div(var1, var2):
    """Store the true division var1 / var2 into TEMPORARY (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    numerator = get_var("INT", var1).read()
    denominator = get_var("INT", var2).read()
    target.force_write(numerator / denominator)
def vm_pow(var1, var2):
    """Store var1 raised to the power var2 (both INT variables) into TEMPORARY (force-written)."""
    target = get_var("INT", "TEMPORARY")
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    target.force_write(base ** exponent)
def vm_mod(var1, var2):
    """Store int(var1 % var2) into TEMPORARY (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(int(dividend % divisor))
def vm_concat(var1, var2):
    """Store var1 followed by var2 (both STR variables) into the STR variable TEMPSTRING (force-written)."""
    target = get_var("STR", "TEMPSTRING")
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    target.force_write(head + tail)
+# ---------------------------------------------------------------------------
+# Explicit-destination ALU operations (4-word dialect)
+# ---------------------------------------------------------------------------
def vm_add_to(dest: str, var1: str, var2: str) -> None:
    """Sum the INT variables var1 and var2 into the INT variable *dest* (force-written)."""
    target = get_var("INT", dest)
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left + right)
def vm_sub_to(dest: str, var1: str, var2: str) -> None:
    """Store var1 minus var2 (both INT variables) into the INT variable *dest* (force-written)."""
    target = get_var("INT", dest)
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left - right)
def vm_mul_to(dest: str, var1: str, var2: str) -> None:
    """Store the product of the INT variables var1 and var2 into *dest* (force-written)."""
    target = get_var("INT", dest)
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left * right)
def vm_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the floor division var1 // var2 into *dest* (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    numerator = get_var("INT", var1).read()
    denominator = get_var("INT", var2).read()
    target.force_write(numerator // denominator)
def vm_float_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the true division var1 / var2 into *dest* (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    numerator = get_var("INT", var1).read()
    denominator = get_var("INT", var2).read()
    target.force_write(numerator / denominator)
def vm_pow_to(dest: str, var1: str, var2: str) -> None:
    """Store var1 raised to the power var2 (both INT variables) into *dest* (force-written)."""
    target = get_var("INT", dest)
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    target.force_write(base ** exponent)
def vm_mod_to(dest: str, var1: str, var2: str) -> None:
    """Store int(var1 % var2) into *dest* (force-written).

    A divisor of zero is reported through error_handler.div_zero_error first.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(int(dividend % divisor))
def vm_concat_to(dest: str, var1: str, var2: str) -> None:
    """Store var1 followed by var2 (both STR variables) into the STR variable *dest* (force-written)."""
    target = get_var("STR", dest)
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    target.force_write(head + tail)
def vm_list_grow(list_name, size_var):
    """Call add_size on the LIST variable list_name with the value read from the INT variable size_var."""
    target_list = get_var("LIST", list_name)
    extra = get_var("INT", size_var).read()
    target_list.add_size(extra)
def vm_assign_list(dest, src):
    """Copy the LIST variable src into the LIST variable dest via copy_var."""
    target = get_var("LIST", dest)
    source = get_var("LIST", src)
    target.copy_var(source)
def vm_assign_str(dest, src):
    """Copy the STR variable src into the STR variable dest via copy_var."""
    target = get_var("STR", dest)
    source = get_var("STR", src)
    target.copy_var(source)
+def vm_assign_int(dest, src):
+    """Copy an INT variable from src into dest."""
+    get_var("INT", dest).copy_var(get_var("INT", src))
+def vm_pad_str(var_name, num_spaces):
+    """Append a fixed number of blank spaces to a STR variable."""
+    get_var("STR", var_name).write(get_var("STR", var_name).read() + ' ' * num_spaces)
+def vm_type_to_int(type_str_var, dest_int_var):
+    """Write the integer index of a type-name string variable into an INT variable."""
+    lookup = {"INT": 0, "STR": 1, "BOOLEAN": 2, "LIST": 3}
+    type_str = get_var('STR', type_str_var).read()
+    if type_str in lookup:
+        get_var('INT', dest_int_var).write(lookup[type_str])
+    else:
+        error_handler.type_to_int_error(type_str)
+# ---------------------------------------------------------------------------
+# Program-locals save/restore
+# ---------------------------------------------------------------------------
+def restore_program_locals(global_vars: dict, saved_program_locals: dict):
+    """Restore program-level local variables into the global registry after a function call."""
+    global saved_globals
+    for var_type in saved_globals:
+        for name in saved_globals[var_type]:
+            del global_vars[var_type][name]
+    saved_globals = {"INT": {}, "STR": {}, "LIST": {}}
+    for var_type in saved_program_locals:
+        for name in saved_program_locals[var_type]:
+            global_vars[var_type][name] = saved_program_locals[var_type][name]
+            saved_globals[var_type][name] = saved_program_locals[var_type][name]
+# ---------------------------------------------------------------------------
+# Line tick
+# ---------------------------------------------------------------------------
+def tick_line():
+    """Increment the execution line counter and abort if the line limit is exceeded."""
+    global line_count
+    line_count += 1
+    if line_count == line_limit:
+        error_handler.line_limit_error()
     else:
         return True
+# ---------------------------------------------------------------------------
+# Function call enter / exit
+# ---------------------------------------------------------------------------
+def enter_function_call(input_type, input_var_name, function, output_type, output_var_name, call_line):
+    """Push a new function call frame, execute the function, and copy its return value.
+
+    Parameters
+    ----------
+    input_type, input_var_name
+        Type key and name of the caller's variable passed as the argument.
+    function
+        Zero-argument callable implementing the callee; its return value is
+        copied into the output variable with ``copy_var``.
+    output_type, output_var_name
+        Type key and name of the variable that receives the result.
+    call_line
+        Source line of the call, forwarded to ``set_current_line``.
+    """
+    set_current_line(call_line)
+    global var_registry, function_count, program_locals_stack, current_program_locals
+    global program_local_names, current_local_vars, function_limit, current_local_conds
+    # Save the caller's program-local slots so exit_function_call can restore them.
+    program_locals_stack.push(current_program_locals)
+    # Resolve the argument before the LOCAL* slots are replaced below.
+    var_input = get_var(input_type, input_var_name)
+    # Reset program-local slots to unreadable/unwritable defaults
+    for slot_type in program_local_names:
+        slot_name = program_local_names[slot_type]
+        if slot_type == 'STR':
+            var_registry[slot_type][slot_name] = Value(slot_name, '', False, False, 'STR')
+        elif slot_type == 'INT':
+            var_registry[slot_type][slot_name] = Value(slot_name, 0, False, False, 'INT')
         else:
+            var_registry[slot_type][slot_name] = VmList(slot_name, 8, False, False, 'LIST')
+    # Re-point the current program-locals table at the freshly created slots.
+    current_program_locals = {
+        "INT":  {"LOCALINT":  var_registry["INT"]["LOCALINT"]},
+        "STR":  {"LOCALSTR":  var_registry["STR"]["LOCALSTR"]},
+        "LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
+    }
+    # Only the slot matching the argument's type receives the argument value.
+    var_registry[input_type][program_local_names[input_type]].copy_var(var_input)
+    var_to_send = var_registry[input_type][program_local_names[input_type]]
+    function_count += 1
+    # Call-depth guard: reaching function_limit is reported as an overflow.
+    if function_count == function_limit:
+        error_handler.overflow_error(function_call_stack)
     else:
+        current_local_vars = push_local_vars(var_registry, current_local_vars, local_vars_stack)
+        # The argument slot is the only LOCAL* slot made readable and writable.
+        var_to_send.set_readable(True)
+        var_to_send.set_writable(True)
+        # The callee starts with an empty set of local conditions.
+        push_local_conds(current_local_conds)
+        current_local_conds = {}
+        # The output variable is looked up before the callee body runs.
+        output = get_var(output_type, output_var_name)
+        result = function()
+        output.copy_var(result)
+        tick_line()
+def exit_function_call(return_type, return_var_name):
+    """Pop the current function call frame and return the named output variable.
+
+    The return variable is resolved first, while the callee's variables are
+    still registered; the saved program locals, local variables and local
+    conditions are restored afterwards, in that order.
+    """
+    global var_registry, program_locals_stack, function_count, current_local_conds
+    global current_local_vars, local_vars_stack
+    # Look the variable up before any frame state is torn down.
+    return_var = get_var(return_type, return_var_name)
+    function_count -= 1
+    saved_program_locals = program_locals_stack.pop()
+    # Restore program-level globals
+    restore_program_locals(var_registry, saved_program_locals)
+    # Restore call-level locals
+    current_local_vars = pop_local_vars(var_registry)
+    # Restore conditions
+    current_local_conds = pop_local_conds()
+    tick_line()
+    return return_var
+# ---------------------------------------------------------------------------
+# VM initialisation
+# ---------------------------------------------------------------------------
+local_vars_stack = Stack()       # stack of local variable dicts per function call
+# Base frame for top-level code.
+# NOTE(review): this frame has no "BOOLEAN" key although empty_local_vars does — confirm intended.
+local_vars_stack.push({"INT": {}, "STR": {}, "LIST": {}})
+program_locals_stack = Stack()   # stack of program-local variable dicts
+# Built-in loop variables: readable but not writable.
+LOOP_INTEGER = Value(name="LOOPINTEGER",  value=0,    readable=True,  writable=False, type_name="INT")
+LOOP_STRING  = Value(name="LOOPSTRING",   value="",   readable=True,  writable=False, type_name="STR")
+# NOTE(review): type_name is "BOOL" here, but the registries file this value under "BOOLEAN" — confirm.
+LOOP_BOOL    = Value(name="LOOPBOOL",     value=True, readable=True,  writable=False, type_name="BOOL")
+LOOP_LIST    = VmList( name="LOOPLIST",     size=8,     readable=True,  writable=False, type_name="LIST")
+# Scratch values (TEMPORARY is writable, TEMPSTRING is not) and the
+# program-local slots, which start unreadable and unwritable.
+TEMPORARY    = Value(name="TEMPORARY",    value=0,    readable=True,  writable=True,  type_name="INT")
+LOCAL_INT    = Value(name="LOCALINT",     value=0,    readable=False, writable=False, type_name="INT")
+TEMP_STRING  = Value(name="TEMPSTRING",   value="",   readable=True,  writable=False, type_name="STR")
+LOCAL_STR    = Value(name="LOCALSTR",     value="",   readable=False, writable=False, type_name="STR")
+LOCAL_LIST   = VmList( name="LOCALLIST",    size=8,     readable=False, writable=False, type_name="LIST")
+# Read-only STR values holding the internal type tags ("INT", "STR", "LIST", "BOOLEAN").
+TYPE_INT_VAL     = Value(name="INTEGER", value="INT",     readable=True, writable=False, type_name="STR")
+TYPE_STR_VAL     = Value(name="STRING",  value="STR",     readable=True, writable=False, type_name="STR")
+TYPE_LIST_VAL    = Value(name="LIST",    value="LIST",    readable=True, writable=False, type_name="STR")
+TYPE_BOOLEAN_VAL = Value(name="BOOLEAN", value="BOOLEAN", readable=True, writable=False, type_name="STR")
+# Loop variables indexed by type key, then by variable name.
+loop_var_registry = {
+    "INT":     {"LOOPINTEGER": LOOP_INTEGER},
+    "STR":     {"LOOPSTRING":  LOOP_STRING},
+    "LIST":    {"LOOPLIST":    LOOP_LIST},
+    "BOOLEAN": {"LOOPBOOL":    LOOP_BOOL},
+}
+# The same loop variables, addressed directly by type key.
+loop_var_by_type = {
+    "INT":     LOOP_INTEGER,
+    "STR":     LOOP_STRING,
+    "LIST":    LOOP_LIST,
+    "BOOLEAN": LOOP_BOOL,
+}
+# EQUALS condition whose two sides are both TEMPORARY
+# (presumably always true, as the name suggests — depends on Condition semantics).
+THE_TRUTH = Condition('EQUALS')
+THE_TRUTH.set_left(TEMPORARY)
+THE_TRUTH.set_right(TEMPORARY)
+# Variable registry: type key -> {variable name -> variable object}.
+# It is seeded here with the built-in variables only.
+var_registry = {
+    "INT": {
+        "LOOPINTEGER": LOOP_INTEGER,
+        "TEMPORARY":   TEMPORARY,
+        "LOCALINT":    LOCAL_INT,
+    },
+    "STR": {
+        "LOOPSTRING": LOOP_STRING,
+        "TEMPSTRING": TEMP_STRING,
+        "LOCALSTR":   LOCAL_STR,
+        # Type-name constants, stored as STR values.
+        "INTEGER":    TYPE_INT_VAL,
+        "STRING":     TYPE_STR_VAL,
+        "LIST":       TYPE_LIST_VAL,
+        "BOOLEAN":    TYPE_BOOLEAN_VAL,
+    },
+    "LIST": {
+        "LOOPLIST":  LOOP_LIST,
+        "LOCALLIST": LOCAL_LIST,
+    },
+    "BOOLEAN": {
+        "LOOPBOOL": LOOP_BOOL,
+    },
+}
 # Number names (ZERO..ONEHUNDRED) are compile-time constants only.
 # They are resolved to plain integer literals by the compiler (toline/word_to_num)
+# and must NOT live in var_registry["INT"] — that would prevent users from naming
 # their own variables ONE, ZERO, etc.
+empty_local_vars      = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
+current_local_vars    = empty_local_vars.copy()
+# Program-local slots currently in effect, taken from the registry entries above.
+current_program_locals = {
+    "INT":  {"LOCALINT":  var_registry["INT"]["LOCALINT"]},
+    "STR":  {"LOCALSTR":  var_registry["STR"]["LOCALSTR"]},
+    "LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
+}
+# Type key -> reserved name of that type's program-local slot.
+program_local_names = {"INT": "LOCALINT", "STR": "LOCALSTR", "LIST": "LOCALLIST"}
+# Saved condition tables and the table currently in effect.
+local_conds_stack  = Stack()
+current_local_conds = {}
+# Seed the stack with the initial program-local slots.
+program_locals_stack.push(current_program_locals)
+error_handler = ErrorHandler()
+# Make the built-in condition available under the name THETRUTH.
+cond_registry['THETRUTH'] = THE_TRUTH

language/dialects.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+dialects.py – Dialect normalisation for Tzefa source lines.
+The canonical internal bytecode is a **4-word tuple**::
+    [VERB, TYPE, ARG1, ARG2]
+Two source dialects produce these tuples:
+  THREE_WORD  – ``OPCODE ARG1 ARG2``  (classic, expanded to 4-word internally)
+  FOUR_WORD   – ``VERB TYPE ARG1 ARG2`` (verbose, already native)
+Two casing modes:
+  CAPS_ONLY   – every token is UPPERCASE
+  MIXED_CASE  – commands Titlecase, user vars lowercase; all uppercased internally
+"""
+from __future__ import annotations
+from typing import Dict, List, Tuple
+# ---------------------------------------------------------------------------
+# Public constants
+# ---------------------------------------------------------------------------
+# Dialect identifiers: number of words per source line.
+THREE_WORD: str = "three_word"
+FOUR_WORD: str = "four_word"
+# Casing-mode identifiers.
+CAPS_ONLY: str = "caps_only"
+MIXED_CASE: str = "mixed_case"
+# ---------------------------------------------------------------------------
+# 3-word → 4-word expansion table
+# ---------------------------------------------------------------------------
+# Every classic 3-word opcode maps to a (VERB, TYPE) pair.
+# The ALU opcodes are not listed here; they live in _THREE_WORD_ALU because
+# their second word is a destination rather than a type.
+THREE_TO_FOUR: Dict[str, Tuple[str, str]] = {
+    # Variable declarations
+    "MAKEINTEGER":    ("MAKE",    "INTEGER"),
+    "MAKESTR":        ("MAKE",    "STRING"),
+    "MAKEBOOLEAN":    ("MAKE",    "BOOLEAN"),
+    "NEWLIST":        ("NEW",     "LIST"),
+    "BASICCONDITION": ("NEW",     "CONDITION"),
+    # Assignment / copy
+    # "ASSSIGNINT" (three S's) is the opcode's established spelling in the
+    # classic dialect; do not "correct" it.
+    "ASSSIGNINT":     ("SET",     "INTEGER"),
+    "STRINGASSIGN":   ("SET",     "STRING"),
+    "COPYLIST":       ("SET",     "LIST"),
+    "SETINDEX":       ("SET",     "INDEX"),
+    "LEFTSIDE":       ("SET",     "LEFT"),
+    "RIGHTSIDE":      ("SET",     "RIGHT"),
+    # Condition
+    "CHANGECOMPARE":  ("CHANGE",  "COMPARE"),
+    # Control flow
+    "WHILE":          ("WHILE",   "CONDITION"),
+    "WHILETRUE":      ("WHILE",   "BOOLEAN"),
+    "COMPARE":        ("IF",      "CONDITION"),
+    "IFTRUE":         ("IF",      "BOOLEAN"),
+    "ELSECOMPARE":    ("ELIF",    "CONDITION"),
+    "ELSEIF":         ("ELIF",    "BOOLEAN"),
+    "ITERATE":        ("ITERATE", "LIST"),
+    # Print
+    "PRINTSTRING":    ("PRINT",   "STRING"),
+    "PRINTINTEGER":   ("PRINT",   "INTEGER"),
+    # List read
+    "GETINTEGER":     ("GET",     "INTEGER"),
+    "GETSTRING":      ("GET",     "STRING"),
+    "GETBOOL":        ("GET",     "BOOLEAN"),
+    "GETLIST":        ("GET",     "LIST"),
+    "GETTYPE":        ("GET",     "TYPE"),
+    "LENGTH":         ("GET",     "LENGTH"),
+    # List write
+    "WRITEINTEGER":   ("WRITE",   "INTEGER"),
+    "WRITESTRING":    ("WRITE",   "STRING"),
+    "WRITEBOOL":      ("WRITE",   "BOOLEAN"),
+    "WRITELIST":      ("WRITE",   "LIST"),
+    # List resize
+    "ADDSIZE":        ("ADD",     "SIZE"),
+    # String utilities
+    "BLANKSPACES":    ("PAD",     "STRING"),
+    # Type introspection
+    "TYPETOINT":      ("TYPE",    "TOINT"),
+    # Functions
+    "INTEGERFUNCTION": ("FUNCTION", "INTEGER"),
+    "STRINGFUNCTION":  ("FUNCTION", "STRING"),
+    "LISTFUNCTION":    ("FUNCTION", "LIST"),
+    "RETURN":          ("RETURN",   "VALUE"),
+}
+# 3-word ALU opcodes that implicitly write to TEMPORARY (or TEMPSTRING for COMBINE).
+# They expand differently: OPCODE A B → [VERB, DEST, A, B]
+_THREE_WORD_ALU: Dict[str, Tuple[str, str]] = {
+    "ADDVALUES":    ("ADD",         "TEMPORARY"),
+    "SUBTRACT":     ("SUBTRACT",    "TEMPORARY"),
+    "MULTIPLY":     ("MULTIPLY",    "TEMPORARY"),
+    "DIVIDE":       ("DIVIDE",      "TEMPORARY"),
+    "SIMPLEDIVIDE": ("SIMPLEDIVIDE","TEMPORARY"),
+    "MODULO":       ("MODULO",      "TEMPORARY"),
+    "MATHPOW":      ("POWER",       "TEMPORARY"),
+    "COMBINE":      ("COMBINE",     "TEMPSTRING"),
+}
+# Reverse lookup: (VERB, TYPE) → classic 3-word opcode.  Built from
+# THREE_TO_FOUR only, so the ALU opcodes above are not included.
+FOUR_TO_THREE: Dict[Tuple[str, str], str] = {v: k for k, v in THREE_TO_FOUR.items()}
+# Set of ALU verbs that use the [VERB, DEST, SRC1, SRC2] layout
+# NOTE(review): "ADD" is an ALU verb here and also the verb of ("ADD", "SIZE")
+# in THREE_TO_FOUR, so "ADD SIZE x y" is ambiguous in the 4-word dialect —
+# confirm how the consumer of ALU_VERBS tells the two apart.
+ALU_VERBS = frozenset(_THREE_WORD_ALU[k][0] for k in _THREE_WORD_ALU)
+def words_per_line(dialect: str) -> int:
+    """Return the expected token count for the given dialect."""
+    return 4 if dialect == FOUR_WORD else 3
+# ---------------------------------------------------------------------------
+# Normalisation — always produces a 4-word CAPS tuple
+# ---------------------------------------------------------------------------
+def normalize_line(tokens: List[str], dialect: str, casing: str) -> List[str]:
+    """
+    Convert a raw token list into a canonical 4-word UPPERCASE tuple.
+    For most instructions the layout is  [VERB, TYPE, ARG1, ARG2].
+    For ALU operations the layout is     [VERB, DEST, SRC1, SRC2].
+    In the 3-word dialect ALU ops have no explicit dest:
+        ADDVALUES A B  →  [ADD,  TEMPORARY,  A,  B]
+        MODULO    A B  →  [MODULO, TEMPORARY, A, B]
+    In the 4-word dialect the dest is already present:
+        ADD RESULT A B  →  [ADD, RESULT, A, B]
+    Returns
+    -------
+    list[str]
+        Exactly 4 UPPERCASE tokens.
+    """
+    upper = [t.upper() for t in tokens]
+    if dialect == FOUR_WORD:
+        while len(upper) < 4:
+            upper.append("")
+        return upper[:4]
+    # THREE_WORD → expand to 4-word
+    while len(upper) < 3:
+        upper.append("")
+    upper = upper[:3]
+    opcode, arg1, arg2 = upper[0], upper[1], upper[2]
+    # 3-word ALU: inject implicit dest
+    alu = _THREE_WORD_ALU.get(opcode)
+    if alu is not None:
+        return [alu[0], alu[1], arg1, arg2]
+    # Standard verb+type expansion
+    pair = THREE_TO_FOUR.get(opcode)
+    if pair is not None:
+        return [pair[0], pair[1], arg1, arg2]
+    # Unknown opcode — treat as user-defined function call: FUNCNAME INPUT OUTPUT
+    return ["CALL", opcode, arg1, arg2]

language/topy.py CHANGED Viewed

@@ -1,454 +1,384 @@
-def makeparenthasis(listofvals):
-    stri = "("
-    for i in range(len(listofvals) - 1):
-        stri = stri + " " + str(listofvals[i]) + " " + ","
-    stri = stri + " " + str(listofvals[-1]) + " )"
-    return stri
-def strreadvalue(type, name):
-    return "getvar" + makeparenthasis([tostri(type), tostri(name)]) + ".read()"
-lineupdate = "endline() ;"
-infunction = False
-dictoffunct = {i[0]: i for i in [[0]]}
-dictofinstructions = {i: "thetext" for i in dictoffunct}
-listfunctionswithtypes = {i[0]: i for i in [[0]]}
-listfunctionswithtypes["GREATESTDIV"] = ["GREATESTDIV", "LIST", "LIST"]
-for i in listfunctionswithtypes:
-    for j in range(len(listfunctionswithtypes[i])):
-        if (listfunctionswithtypes[i][j] == "BOOL"):
-            listfunctionswithtypes[i][j] = "BOOLEAN"
-listofindentchanges = [0 for i in range(1, 1000 + 1)]
-def getinstructions(listfunctions, listezfunctions):
-    global dictoffunct, listfunctionswithtypes
-    dictoffunct = {i[0]: i for i in listezfunctions}
-    listfunctionswithtypes = {i[0]: i for i in listfunctions}
-def tostri(value):
-    return "'" + str(value) + "'"
-def MAKEINTEGER(name, value, linenum):
-    global infunction
-    inparan = makeparenthasis(['"INT"', tostri(name), value])
-    if (infunction):
-        declarestr = "addlocalvar" + inparan
-    else:
-        declarestr = "addvar" + inparan
-    stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
-    return stri
-def MAKESTR(name, value, linenum):
-    global infunction
-    inparan = makeparenthasis(['"STR"', tostri(name), "'" + str(value) + "'"])
-    if (infunction):
-        declarestr = "addlocalvar" + inparan
-    else:
-        declarestr = "addvar" + inparan
-    stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
-    return stri
-def MAKEBOOLEAN(name, value, linenum):
-    global infunction
-    if value == "TRUE":
-        value = "True"
-    elif value == "FALSE":
-        value = "False"
-    inparan = makeparenthasis(['"BOOLEAN"', tostri(name), value])
-    if (infunction):
-        declarestr = "addlocalvar" + inparan
-    else:
-        declarestr = "addvar" + inparan
-    stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
-    return stri
-def NEWLIST(name, value, linenum):
-    global infunction
-    # value is already a plain integer string (e.g. '6') resolved at compile time
-    inparan = makeparenthasis(['"LIST"', tostri(name), str(int(value))])
-    if (infunction):
-        declarestr = "addlocalvar" + inparan
-    else:
-        declarestr = "addvar" + inparan
-    stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
-    return stri
-def BASICCONDITION(name, compare, linenum):
-    global infunction
-    if (infunction == False):
-        declarestr = "addcond" + makeparenthasis([tostri(name), tostri(compare)])
-    else:
-        declarestr = "addlocalcond" + makeparenthasis([tostri(name), tostri(compare)])
-    stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
-    return stri
-def LEFTSIDE(name, othername, linenum):
-    thegetvar = "getvar" + makeparenthasis(['"INT"', tostri(othername)])
-    stri = "line(" + str(linenum) + ")" + "; " + \
-           "getcond" + makeparenthasis([tostri(name)]) + ".changeleft(" + thegetvar + ")" + "; " + lineupdate
-    return (stri)
-def RIGHTSIDE(name, othername, linenum):
-    thegetvar = "getvar" + makeparenthasis(['"INT"', tostri(othername)])
-    stri = "line(" + str(linenum) + ")" + "; " + \
-           "getcond" + makeparenthasis([tostri(name)]) + ".changeright(" + thegetvar + ")" + "; " + lineupdate
-    return (stri)
-def CHANGECOMPARE(name, valuecompare, linenum):
-    stri = "line(" + str(linenum) + ")" + "; " + \
-           "getcond" + makeparenthasis([tostri(name)]) + ".changecompare(" + tostri(
-        valuecompare) + ")" + "; " + lineupdate
-    return (stri)
-def WHILE(compare, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "while" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def ITERATE(listi, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "for i in join" + makeparenthasis(["getvar('LIST'," + tostri(listi) + ")", str(linenum)]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return lineofwhile
-def COMPARE(compare, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def ELSECOMPARE(compare, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "elif" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def WHILETRUE(bool, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "while" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def IFTRUE(bool, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def ELSEIF(bool, endline, linenum):
-    global listofindentchanges
-    lineofwhile = "elif" + makeparenthasis(["line(" + str(linenum) + ") and " + (
-            "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
-    listofindentchanges[linenum + 1] = 1
-    listofindentchanges[int(endline) + 1] = -1
-    return (lineofwhile)
-def INTEGERFUNCTION(name, type, linenum):
-    global thetype, infunction
-    infunction = True
-    thetype = "INT"
-    listofindentchanges[linenum + 1] = 1
-    return "def " + name + "" + '():'
-def STRINGFUNCTION(name, type, linenum):
-    global thetype, infunction
-    infunction = True
-    thetype = "STR"
-    listofindentchanges[linenum + 1] = 1
-    return "def " + name + "" + '():'
-def LISTFUNCTION(name, type, linenum):
-    global thetype, infunction
-    infunction = True
-    thetype = "LIST"
-    listofindentchanges[linenum + 1] = 1
-    return "def " + name + "" + '():'
-def RETURN(name, stay, linenum):
-    if (stay == "BREAK"):
-        listofindentchanges[linenum + 1] = -1
-        global infunction
-        infunction = False
-    return ("line(" + str(linenum) + "); " + "return(updatelineexitingcall" + makeparenthasis(
-        [tostri(thetype), tostri(name)]) + ")")
-def PRINTSTRING(name, state, linenum):
-    if (state == "BREAK"):
-        state = "True"
     else:
-        state = "False"
-    return "line(" + str(linenum) + "); " + "Print(" + "getvar('STR'," + tostri(name) + ")," + state + "); " + "endline()"
-def PRINTINTEGER(name, state, linenum):
-    if (state == "BREAK"):
-        state = "True"
     else:
-        state = "False"
-    return "line(" + str(linenum) + "); " + "Print(" + "getvar('INT'," + tostri(
-        name) + ")," + state + "); " + "endline()"
-def SETINDEX(name, index, linenum):
-    # index is already a plain integer string resolved at compile time
-    return ("line(" + str(linenum) + "); getvar('LIST'," + tostri(name) + ").changeindex(" + str(int(index)) + "); endline()")
-def GETSTRING(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('STR'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
-def GETINTEGER(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('INT'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
-def GETLIST(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('LIST'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
-def GETBOOL(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('BOOLEAN'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
-def WRITESTRING(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"STR"'"); endline()")
-def WRITEINTEGER(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"INT"'"); endline()")
-def WRITEBOOL(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"BOOLEAN"'"); endline()")
-def WRITELIST(listname, name, linenum):
-    name = tostri(name)
-    listname = tostri(listname)
-    return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"LIST"'"); endline()")
-def GETTYPE(listname, strname, linenum):
-    strname = tostri(strname)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('STR'," + strname + ").write(getvar('LIST'," + listname + ").returntype()); endline()")
-def LENGTH(listname, intname, linenum):
-    intname = tostri(intname)
-    listname = tostri(listname)
-    return ("line(" + str(
-        linenum) + ");getvar('INT'," + intname + ").write(getvar('LIST'," + listname + ").getsize()); endline()")
-def ADDVALUES(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "add" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def MULTIPLY(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "mult" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def MATHPOW(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "pow" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def DIVIDE(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "betterdiv" + makeparenthasis(
-        [tostri(vali), tostri(vali2)]) + "; endline()")
-def SIMPLEDIVIDE(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "div" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def SUBTRACT(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "dec" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def MODULO(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "mod" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def COMBINE(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "comb" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def ADDSIZE(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "addsize" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
-def ASSSIGNINT(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "assignint" + makeparenthasis(
-        [tostri(vali), tostri(vali2)]) + "; endline()")
-def STRINGASSIGN(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "assignstr" + makeparenthasis(
-        [tostri(vali), tostri(vali2)]) + "; endline()")
-def COPYLIST(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "assignlist" + makeparenthasis(
-        [tostri(vali), tostri(vali2)]) + "; endline()")
-def BLANKSPACES(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "blankspaces" + makeparenthasis([tostri(vali), vali2]) + "; endline()")
-def TYPETOINT(vali, vali2, linenum):
-    return ("line(" + str(linenum) + "); " + "typetoint" + makeparenthasis(
-        [tostri(vali), tostri(vali2)]) + "; endline()")
-dictofinstructions["MAKEINTEGER"] = MAKEINTEGER
-dictofinstructions["MAKESTR"] = MAKESTR
-dictofinstructions["MAKEBOOLEAN"] = MAKEBOOLEAN
-dictofinstructions["NEWLIST"] = NEWLIST
-dictofinstructions["BASICCONDITION"] = BASICCONDITION
-dictofinstructions["LEFTSIDE"] = LEFTSIDE
-dictofinstructions["RIGHTSIDE"] = RIGHTSIDE
-dictofinstructions["CHANGECOMPARE"] = CHANGECOMPARE
-dictofinstructions["WHILE"] = WHILE
-dictofinstructions["ITERATE"] = ITERATE
-dictofinstructions["COMPARE"] = COMPARE
-dictofinstructions["ELSECOMPARE"] = ELSECOMPARE
-dictofinstructions["WHILETRUE"] = WHILETRUE
-dictofinstructions["IFTRUE"] = IFTRUE
-dictofinstructions["ELSEIF"] = ELSEIF
-dictofinstructions["SETINDEX"] = SETINDEX
-dictofinstructions["INTEGERFUNCTION"] = INTEGERFUNCTION
-dictofinstructions["STRINGFUNCTION"] = STRINGFUNCTION
-dictofinstructions["LISTFUNCTION"] = LISTFUNCTION
-dictofinstructions["PRINTSTRING"] = PRINTSTRING
-dictofinstructions["PRINTINTEGER"] = PRINTINTEGER
-dictofinstructions["GETSTRING"] = GETSTRING
-dictofinstructions["GETINTEGER"] = GETINTEGER
-dictofinstructions["GETLIST"] = GETLIST
-dictofinstructions["GETBOOL"] = GETBOOL
-dictofinstructions["WRITESTRING"] = WRITESTRING
-dictofinstructions["WRITEINTEGER"] = WRITEINTEGER
-dictofinstructions["WRITEBOOL"] = WRITEBOOL
-dictofinstructions["WRITELIST"] = WRITELIST
-dictofinstructions["GETTYPE"] = GETTYPE
-dictofinstructions["LENGTH"] = LENGTH
-dictofinstructions["ASSSIGNINT"] = ASSSIGNINT
-dictofinstructions["ADDSIZE"] = ADDSIZE
-dictofinstructions["STRINGASSIGN"] = STRINGASSIGN
-dictofinstructions["COPYLIST"] = COPYLIST
-dictofinstructions["ADDVALUES"] = ADDVALUES
-dictofinstructions["MULTIPLY"] = MULTIPLY
-dictofinstructions["MATHPOW"] = MATHPOW
-dictofinstructions["DIVIDE"] = DIVIDE
-dictofinstructions["SIMPLEDIVIDE"] = SIMPLEDIVIDE
-dictofinstructions["SUBTRACT"] = SUBTRACT
-dictofinstructions["MODULO"] = MODULO
-dictofinstructions["COMBINE"] = COMBINE
-dictofinstructions["BLANKSPACES"] = BLANKSPACES
-dictofinstructions["RETURN"] = RETURN
-dictofinstructions["TYPETOINT"] = TYPETOINT
-def makepredict(listi, i):
-    if listi[0] in dictofinstructions:
-        return dictofinstructions[listi[0]](listi[1], listi[2], i)
-    else:
-        listfun = listfunctionswithtypes[listi[0]]
-        return ("updatelinewithcall" + makeparenthasis(
-            [tostri(listfun[1]), tostri(listi[1]), listi[0], tostri(listfun[2]), tostri(listi[2]), i]))
-def makepyfile(listi):
     from pathlib import Path
-    outfile = Path(__file__).parent / "test.py"
-    with outfile.open("w+", encoding="utf-8") as f:
         f.write("from Tzefa_Language.createdpython import *\n")
-        counterindent = 0
-        indent = "    "
-        for i in range(1, len(listi) + 1):
-            counterindent += listofindentchanges[i]
-            f.write(indent * counterindent + makepredict(listi[i - 1], i) + '\n')
-        f.write("printvars()")
-if __name__ == '__main__':
-    listi = [["MAKEINTEGER", "THEINT", '2769'], ["MAKEINTEGER", "THEINTI", '1065'], ["MAKEINTEGER", "THROWONE", '1065'],
-             ["MAKEINTEGER", "THROWTWO", '1065'], ["NEWLIST", "LISTOFTWO", '2'], ["SETINDEX", "LISTOFTWO", '0'],
-             ["WRITEINTEGER", "LISTOFTWO", 'THEINT'], ["SETINDEX", "LISTOFTWO", '1'],
-             ["WRITEINTEGER", "LISTOFTWO", 'THEINTI'], ["MAKEINTEGER", "ZERO", '0'], ["ADDVALUES", "THEINT", 'THEINTI'],
-             ["PRINTINTEGER", "TEMPORARY", 'BREAK'], ["LISTFUNCTION", "GREATESTDIV", 'LIST'],
-             ["SETINDEX", "LISTOFTWO", '0'], ["GETINTEGER", "LISTOFTWO", 'THROWONE'], ["SETINDEX", "LISTOFTWO", '1'],
-             ["GETINTEGER", "LISTOFTWO", 'THROWTWO'], ["BASICCONDITION", "EUCLIDCOMPARE", 'EQUALS'],
-             ["LEFTSIDE", "EUCLIDCOMPARE", 'THROWTWO'], ["RIGHTSIDE", "EUCLIDCOMPARE", 'ZERO'],
-             ["COMPARE", "EUCLIDCOMPARE", '23'], ["WRITEINTEGER", "LISTOFTWO", 'THROWTWO'],
-             ["RETURN", "LISTOFTWO", "STAY"], ["RIGHTSIDE", "EUCLIDCOMPARE", 'THROWTWO']
-        , ["SETINDEX", "LISTOFTWO", '0'], ["WRITEINTEGER", "LISTOFTWO", 'THROWTWO'], ["MODULO", "THROWONE", 'THROWTWO'],
-             ["SETINDEX", "LISTOFTWO", '1'], ["WRITEINTEGER", "LISTOFTWO", 'TEMPORARY'],
-             ["GREATESTDIV", "LISTOFTWO", 'LISTOFTWO'], ["RETURN", "LISTOFTWO", 'BREAK'],
-             ["GREATESTDIV", "LISTOFTWO", 'LISTOFTWO']]
-    makepyfile(listi)

+"""
+topy.py – Tzefa IR → Python code generator.
+The bytecode is a 4-element tuple::
+    [VERB, TYPE, ARG1, ARG2]
+Each handler receives (type_word, arg1, arg2, line_num) and returns a
+Python source-code string that is later assembled by make_py_file().
+"""
+from __future__ import annotations
+from typing import Any, Callable, Dict, List, Tuple
+# ---------------------------------------------------------------------------
+# Globals
+# ---------------------------------------------------------------------------
+_TICK: str = "tick_line() ;"
+_in_function: bool = False
+_current_return_type: str = ""
+_user_functions: Dict[str, List[str]] = {}
+_indent_changes: List[int] = [0] * 1001
+# ---------------------------------------------------------------------------
+# Tiny code-gen helpers
+# ---------------------------------------------------------------------------
+def _args(*values: Any) -> str:
+    """Parenthesised, comma-separated argument list."""
+    return "( " + ", ".join(str(v) for v in values) + " )" if values else "()"
+def _q(value: Any) -> str:
+    """Single-quote a value for generated code."""
+    return f"'{value}'"
def _gv(var_type: str, name: str) -> str:
    """Build the source text of a ``get_var(<type>, <name>)`` call."""
    quoted_type, quoted_name = _q(var_type), _q(name)
    return "get_var(" + quoted_type + ", " + quoted_name + ")"
+def _lp(n: int) -> str:
+    """set_current_line() prefix."""
+    return f"set_current_line({n})"
def _stmt(line_num: int, *parts: str) -> str:
    """Assemble one generated line: line marker, the body parts, then the tick.

    All pieces are separated by ``"; "``; the body parts keep their order.
    """
    body = "; ".join(parts)
    return "; ".join([_lp(line_num), body, _TICK])
+# ---------------------------------------------------------------------------
+# Register user-defined functions (called by ErrorCorrection after parsing)
+# ---------------------------------------------------------------------------
def register_user_function(name: str, input_type: str, output_type: str) -> None:
    """Record a user-defined function so that calls to it can be generated.

    The entry is stored under *name* as ``[name, input_type, output_type]``
    and replaces any earlier registration with the same name.
    """
    spec = [name, input_type, output_type]
    _user_functions.update({name: spec})
def get_user_functions() -> Dict[str, List[str]]:
    """Return the live registry of user-defined functions (not a copy)."""
    registry = _user_functions
    return registry
+# ---------------------------------------------------------------------------
+# Handlers — each takes (type_word, arg1, arg2, line_num) -> str
+# ---------------------------------------------------------------------------
+# -- MAKE: declare variables -----------------------------------------------
def _make(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the declaration for ``MAKE <type> <name> <value>``.

    Inside a function body the ``add_local_*`` variants are emitted instead
    of the global ones.  Unknown types produce an empty string.
    """
    name = _q(arg1)
    if type_word == "CONDITION":
        declare_cond = "add_local_cond" if _in_function else "add_cond"
        body = declare_cond + _args(name, _q(arg2))
    else:
        declare = "add_local_var" if _in_function else "add_var"
        if type_word == "BOOLEAN":
            # TRUE/FALSE become Python literals; anything else is emitted as-is.
            if arg2 == "TRUE":
                literal = "True"
            elif arg2 == "FALSE":
                literal = "False"
            else:
                literal = arg2
            body = declare + _args(_q("BOOLEAN"), name, literal)
        elif type_word == "STRING":
            body = declare + _args(_q("STR"), name, _q(arg2))
        elif type_word == "INTEGER":
            # The initial value is emitted verbatim, without quoting.
            body = declare + _args(_q("INT"), name, arg2)
        elif type_word == "LIST":
            # The size must parse as an int; int() raises ValueError otherwise.
            body = declare + _args(_q("LIST"), name, int(arg2))
        else:
            return ""
    return _stmt(ln, body)
+# -- SET: assignment / index / condition sides -----------------------------
def _set(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate code for ``SET``: assignment, list index, or condition side.

    Unknown types produce an empty string.
    """
    target = _q(arg1)
    assign_calls = {
        "INTEGER": "vm_assign_int",
        "STRING": "vm_assign_str",
        "LIST": "vm_assign_list",
    }
    if type_word in assign_calls:
        body = assign_calls[type_word] + _args(target, _q(arg2))
    elif type_word == "INDEX":
        # The index must parse as an int; int() raises ValueError otherwise.
        body = f"get_var('LIST',{target}).change_index({int(arg2)})"
    elif type_word in ("LEFT", "RIGHT"):
        setter = "set_left" if type_word == "LEFT" else "set_right"
        body = f"get_cond({target}).{setter}({_gv('INT', arg2)})"
    else:
        return ""
    return _stmt(ln, body)
+# -- CHANGE ----------------------------------------------------------------
def _change(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate code that replaces the comparison operator of condition *arg1*.

    *type_word* is ignored: changing the comparison is the only CHANGE
    operation implemented so far.
    """
    condition, comparison = _q(arg1), _q(arg2)
    call = "get_cond(" + condition + ").set_compare(" + comparison + ")"
    return _stmt(ln, call)
+# -- Control flow ----------------------------------------------------------
def _set_indent_change(line: int, delta: int) -> None:
    """Store *delta* as the indentation change that takes effect on *line*.

    The table is grown in place when *line* lies beyond its current end, so
    programs whose blocks reach past the pre-allocated size no longer raise
    IndexError.
    """
    shortfall = line + 1 - len(_indent_changes)
    if shortfall > 0:
        # extend() keeps the same list object, so every reader sees the growth.
        _indent_changes.extend([0] * shortfall)
    # NOTE(review): this overwrites whatever was recorded for the line, so two
    # blocks closing on the same line record a single dedent -- confirm that
    # the front end never produces that layout.
    _indent_changes[line] = delta


def _open_block(ln: int, end_line: str) -> None:
    """Indent from the line after *ln* and dedent from the line after *end_line*.

    Raises ValueError when *end_line* is not an integer literal.
    """
    _set_indent_change(ln + 1, 1)
    _set_indent_change(int(end_line) + 1, -1)


def _guard(type_word: str, name: str, ln: int) -> str:
    """Return the guard expression shared by WHILE, IF and ELIF.

    A CONDITION is evaluated through ``get_cond``; any other type word is
    read as a BOOLEAN variable.
    """
    if type_word == "CONDITION":
        test = f"get_cond({_q(name)}).evaluate()"
    else:
        test = f"get_var('BOOLEAN',{_q(name)}).read()"
    return f"set_current_line({ln}) and {test} and tick_line()"


def _while(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``while`` header; *arg2* is the last line of the loop body."""
    _open_block(ln, arg2)
    return f"while( {_guard(type_word, arg1, ln)} ):"


def _if(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate an ``if`` header; *arg2* is the last line of the branch."""
    _open_block(ln, arg2)
    return f"if( {_guard(type_word, arg1, ln)} ):"


def _elif(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate an ``elif`` header; *arg2* is the last line of the branch."""
    _open_block(ln, arg2)
    return f"elif( {_guard(type_word, arg1, ln)} ):"


def _iterate(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``for`` header over list *arg1*; *arg2* is the last body line.

    *type_word* is not used.
    """
    _open_block(ln, arg2)
    return f"for i in vm_loop_list({_gv('LIST', arg1)}, {ln}):"
+# -- PRINT -----------------------------------------------------------------
def _print(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``vm_print`` call for variable *arg1*.

    STRING prints a ``STR`` variable; every other type word prints an ``INT``.
    A trailing newline is requested only when *arg2* is ``BREAK``.
    """
    if type_word == "STRING":
        vm_type = "STR"
    else:
        vm_type = "INT"
    newline = str(arg2 == "BREAK")
    variable = f"get_var({_q(vm_type)},{_q(arg1)})"
    return _stmt(ln, f"vm_print({variable},{newline})")
+# -- GET: read from list ---------------------------------------------------
# Language type word -> VM type name accepted by GET.
_GET_TYPE_MAP = dict(INTEGER="INT", STRING="STR", BOOLEAN="BOOLEAN", LIST="LIST")


def _get(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate code that reads from list *arg1* into variable *arg2*.

    ``TYPE`` and ``LENGTH`` store the list's ``read_type()`` / ``get_size()``
    result; any other type word copies the value returned by the list's
    ``read()``.  Raises KeyError for a type word missing from _GET_TYPE_MAP.
    """
    source = f"get_var('LIST',{_q(arg1)})"
    if type_word == "TYPE":
        body = f"get_var('STR',{_q(arg2)}).write({source}.read_type())"
    elif type_word == "LENGTH":
        body = f"get_var('INT',{_q(arg2)}).write({source}.get_size())"
    else:
        vm_type = _GET_TYPE_MAP[type_word]
        body = f"get_var({_q(vm_type)},{_q(arg2)}).copy_var({source}.read())"
    return _stmt(ln, body)
+# -- WRITE: write to list --------------------------------------------------
# Language type word -> VM type name accepted by WRITE.
_WRITE_TYPE_MAP = dict(INTEGER="INT", STRING="STR", BOOLEAN="BOOLEAN", LIST="LIST")


def _write(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate code that places variable *arg2* into list *arg1*.

    Raises KeyError for a type word missing from _WRITE_TYPE_MAP.
    """
    vm_type = _WRITE_TYPE_MAP[type_word]
    target = "get_var('LIST'," + _q(arg1) + ")"
    # The VM type is emitted in double quotes, unlike the other arguments.
    call = target + ".place_value(" + _q(arg2) + ',"' + vm_type + '")'
    return _stmt(ln, call)
+# -- ADD (dual purpose: list resize / arithmetic with explicit dest) --------
def _add(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate code for ``ADD``, which has two meanings.

    ``ADD SIZE <list> <amount>`` grows a list (the first word is literally
    ``SIZE``); otherwise ``ADD <dest> <src1> <src2>`` is an arithmetic add
    with an explicit destination.
    """
    if dest == "SIZE":
        call = "vm_list_grow" + _args(_q(src1), _q(src2))
    else:
        call = "vm_add_to" + _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, call)
+# -- Arithmetic verbs — all take (dest, src1, src2, ln) --------------------
def _three_operand(vm_call: str, dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit ``vm_call( 'dest', 'src1', 'src2' )`` as a full statement."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, vm_call + operands)


def _subtract(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_sub_to`` statement."""
    return _three_operand("vm_sub_to", dest, src1, src2, ln)


def _multiply(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_mul_to`` statement."""
    return _three_operand("vm_mul_to", dest, src1, src2, ln)


def _divide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_float_div_to`` statement."""
    return _three_operand("vm_float_div_to", dest, src1, src2, ln)


def _simpledivide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_div_to`` statement."""
    return _three_operand("vm_div_to", dest, src1, src2, ln)


def _modulo(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_mod_to`` statement."""
    return _three_operand("vm_mod_to", dest, src1, src2, ln)


def _power(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_pow_to`` statement."""
    return _three_operand("vm_pow_to", dest, src1, src2, ln)


def _combine(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_concat_to`` statement."""
    return _three_operand("vm_concat_to", dest, src1, src2, ln)
+# -- PAD -------------------------------------------------------------------
def _pad(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``vm_pad_str`` statement for variable *arg1*.

    *arg2* is emitted verbatim (unquoted); *type_word* is not used.
    """
    call = "vm_pad_str" + _args(_q(arg1), arg2)
    return _stmt(ln, call)
+# -- TYPE ------------------------------------------------------------------
def _type(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``vm_type_to_int`` statement; *type_word* is not used."""
    call = "vm_type_to_int" + _args(_q(arg1), _q(arg2))
    return _stmt(ln, call)
+# -- FUNCTION: define ------------------------------------------------------
def _function(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the ``def`` header for function *arg1* and enter function mode.

    Side effects: sets ``_in_function``, stores the return type for later
    RETURN statements, and indents the following line.  *arg2* is not used.
    """
    global _in_function, _current_return_type
    # STRING and LIST map to their VM names; INTEGER and any other word use INT.
    if type_word == "STRING":
        return_type = "STR"
    elif type_word == "LIST":
        return_type = "LIST"
    else:
        return_type = "INT"
    _in_function = True
    _current_return_type = return_type
    _indent_changes[ln + 1] = 1
    return "def {}():".format(arg1)
+# -- RETURN ----------------------------------------------------------------
def _return(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``return`` of variable *arg1* from the current function.

    When *arg2* is ``BREAK`` this is the function's closing return: the next
    line is dedented and function mode is switched off.  *type_word* is not
    used.  Unlike ordinary statements, no tick suffix is appended.
    """
    global _in_function
    statement = "set_current_line({}); return(exit_function_call({}, {}))".format(
        ln, _q(_current_return_type), _q(arg1)
    )
    if arg2 == "BREAK":
        _indent_changes[ln + 1] = -1
        _in_function = False
    return statement
+# -- CALL: user-defined function -------------------------------------------
def _call(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a call to a user-defined function.

    *type_word* holds the function name, *arg1* the input variable and *arg2*
    the output variable.  Input and output types come from the registry; an
    unregistered function falls back to ``INT`` for both.
    """
    spec = _user_functions.get(type_word)
    if spec:
        input_type, output_type = spec[1], spec[2]
    else:
        # Not expected once every function has been registered.
        input_type = output_type = "INT"
    pieces = [
        _q(input_type),
        _q(arg1),
        type_word,  # the function object itself, emitted unquoted
        _q(output_type),
        _q(arg2),
        str(ln),
    ]
    return "enter_function_call(" + ", ".join(pieces) + ")"
+# ---------------------------------------------------------------------------
+# Dispatch table — keyed by VERB
+# ---------------------------------------------------------------------------
# Maps each VERB to the handler that generates its code.
_DISPATCH: Dict[str, Callable[[str, str, str, int], str]] = dict(
    (
        # declarations and assignment
        ("MAKE", _make),
        ("SET", _set),
        ("CHANGE", _change),
        # control flow
        ("WHILE", _while),
        ("IF", _if),
        ("ELIF", _elif),
        ("ITERATE", _iterate),
        # output and list access
        ("PRINT", _print),
        ("GET", _get),
        ("WRITE", _write),
        # arithmetic and strings
        ("ADD", _add),
        ("SUBTRACT", _subtract),
        ("MULTIPLY", _multiply),
        ("DIVIDE", _divide),
        ("SIMPLEDIVIDE", _simpledivide),
        ("MODULO", _modulo),
        ("POWER", _power),
        ("COMBINE", _combine),
        ("PAD", _pad),
        ("TYPE", _type),
        # functions
        ("FUNCTION", _function),
        ("RETURN", _return),
        ("CALL", _call),
    )
)
+# ---------------------------------------------------------------------------
+# Code generation
+# ---------------------------------------------------------------------------
def make_instruction(quad: List[str], line_num: int) -> str:
    """Translate one ``[VERB, TYPE, ARG1, ARG2]`` instruction to Python source.

    A verb without a registered handler is treated as the name of a
    user-defined function, with the next two words as its input and output
    variables.
    """
    handler = _DISPATCH.get(quad[0])
    if handler is None:
        return _call(quad[0], quad[1], quad[2], line_num)
    return handler(quad[1], quad[2], quad[3], line_num)
def make_py_file(instruction_list: List[List[str]]) -> None:
    """Compile *instruction_list* to Python and write it to test.py.

    The file is created next to this module.  Every compilation starts from
    fresh code-generator state, so calling this repeatedly in one process
    (for example from a long-running app) does not inherit indentation or
    function scope from an earlier program.  The output is written only after
    all instructions were generated successfully, so a failing compilation
    does not leave a truncated test.py behind.

    Args:
        instruction_list: ``[VERB, TYPE, ARG1, ARG2]`` rows; row *k* (1-based)
            is treated as source line *k*.
    """
    from pathlib import Path

    global _in_function, _current_return_type

    # Handlers communicate through module-level state; reset it so leftovers
    # from a previous call cannot leak into this one.  The table is replaced
    # in place (same list object) and sized to cover every line of the program.
    _in_function = False
    _current_return_type = ""
    _indent_changes[:] = [0] * max(1001, len(instruction_list) + 2)

    indent_unit = "    "
    generated = [
        "import sys\nimport os\n",
        "sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n",
        # NOTE(review): the generated file imports the runtime from a package
        # named Tzefa_Language -- confirm this matches the deployed layout.
        "from Tzefa_Language.createdpython import *\n",
        "print('VM TEST START')\n",
    ]
    indent_level = 0
    for i, quad in enumerate(instruction_list, start=1):
        # Handlers for earlier lines have already recorded this line's delta.
        indent_level += _indent_changes[i]
        generated.append(indent_unit * indent_level + make_instruction(quad, i) + "\n")
    generated.append("print_vars()\nprint('VM TEST END')\n")

    out_path = Path(__file__).parent / "test.py"
    with out_path.open("w", encoding="utf-8") as f:
        f.writelines(generated)
+# ---------------------------------------------------------------------------
+# Self-test
+# ---------------------------------------------------------------------------
if __name__ == "__main__":
    register_user_function("GREATESTDIV", "LIST", "LIST")
    # One instruction per row: VERB TYPE ARG1 ARG2.
    # For the arithmetic verbs (ADD, MODULO) the second word is the destination.
    _listing = """
        MAKE     INTEGER     THEINT        2769
        MAKE     INTEGER     THEINTI       1065
        MAKE     INTEGER     THROWONE      1065
        MAKE     INTEGER     THROWTWO      1065
        MAKE     LIST        LISTOFTWO     2
        SET      INDEX       LISTOFTWO     0
        WRITE    INTEGER     LISTOFTWO     THEINT
        SET      INDEX       LISTOFTWO     1
        WRITE    INTEGER     LISTOFTWO     THEINTI
        MAKE     INTEGER     ZERO          0
        ADD      TEMPORARY   THEINT        THEINTI
        PRINT    INTEGER     TEMPORARY     BREAK
        FUNCTION LIST        GREATESTDIV   LIST
        SET      INDEX       LISTOFTWO     0
        GET      INTEGER     LISTOFTWO     THROWONE
        SET      INDEX       LISTOFTWO     1
        GET      INTEGER     LISTOFTWO     THROWTWO
        MAKE     CONDITION   EUCLIDCOMPARE EQUALS
        SET      LEFT        EUCLIDCOMPARE THROWTWO
        SET      RIGHT       EUCLIDCOMPARE ZERO
        IF       CONDITION   EUCLIDCOMPARE 23
        WRITE    INTEGER     LISTOFTWO     THROWTWO
        RETURN   VALUE       LISTOFTWO     STAY
        SET      RIGHT       EUCLIDCOMPARE THROWTWO
        SET      INDEX       LISTOFTWO     0
        WRITE    INTEGER     LISTOFTWO     THROWTWO
        MODULO   TEMPORARY   THROWONE      THROWTWO
        SET      INDEX       LISTOFTWO     1
        WRITE    INTEGER     LISTOFTWO     TEMPORARY
        CALL     GREATESTDIV LISTOFTWO     LISTOFTWO
        RETURN   VALUE       LISTOFTWO     BREAK
        CALL     GREATESTDIV LISTOFTWO     LISTOFTWO
    """
    _sample = [row.split() for row in _listing.strip().splitlines()]
    make_py_file(_sample)

requirements.txt CHANGED Viewed

@@ -1,17 +1,18 @@
-torch
-torchvision
-transformers
-segmentation-models-pytorch
-albumentations
-ultralytics
-timm
-opencv-python-headless
-pillow
-numpy
-gradio
-huggingface_hub
-fast_edit_distance
-pydantic==2.10.6
-tiktoken
-protobuf
-sentencepiece

+torch
+torchvision
+transformers
+segmentation-models-pytorch
+ultralytics
+timm
+opencv-python-headless
+pillow
+numpy
+gradio
+huggingface_hub
+fast_edit_distance
+pydantic
+einops
+safetensors
+surya-ocr
+sentencepiece
+protobuf