Update Tzefa Space (full pipeline)
Browse files
- app.py +434 -488
- demo.png +2 -2
- language/ErrorCorrection.py +474 -364
- language/Number2Name.py +24 -15
- language/__pycache__/topy.cpython-313.pyc +0 -0
- language/createdpython.py +821 -596
- language/dialects.py +175 -0
- language/topy.py +281 -351
- requirements.txt +18 -17
app.py
CHANGED
|
@@ -1,488 +1,434 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Tzefa - Complete Pipeline Demo Space
|
| 3 |
-
Image
|
| 4 |
-
Error Correction
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
import
|
| 12 |
-
import
|
| 13 |
-
import
|
| 14 |
-
import
|
| 15 |
-
import
|
| 16 |
-
import
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
import
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
import
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
)
|
| 66 |
-
self.
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
nn.Conv2d(16,
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
models
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
models["
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
# ══════════════════════════════════════════════════════════════
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
# ══════════════════════════════════════════════════════════════
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
for
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
input_image = gr.Image(type="pil", label="Upload Image")
|
| 436 |
-
bin_choice = gr.Dropdown(
|
| 437 |
-
choices=["mit_b3 (Standard)", "mit_b5 (HighRes)"],
|
| 438 |
-
value="mit_b5 (HighRes)",
|
| 439 |
-
label="Binarization Model",
|
| 440 |
-
)
|
| 441 |
-
run_btn = gr.Button("Run Full Pipeline", variant="primary", size="lg")
|
| 442 |
-
with gr.Column(scale=1):
|
| 443 |
-
status_box = gr.Textbox(label="Terminal / Pipeline Status", lines=12, interactive=False)
|
| 444 |
-
|
| 445 |
-
with gr.Tabs():
|
| 446 |
-
with gr.Tab("Binarized"):
|
| 447 |
-
bin_out = gr.Image(label="Binarized Image")
|
| 448 |
-
with gr.Tab("Line Detection"):
|
| 449 |
-
line_out = gr.Image(label="Line Bounding Boxes")
|
| 450 |
-
with gr.Tab("Word Detection + OCR"):
|
| 451 |
-
word_out = gr.Image(label="Word Bboxes with OCR Labels")
|
| 452 |
-
with gr.Tab("Raw OCR"):
|
| 453 |
-
raw_out = gr.Textbox(label="Raw OCR (before correction)", lines=15, interactive=False)
|
| 454 |
-
with gr.Tab("Error Corrected"):
|
| 455 |
-
corrected_out = gr.Textbox(label="After Error Correction", lines=15, interactive=False)
|
| 456 |
-
with gr.Tab("Compiled Python"):
|
| 457 |
-
compiled_out = gr.Code(language="python", label="Generated Python Code")
|
| 458 |
-
with gr.Tab("Execution Output"):
|
| 459 |
-
exec_out = gr.Textbox(label="Program Output", lines=10, interactive=False)
|
| 460 |
-
|
| 461 |
-
run_btn.click(
|
| 462 |
-
fn=run_full_pipeline,
|
| 463 |
-
inputs=[input_image, bin_choice],
|
| 464 |
-
outputs=[bin_out, line_out, word_out, raw_out, corrected_out, compiled_out, exec_out, status_box],
|
| 465 |
-
api_name="predict"
|
| 466 |
-
)
|
| 467 |
-
|
| 468 |
-
gr.Examples(
|
| 469 |
-
examples=[["demo.png", "mit_b5 (HighRes)"]],
|
| 470 |
-
inputs=[input_image, bin_choice],
|
| 471 |
-
label="Example Images"
|
| 472 |
-
)
|
| 473 |
-
|
| 474 |
-
gr.Markdown(
|
| 475 |
-
"### Resources\n"
|
| 476 |
-
"| Component | Link |\n"
|
| 477 |
-
"|-----------|------|\n"
|
| 478 |
-
"| Binarization Demo | [WARAJA/Tzefa-Binarization](https://huggingface.co/spaces/WARAJA/Tzefa-Binarization) |\n"
|
| 479 |
-
"| b5 Model | [WARAJA/b5_model](https://huggingface.co/WARAJA/b5_model) |\n"
|
| 480 |
-
"| YOLO Model | [WARAJA/Tzefa-Line-Segmentation-YOLO](https://huggingface.co/WARAJA/Tzefa-Line-Segmentation-YOLO) |\n"
|
| 481 |
-
"| TrOCR Model | [WARAJA/Tzefa-Word-OCR-TrOCR](https://huggingface.co/WARAJA/Tzefa-Word-OCR-TrOCR) |\n"
|
| 482 |
-
"| Binarization Dataset | [WARAJA/Tzefa-Binarization-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Binarization-Dataset) |\n"
|
| 483 |
-
"| Line Seg Dataset | [WARAJA/Tzefa-Line-Segmentation-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Line-Segmentation-Dataset) |\n"
|
| 484 |
-
"| Word OCR Dataset | [WARAJA/Tzefa-Word-OCR-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Word-OCR-Dataset) |"
|
| 485 |
-
)
|
| 486 |
-
|
| 487 |
-
if __name__ == "__main__":
|
| 488 |
-
demo.queue().launch()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tzefa - Complete Pipeline Demo Space
|
| 3 |
+
Image → Binarization → Line Segmentation → Word Segmentation → OCR →
|
| 4 |
+
Error Correction → Compilation → Execution
|
| 5 |
+
|
| 6 |
+
Supports:
|
| 7 |
+
- Dialect toggle: 3-word (classic) / 4-word (verbose)
|
| 8 |
+
- Line segmentation toggle: YOLO (trained model) / Surya (general detector)
|
| 9 |
+
- Binarization model toggle: mit_b3 / mit_b5
|
| 10 |
+
"""
|
| 11 |
+
import os
|
| 12 |
+
import gc
|
| 13 |
+
import sys
|
| 14 |
+
import subprocess
|
| 15 |
+
import importlib
|
| 16 |
+
import traceback
|
| 17 |
+
|
| 18 |
+
import cv2
|
| 19 |
+
import torch
|
| 20 |
+
import numpy as np
|
| 21 |
+
from PIL import Image
|
| 22 |
+
import gradio as gr
|
| 23 |
+
from huggingface_hub import hf_hub_download
|
| 24 |
+
import segmentation_models_pytorch as smp
|
| 25 |
+
import torch.nn as nn
|
| 26 |
+
import torch.nn.functional as F
|
| 27 |
+
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 28 |
+
from ultralytics import YOLO
|
| 29 |
+
|
| 30 |
+
SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 31 |
+
sys.path.insert(0, SPACE_DIR)
|
| 32 |
+
|
| 33 |
+
from language.dialects import THREE_WORD, FOUR_WORD, CAPS_ONLY, MIXED_CASE
|
| 34 |
+
from language.ErrorCorrection import TzefaParser
|
| 35 |
+
from language import topy
|
| 36 |
+
|
| 37 |
+
# ══════════════════════════════════════════════════════════════
|
| 38 |
+
# CONFIG
|
| 39 |
+
# ══════════════════════════════════════════════════════════════
|
| 40 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 41 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 42 |
+
|
| 43 |
+
BIN_B3_REPO = "WARAJA/Model"
|
| 44 |
+
BIN_B3_FILE = "b3_model.pth"
|
| 45 |
+
BIN_B5_REPO = "WARAJA/b5_model"
|
| 46 |
+
BIN_B5_FILE = "b5_model.pth"
|
| 47 |
+
YOLO_REPO = "WARAJA/Tzefa-Line-Segmentation-YOLO"
|
| 48 |
+
YOLO_FILE = "best.pt"
|
| 49 |
+
TROCR_REPO = "WARAJA/Tzefa-Word-OCR-TrOCR"
|
| 50 |
+
TROCR_BASE_PROC = "microsoft/trocr-small-stage1"
|
| 51 |
+
|
| 52 |
+
TILE_SIZE = 640
|
| 53 |
+
YOLO_IMGSZ = 640
|
| 54 |
+
MAX_DILATE_ITERS = 200
|
| 55 |
+
|
| 56 |
+
_DIALECT_MAP = {"4-word (verbose)": FOUR_WORD, "3-word (classic)": THREE_WORD}
|
| 57 |
+
_CASING_MAP = {"CAPS only": CAPS_ONLY, "Mixed case": MIXED_CASE}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ══════════════════════════════════════════════════════════════
|
| 61 |
+
# 1. BINARIZATION
|
| 62 |
+
# ══════════════════════════════════════════════════════════════
|
| 63 |
+
class HighResMAnet(nn.Module):
    """MAnet segmentation network with an extra full-resolution convolutional stem.

    The input is processed twice: once by the ``smp.MAnet`` encoder/decoder and
    once by a shallow two-layer convolutional stem that never downsamples.
    Both feature maps are concatenated along the channel axis and reduced to
    ``classes`` output channels by ``final_fusion``.

    Attribute names (``base_model``, ``high_res_stem``, ``final_fusion``) are
    part of the checkpoint's state-dict keys and must not be renamed.

    Args:
        encoder_name: Encoder identifier passed to ``smp.MAnet``.
        classes: Number of output channels (logits) per pixel.
    """

    def __init__(self, encoder_name="mit_b5", classes=1):
        super().__init__()
        # No pretrained encoder weights: the full state dict is loaded later.
        self.base_model = smp.MAnet(
            encoder_name=encoder_name,
            encoder_weights=None,
            in_channels=3,
            classes=classes,
            encoder_depth=5,
            decoder_channels=(256, 128, 64, 32, 16),
        )
        # 3x3 convolutions with padding=1 keep the spatial size of the input.
        self.high_res_stem = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(True),
            nn.Conv2d(16, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(True),
        )
        # 48 input channels = 16 (last decoder stage) + 32 (high-res stem).
        self.final_fusion = nn.Sequential(
            nn.Conv2d(48, 16, 3, padding=1), nn.ReLU(True),
            nn.Conv2d(16, classes, 1),
        )

    def forward(self, x):
        """Return per-pixel logits for the image batch ``x``."""
        hr = self.high_res_stem(x)
        feat = self.base_model.encoder(x)
        # The MAnet segmentation head is bypassed on purpose; the decoder
        # features are fused with the stem output instead.
        dec = self.base_model.decoder(feat)
        return self.final_fusion(torch.cat([dec, hr], dim=1))
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _load_checkpoint_into(model, ckpt_path):
    """Load the checkpoint at ``ckpt_path`` into ``model`` and return it in eval mode on DEVICE."""
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # repositories you control.
    ckpt = torch.load(ckpt_path, map_location=DEVICE)
    # Checkpoints may be a bare state dict or wrap it under "model_state_dict".
    model.load_state_dict(ckpt.get("model_state_dict", ckpt))
    return model.to(DEVICE).eval()


def _load_bin_models():
    """Download and build both binarization models.

    Returns:
        dict mapping the UI label ("mit_b3 (Standard)" / "mit_b5 (HighRes)")
        to the corresponding model, moved to DEVICE and switched to eval mode.
    """
    models = {}

    # The b3 checkpoint is stored in a Space repository, not a model repository.
    b3_path = hf_hub_download(BIN_B3_REPO, BIN_B3_FILE, token=HF_TOKEN, repo_type="space")
    m3 = smp.Unet(encoder_name="mit_b3", encoder_weights=None, in_channels=3, classes=1)
    models["mit_b3 (Standard)"] = _load_checkpoint_into(m3, b3_path)

    b5_path = hf_hub_download(BIN_B5_REPO, BIN_B5_FILE, token=HF_TOKEN, repo_type="model")
    m5 = HighResMAnet(encoder_name="mit_b5")
    models["mit_b5 (HighRes)"] = _load_checkpoint_into(m5, b5_path)

    return models
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _preprocess_tile(pil_img):
|
| 103 |
+
arr = np.array(pil_img).astype(np.float32) / 255.0
|
| 104 |
+
mean = np.array([0.485, 0.456, 0.406])
|
| 105 |
+
std = np.array([0.229, 0.224, 0.225])
|
| 106 |
+
return torch.from_numpy(((arr - mean) / std).transpose(2, 0, 1))
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def binarize(pil_img, model):
    """Binarize an RGB image tile by tile with a segmentation model.

    The image is padded with white up to a multiple of TILE_SIZE, each
    TILE_SIZE x TILE_SIZE tile is run through ``model``, and the sigmoid of the
    logits is thresholded at 0.5. Pixels above the threshold are written as
    black (0), everything else as white (255).

    Args:
        pil_img: RGB ``PIL.Image`` to binarize.
        model: Callable returning logits of shape (1, 1, h, w) for a
            (1, 3, TILE_SIZE, TILE_SIZE) float tensor on DEVICE.

    Returns:
        Mode "L" ``PIL.Image`` with the same size as ``pil_img``.
    """
    orig_w, orig_h = pil_img.size
    # Amount needed to reach the next multiple of TILE_SIZE (0 if already aligned).
    pad_w = (TILE_SIZE - orig_w % TILE_SIZE) % TILE_SIZE
    pad_h = (TILE_SIZE - orig_h % TILE_SIZE) % TILE_SIZE

    padded = Image.new("RGB", (orig_w + pad_w, orig_h + pad_h), (255, 255, 255))
    padded.paste(pil_img, (0, 0))
    nw, nh = padded.size
    canvas = Image.new("L", (nw, nh), 255)

    for y in range(0, nh, TILE_SIZE):
        for x in range(0, nw, TILE_SIZE):
            tile = padded.crop((x, y, x + TILE_SIZE, y + TILE_SIZE))
            t = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
            with torch.no_grad():
                logits = model(t)
            # Some models emit a lower-resolution map; bring it back to tile size.
            if logits.shape[-2:] != (TILE_SIZE, TILE_SIZE):
                logits = F.interpolate(
                    logits, (TILE_SIZE, TILE_SIZE), mode="bilinear", align_corners=False
                )
            mask = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
            # Invert: predicted foreground (1) becomes black ink on white paper.
            tile_out = Image.fromarray(((1.0 - mask) * 255).astype(np.uint8))
            canvas.paste(tile_out, (x, y))

    # Drop the padding again.
    return canvas.crop((0, 0, orig_w, orig_h))
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# ══════════════════════════════════════════════════════════════
|
| 131 |
+
# 2. LINE SEGMENTATION
|
| 132 |
+
# ══════════════════════════════════════════════════════════════
|
| 133 |
+
def _load_yolo():
    """Download the line-segmentation weights from the Hub and return a ``YOLO`` model."""
    path = hf_hub_download(YOLO_REPO, YOLO_FILE, token=HF_TOKEN, repo_type="model")
    return YOLO(path)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def segment_lines_yolo(bin_arr, yolo_model):
    """Detect text lines with the oriented-bounding-box YOLO model.

    Each oriented box is reduced to its axis-aligned extent, widened
    horizontally by 12 % of its width on both sides (no vertical padding),
    and clipped to the image.

    Args:
        bin_arr: Binarized image as a 2-D (grayscale) or 3-D array.
        yolo_model: Loaded ``YOLO`` model exposing ``predict``.

    Returns:
        List of ``(x, y, w, h)`` integer tuples ordered top to bottom by the
        topmost corner of each detection. Empty if nothing was detected.
    """
    img_rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
    orig_h, orig_w = img_rgb.shape[:2]
    results = yolo_model.predict(img_rgb, imgsz=YOLO_IMGSZ, conf=0.2, iou=0.2, verbose=False)

    truelines = []
    if len(results) > 0 and results[0].obb is not None:
        # Each entry holds the four (x, y) corners of one oriented box.
        obbs = sorted(results[0].obb.xyxyxyxy.cpu().numpy(), key=lambda p: np.min(p[:, 1]))
        for pts in obbs:
            rx0, rx1 = np.min(pts[:, 0]), np.max(pts[:, 0])
            ry0, ry1 = np.min(pts[:, 1]), np.max(pts[:, 1])
            pad = (rx1 - rx0) * 0.12
            x0 = int(np.clip(rx0 - pad, 0, orig_w))
            x1 = int(np.clip(rx1 + pad, 0, orig_w))
            y0 = int(np.clip(ry0, 0, orig_h))
            y1 = int(np.clip(ry1, 0, orig_h))
            # Skip boxes that collapse to nothing after clipping.
            if x1 - x0 > 0 and y1 - y0 > 0:
                truelines.append((x0, y0, x1 - x0, y1 - y0))
    return truelines
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
_surya_predictor = None
|
| 159 |
+
|
| 160 |
+
def _merge_vertically_overlapping(boxes):
    """Fold ``[x1, y1, x2, y2]`` boxes into rows: each box joins the first row it overlaps vertically."""
    rows = []
    for box in boxes:
        for row in rows:
            if row[1] < box[3] and box[1] < row[3]:
                # Grow the row to the union of both rectangles.
                row[0] = min(row[0], box[0])
                row[1] = min(row[1], box[1])
                row[2] = max(row[2], box[2])
                row[3] = max(row[3], box[3])
                break
        else:
            rows.append(list(box))
    return rows


def segment_lines_surya(bin_arr):
    """Detect text lines with Surya's general-purpose text detector.

    Detections below a confidence of 0.6 or not larger than 5 px in either
    dimension are dropped; the rest are merged into rows whenever their
    vertical extents overlap.

    Args:
        bin_arr: Binarized image as a 2-D (grayscale) or 3-D array.

    Returns:
        List of ``(x, y, w, h)`` integer tuples sorted by top edge.

    Raises:
        RuntimeError: If the ``surya`` package cannot be imported.
    """
    global _surya_predictor
    # Set before importing surya; existing values in the environment win.
    # NOTE(review): presumably surya reads these thresholds at import time - confirm.
    os.environ.setdefault("DETECTOR_TEXT_THRESHOLD", "0.75")
    os.environ.setdefault("DETECTOR_BLANK_THRESHOLD", "0.45")
    try:
        from surya.detection import DetectionPredictor
    except ImportError as err:
        raise RuntimeError(
            "surya-ocr not installed. Add 'surya-ocr' to requirements.txt."
        ) from err

    # The predictor is created once and cached in the module-level global.
    if _surya_predictor is None:
        _surya_predictor = DetectionPredictor()

    img_rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
    pil_image = Image.fromarray(img_rgb)
    predictions = _surya_predictor([pil_image])

    CONF_THRESHOLD = 0.6
    raw = []
    if predictions and predictions[0].bboxes:
        for bbox in predictions[0].bboxes:
            # Boxes without a confidence attribute are treated as certain.
            conf = getattr(bbox, "confidence", 1.0)
            if conf < CONF_THRESHOLD:
                continue
            x1, y1, x2, y2 = bbox.bbox
            if (x2 - x1) > 5 and (y2 - y1) > 5:
                raw.append([float(x1), float(y1), float(x2), float(y2)])

    # Process boxes top to bottom by vertical centre so rows build up in order.
    raw.sort(key=lambda b: (b[1] + b[3]) / 2)
    merged = _merge_vertically_overlapping(raw)
    merged.sort(key=lambda b: b[1])
    return [(int(b[0]), int(b[1]), int(b[2] - b[0]), int(b[3] - b[1])) for b in merged]
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# ══════════════════════════════════════════════════════════════
|
| 206 |
+
# 3. WORD SEGMENTATION
|
| 207 |
+
# ══════════════════════════════════════════════════════════════
|
| 208 |
+
def _get_word_boxes(dilated, min_w, min_h):
    """Return bounding boxes of the external contours in ``dilated``.

    Args:
        dilated: Single-channel image passed to ``cv2.findContours``.
        min_w: Minimum box width to keep.
        min_h: Minimum box height to keep.

    Returns:
        List of ``(x, y, w, h)`` boxes at least ``min_w`` x ``min_h`` in size,
        sorted left to right by ``x``.
    """
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(c) for c in contours]
    return sorted(
        [b for b in boxes if b[2] >= min_w and b[3] >= min_h],
        key=lambda b: b[0],
    )
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _merge_closest_boxes(boxes, target_words):
    """Fuse the pair of neighbouring boxes with the smallest horizontal gap until ``target_words`` remain."""
    boxes = list(boxes)
    while len(boxes) > target_words:
        # (gap, index) pairs; min() picks the smallest gap, leftmost on ties.
        gaps = [
            (boxes[j + 1][0] - (boxes[j][0] + boxes[j][2]), j)
            for j in range(len(boxes) - 1)
        ]
        _, mi = min(gaps)
        b1, b2 = boxes[mi], boxes[mi + 1]
        left, top = min(b1[0], b2[0]), min(b1[1], b2[1])
        right = max(b1[0] + b1[2], b2[0] + b2[2])
        bottom = max(b1[1] + b1[3], b2[1] + b2[3])
        boxes[mi] = (left, top, right - left, bottom - top)
        boxes.pop(mi + 1)
    return boxes


def segment_words(bin_arr, lines, target_words):
    """Split every text line into exactly ``target_words`` horizontal word spans.

    For each line the inverted crop is dilated step by step (at most
    MAX_DILATE_ITERS times) until the number of connected blobs equals
    ``target_words``. If dilation overshoots (or never gets down to the
    target), the last segmentation that still had too many boxes is reduced
    by merging the closest neighbours. Lines that cannot be brought to
    exactly ``target_words`` boxes are omitted from the result.

    Args:
        bin_arr: Binarized page as a 2-D array (dark ink on light background).
        lines: Iterable of ``(x, y, w, h)`` line boxes.
        target_words: Required number of words per line.

    Returns:
        dict mapping 1-based line index to ``{1-based word index: (x_start, x_end)}``,
        with x coordinates relative to the left edge of the (clipped) line box.
    """
    words_dict = {}
    # Nothing can be segmented into fewer than one word; this also keeps the
    # merge step from ever running out of neighbouring pairs.
    if target_words < 1:
        return words_dict

    ih, iw = bin_arr.shape[:2]
    for i, (lx, ly, lw, lh) in enumerate(lines):
        # Clip the line box to the image.
        ly, lx = max(0, ly), max(0, lx)
        lh, lw = min(lh, ih - ly), min(lw, iw - lx)
        if lw <= 0 or lh <= 0:
            continue

        crop = bin_arr[ly:ly + lh, lx:lx + lw]
        inv = cv2.bitwise_not(crop)  # contours are traced on bright pixels
        min_ww = max(5, int(lw * 0.02))
        min_wh = max(5, int(lh * 0.25))
        # Wider than tall: glyphs grow together horizontally first.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))

        dilated, prev, found = inv.copy(), None, False
        for _ in range(MAX_DILATE_ITERS):
            dilated = cv2.dilate(dilated, kernel, iterations=1)
            boxes = _get_word_boxes(dilated, min_ww, min_wh)
            if len(boxes) == target_words:
                prev, found = boxes, True
                break
            if len(boxes) < target_words:
                # Overshot: fall back to the last over-segmented state.
                break
            prev = boxes

        if not found and prev and len(prev) > target_words:
            prev = _merge_closest_boxes(prev, target_words)
            found = True

        if not found or not prev or len(prev) != target_words:
            continue

        words_dict[i + 1] = {
            wi + 1: (wx, wx + ww) for wi, (wx, _wy, ww, _wh) in enumerate(prev)
        }
    return words_dict
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
# ══════════════════════════════════════════════════════════════
|
| 258 |
+
# 4. OCR
|
| 259 |
+
# ══════════════════════════════════════════════════════════════
|
| 260 |
+
def _load_trocr():
    """Load the TrOCR processor and the fine-tuned recognition model.

    The processor comes from the base checkpoint (TROCR_BASE_PROC) while the
    weights come from TROCR_REPO.

    Returns:
        ``(processor, model)`` with the model on DEVICE in eval mode.
    """
    proc = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
    model = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN)
    model = model.to(DEVICE).eval()
    return proc, model
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def _pad_aspect(img, max_ratio=4.0):
|
| 267 |
+
w, h = img.size
|
| 268 |
+
if w <= max_ratio * h:
|
| 269 |
+
return img
|
| 270 |
+
th = int(w / max_ratio)
|
| 271 |
+
pad = th - h
|
| 272 |
+
from PIL import ImageOps
|
| 273 |
+
return ImageOps.expand(img, (0, pad//2, 0, pad - pad//2), fill=(255, 255, 255))
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def ocr_word(img_pil, proc, model):
    """Recognise a single word crop with TrOCR.

    Args:
        img_pil: ``PIL.Image`` of one word; converted to RGB if necessary.
        proc: Processor providing ``__call__`` (image -> pixel values) and
            ``batch_decode``.
        model: Generation model exposing ``generate``.

    Returns:
        The longest whitespace-separated token of the decoded text, or the
        decoded text itself when it contains no tokens.
    """
    if img_pil.mode != "RGB":
        img_pil = img_pil.convert("RGB")
    img_pil = _pad_aspect(img_pil)

    pv = proc(img_pil, return_tensors="pt").pixel_values.to(DEVICE)
    with torch.no_grad():
        ids = model.generate(pv)
    txt = proc.batch_decode(ids, skip_special_tokens=True)[0]

    # A crop is supposed to hold one word; keep the longest piece if the
    # model emitted several.
    parts = txt.split()
    return max(parts, key=len) if parts else txt
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# ══════════════════════════════════════════════════════════════
|
| 289 |
+
# 5. VISUALISATION
|
| 290 |
+
# ══════════════════════════════════════════════════════════════
|
| 291 |
+
def draw_line_bboxes(img_arr, bboxes):
    """Return a copy of ``img_arr`` with numbered line boxes drawn on it.

    Args:
        img_arr: 2-D grayscale or 3-D colour image array; never modified.
        bboxes: Iterable of ``(x, y, w, h)`` integer boxes.

    Returns:
        3-channel image with a rectangle and a 1-based index label per box.
    """
    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
    for i, (x, y, w, h) in enumerate(bboxes):
        cv2.rectangle(vis, (x, y), (x + w, y + h), (255, 50, 50), 2)
        # Keep the label inside the image when the box touches the top edge.
        cv2.putText(vis, str(i + 1), (x, max(y - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
    return vis
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def draw_word_bboxes(img_arr, word_tuples):
    """Return a copy of ``img_arr`` with word boxes and their OCR text drawn on it.

    Args:
        img_arr: 2-D grayscale or 3-D colour image array; never modified.
        word_tuples: One list per line, each holding
            ``(text, (x1, y1, x2, y2))`` entries.

    Returns:
        3-channel image; the colour cycles with the word's position in its line.
    """
    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
    colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50), (220, 50, 220)]
    for lt in word_tuples:
        for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
            c = colors[wi % len(colors)]
            cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
            cv2.putText(vis, text, (x1, max(y1 - 4, 0)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
    return vis
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# ══════════════════════════════════════════════════════════════
|
| 311 |
+
# 6. UTILITIES
|
| 312 |
+
# ══════════════════════════════════════════════════════════════
|
| 313 |
+
def clear_vram():
    """Run the garbage collector and, if CUDA is available, empty PyTorch's CUDA cache."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def execute_code(compiled_code, timeout=15):
    """Run Python source in a child interpreter and return its textual output.

    Args:
        compiled_code: Python source passed to ``python -c``.
        timeout: Seconds to wait before the child is killed.

    Returns:
        Captured stdout, followed by a ``--- STDERR ---`` section when stderr
        is non-empty and an exit-code line when the process failed;
        ``"(no output)"`` if all of that is blank. Failures to run the process
        are reported as a bracketed message instead of being raised.
    """
    # NOTE(security): the code runs with this process's privileges and
    # environment, in SPACE_DIR. Presumably it is generated from uploaded
    # images, i.e. untrusted input - consider sandboxing. Not changed here.
    try:
        result = subprocess.run(
            [sys.executable, "-c", compiled_code],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=SPACE_DIR,
        )
        output = result.stdout
        if result.stderr:
            output += "\n--- STDERR ---\n" + result.stderr
        if result.returncode != 0:
            output += f"\n[Process exited with code {result.returncode}]"
        stripped = output.strip()
        return stripped if stripped else "(no output)"
    except subprocess.TimeoutExpired:
        return f"[Execution timed out after {timeout} seconds]"
    except Exception as e:
        # Deliberate catch-all: the UI shows the message instead of crashing.
        return f"[Execution error: {e}]"
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
# ══════════════════════════════════════════════════════════════
|
| 339 |
+
# 7. FULL PIPELINE
|
| 340 |
+
# ══════════════════════════════════════════════════════════════
|
| 341 |
+
def run_full_pipeline(input_image, bin_model_choice, dialect_choice, casing_choice, seg_method):
|
| 342 |
+
if input_image is None:
|
| 343 |
+
return None, None, None, "", "", "", "", "No image provided."
|
| 344 |
+
|
| 345 |
+
if isinstance(input_image, np.ndarray):
|
| 346 |
+
pil_img = Image.fromarray(input_image).convert("RGB")
|
| 347 |
+
else:
|
| 348 |
+
pil_img = input_image.convert("RGB")
|
| 349 |
+
|
| 350 |
+
dialect = _DIALECT_MAP.get(dialect_choice, FOUR_WORD)
|
| 351 |
+
casing = _CASING_MAP.get(casing_choice, CAPS_ONLY)
|
| 352 |
+
status = []
|
| 353 |
+
|
| 354 |
+
# Fresh language state for every run
|
| 355 |
+
importlib.reload(topy)
|
| 356 |
+
parser = TzefaParser(dialect=dialect, casing=casing)
|
| 357 |
+
target_words = parser.expected_words_per_line
|
| 358 |
+
|
| 359 |
+
# ── Stage 1: Binarization ──
|
| 360 |
+
try:
|
| 361 |
+
status.append("[1/6] Binarization...")
|
| 362 |
+
bin_models = _load_bin_models()
|
| 363 |
+
bin_pil = binarize(pil_img, bin_models[bin_model_choice])
|
| 364 |
+
bin_arr = np.array(bin_pil)
|
| 365 |
+
del bin_models; clear_vram()
|
| 366 |
+
status.append(" OK")
|
| 367 |
+
except Exception as e:
|
| 368 |
+
return None, None, None, "", "", "", "", f"Binarization failed: {e}"
|
| 369 |
+
|
| 370 |
+
# ── Stage 2: Line Segmentation ──
|
| 371 |
+
try:
|
| 372 |
+
status.append(f"[2/6] Line Segmentation ({seg_method})...")
|
| 373 |
+
if seg_method == "Surya":
|
| 374 |
+
truelines = segment_lines_surya(bin_arr)
|
| 375 |
+
else:
|
| 376 |
+
yolo_model = _load_yolo()
|
| 377 |
+
truelines = segment_lines_yolo(bin_arr, yolo_model)
|
| 378 |
+
del yolo_model; clear_vram()
|
| 379 |
+
status.append(f" OK {len(truelines)} lines")
|
| 380 |
+
line_vis = draw_line_bboxes(bin_arr, truelines)
|
| 381 |
+
except Exception as e:
|
| 382 |
+
return bin_arr, None, None, "", "", "", "", f"Line Seg failed: {e}\n{traceback.format_exc()}"
|
| 383 |
+
|
| 384 |
+
# ── Stage 3: Word Seg + OCR ──
|
| 385 |
+
try:
|
| 386 |
+
status.append("[3/6] Word Segmentation + OCR...")
|
| 387 |
+
words = segment_words(bin_arr, truelines, target_words)
|
| 388 |
+
proc, trocr_model = _load_trocr()
|
| 389 |
+
all_line_tuples, raw_lines = [], []
|
| 390 |
+
for ln in sorted(words.keys()):
|
| 391 |
+
if ln - 1 >= len(truelines):
|
| 392 |
+
continue
|
| 393 |
+
lx, ly, lw, lh = truelines[ln - 1]
|
| 394 |
+
line_tuples = []
|
| 395 |
+
for wn in sorted(words[ln].keys()):
|
| 396 |
+
wx1, wx2 = words[ln][wn]
|
| 397 |
+
ax1 = max(0, int(lx + wx1))
|
| 398 |
+
ax2 = min(bin_arr.shape[1], int(lx + wx2))
|
| 399 |
+
ay1 = max(0, ly - 20)
|
| 400 |
+
ay2 = min(bin_arr.shape[0], ly + lh + 20)
|
| 401 |
+
text = ocr_word(Image.fromarray(bin_arr[ay1:ay2, ax1:ax2]), proc, trocr_model)
|
| 402 |
+
line_tuples.append((text, (ax1, ay1, ax2, ay2)))
|
| 403 |
+
raw_lines.append(" ".join(t[0] for t in line_tuples))
|
| 404 |
+
all_line_tuples.append(line_tuples)
|
| 405 |
+
del proc, trocr_model; clear_vram()
|
| 406 |
+
word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
|
| 407 |
+
raw_text = "\n".join(raw_lines)
|
| 408 |
+
status.append(f" OK {len(raw_lines)} lines recognised")
|
| 409 |
+
except Exception as e:
|
| 410 |
+
return bin_arr, line_vis, None, "", "", "", "", f"OCR failed: {e}\n{traceback.format_exc()}"
|
| 411 |
+
|
| 412 |
+
# ── Stage 4: Error Correction ──
|
| 413 |
+
try:
|
| 414 |
+
status.append("[4/6] Error Correction...")
|
| 415 |
+
parser.init_indent_table(len(truelines))
|
| 416 |
+
corrected_lines, bytecode_list = [], []
|
| 417 |
+
for line_entries in all_line_tuples:
|
| 418 |
+
if not line_entries:
|
| 419 |
+
corrected_lines.append("")
|
| 420 |
+
bytecode_list.append(["MAKE", "INTEGER", "TEMPORARY", "0"])
|
| 421 |
+
continue
|
| 422 |
+
raw_tokens = [t[0] for t in line_entries]
|
| 423 |
+
while len(raw_tokens) < target_words:
|
| 424 |
+
raw_tokens.append("")
|
| 425 |
+
raw_tokens = raw_tokens[:target_words]
|
| 426 |
+
normalised = parser.normalize_source_line(raw_tokens)
|
| 427 |
+
bytecode = parser.parse_line(normalised)
|
| 428 |
+
bytecode_list.append(bytecode)
|
| 429 |
+
corrected_lines.append(" ".join(bytecode)) # post-correction output
|
| 430 |
+
corrected_text = "\n".join(corrected_lines)
|
| 431 |
+
status.append(" OK")
|
| 432 |
+
except Exception as e:
|
| 433 |
+
return bin_arr, line_vis, word_vis, raw_text, "", "", "", \
|
| 434 |
+
f"Error Correction failed: {e}\n{traceback.format_exc()}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
demo.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
language/ErrorCorrection.py
CHANGED
|
@@ -1,373 +1,483 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from language import Number2Name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from fast_edit_distance import edit_distance
|
| 3 |
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
["
|
| 85 |
-
["
|
| 86 |
-
|
| 87 |
-
[
|
| 88 |
-
|
| 89 |
-
["
|
| 90 |
-
["
|
| 91 |
-
["
|
| 92 |
-
["
|
| 93 |
-
["
|
| 94 |
-
["
|
| 95 |
-
["
|
| 96 |
-
|
| 97 |
-
[
|
| 98 |
-
["
|
| 99 |
-
["
|
| 100 |
-
|
| 101 |
-
[
|
| 102 |
-
["
|
| 103 |
-
["PRINTSTRING", "STR", "STATE"],
|
| 104 |
-
["PRINTINTEGER", "INT", "STATE"],
|
| 105 |
-
["SETINDEX", "LIST", "INT"],
|
| 106 |
-
["TYPETOINT", "STR", "INT"],
|
| 107 |
-
["GETSTRING", "LIST", "STR"],
|
| 108 |
-
["GETINTEGER", "LIST", "INT"],
|
| 109 |
-
["WRITEINTEGER", "LIST", "INT"],
|
| 110 |
-
["WRITESTRING", "LIST", "STR"],
|
| 111 |
-
["WRITEBOOL", "LIST", "BOOL"],
|
| 112 |
-
["WRITELIST", "LIST", "LIST"],
|
| 113 |
-
["GETLIST", "LIST", "LIST"],
|
| 114 |
-
["GETBOOL", "LIST", "BOOL"],
|
| 115 |
-
["GETTYPE", "LIST", "STR"],
|
| 116 |
-
["LENGTH", "LIST", "INT"],
|
| 117 |
-
["ADDVALUES", "INT", "INT"],
|
| 118 |
-
["MULTIPLY", "INT", "INT"],
|
| 119 |
-
["MATHPOW", "INT", "INT"],
|
| 120 |
-
["DIVIDE", "INT", "INT"],
|
| 121 |
-
["SIMPLEDIVIDE", "INT", "INT"],
|
| 122 |
-
["SUBTRACT", "INT", "INT"],
|
| 123 |
-
["MODULO", "INT", "INT"],
|
| 124 |
-
["COMBINE", "STR", "STR"],
|
| 125 |
-
["BLANKSPACES", "STR", "NUMNAME"],
|
| 126 |
-
["ADDSIZE", "LIST", "INT"],
|
| 127 |
-
["ASSSIGNINT", "INT", "INT"],
|
| 128 |
-
["STRINGASSIGN", "STR", "STR"],
|
| 129 |
-
["COPYLIST", "LIST", "LIST"],
|
| 130 |
-
]
|
| 131 |
-
listsimplefunc = [tosimple(i) for i in listfunctions]
|
| 132 |
-
listofindents = []
|
| 133 |
-
listezfunc = [i[0] for i in listfunctions]
|
| 134 |
-
|
| 135 |
-
# Variable Names only
|
| 136 |
-
listintegers = ["TEMPORARY", "LOCALINT", "LOOPINTEGER"]
|
| 137 |
-
|
| 138 |
-
# Immediate Number Names (Index 10)
|
| 139 |
-
listnumnames = []
|
| 140 |
-
word_to_num = {}
|
| 141 |
-
for i in range(101):
|
| 142 |
-
name = Number2Name.get_name(i)
|
| 143 |
-
listnumnames.append(name)
|
| 144 |
-
word_to_num[name] = str(i)
|
| 145 |
-
|
| 146 |
-
liststrings = ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING", "INTEGER", "STRING", "LIST", "BOOLEAN"]
|
| 147 |
-
listlists = ["GLOBALLIST", "LOOPLIST"]
|
| 148 |
-
listconds = ["THETRUTH"]
|
| 149 |
-
listbools = ["LOOPBOOL"]
|
| 150 |
-
liststate = ["STAY", "BREAK"]
|
| 151 |
-
listype = ["INTEGER", "STRING", "LIST", "BOOLEAN"]
|
| 152 |
-
lookuptype = {"INTEGER": "INT", "STRING": "STR", "LIST": "LIST", "BOOLEAN": "BOOL"}
|
| 153 |
-
listtruth = ["TRUE", "FALSE"]
|
| 154 |
-
listcompare = ["EQUALS", "BIGEQUALS", "BIGGER"]
|
| 155 |
-
listtext = [] # Placeholder for raw text arguments (Index 11)
|
| 156 |
-
|
| 157 |
-
listall = [
|
| 158 |
-
listintegers,
|
| 159 |
-
liststrings,
|
| 160 |
-
listlists,
|
| 161 |
-
listbools,
|
| 162 |
-
listconds,
|
| 163 |
-
liststate,
|
| 164 |
-
listype,
|
| 165 |
-
listezfunc,
|
| 166 |
-
listtruth,
|
| 167 |
-
listcompare,
|
| 168 |
-
listnumnames, # Index 10: Immediates
|
| 169 |
-
listtext, # Index 11: Text
|
| 170 |
]
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
""
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
else:
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
)
|
| 233 |
-
return dp[m][n]
|
| 234 |
-
|
| 235 |
-
def findword(somelist, word, use_ocr_weights=False):
|
| 236 |
-
"""
|
| 237 |
-
Find the closest match to `word` in `somelist`.
|
| 238 |
-
|
| 239 |
-
use_ocr_weights=True → ocr_edit_distance (custom weighted, no cap)
|
| 240 |
-
used for function/command name lookups where OCR
|
| 241 |
-
can produce arbitrarily garbled prefixes/suffixes.
|
| 242 |
-
use_ocr_weights=False → standard edit_distance with a generous cap (32)
|
| 243 |
-
used for argument vocab lookups (short words, small lists).
|
| 244 |
-
"""
|
| 245 |
-
min_dist = 999
|
| 246 |
-
tobereturned = [word, 0]
|
| 247 |
-
lentobereturned = 16
|
| 248 |
-
for b in range(len(somelist)):
|
| 249 |
-
lenword = len(word)
|
| 250 |
-
i = somelist[b]
|
| 251 |
-
lenofi = len(i)
|
| 252 |
-
if i == word:
|
| 253 |
-
return [i, b]
|
| 254 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
if use_ocr_weights:
|
| 256 |
-
|
| 257 |
else:
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
func, index = findword(listezfunc, firstword, use_ocr_weights=True)
|
| 273 |
-
# Check if Arg2 (Index 2 in definition) is NUMNAME (Index 10 in listall)
|
| 274 |
-
# We use listfunctions directly to check the string type
|
| 275 |
-
if listfunctions[index][2] == "NUMNAME":
|
| 276 |
-
return (func, index, 1)
|
| 277 |
-
else:
|
| 278 |
-
return (func, index, 0)
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
def toline(line, index, listofindents):
    """Error-correct one three-word source line for the opcode at *index*.

    Args:
        line: Raw source line; it is split on single spaces and words 1 and 2
            are used as the two arguments.
        index: Position of the already-matched opcode in ``listezfunc`` /
            ``listsimplefunc``.
        listofindents: Indent-change table; entries are set to ``1`` where a
            block opens and ``-1`` where it closes.

    Returns:
        ``[opcode, arg1, arg2]`` with the arguments corrected where possible.

    Side effects: advances the module-level ``counter``, updates
    ``insidefunction`` / ``thetype``, and appends newly declared names to the
    module-level lookup lists.
    """
    global counter
    global thetype
    global insidefunction

    words = line.split(" ")
    opcode = listezfunc[index]
    shape = listsimplefunc[index]
    out = [opcode, "", ""]
    problem = False  # computed for parity with the original; never read

    # Function-definition opcodes and the kind their function returns.
    return_kinds = {
        "STRINGFUNCTION": "STR",
        "INTEGERFUNCTION": "INT",
        "LISTFUNCTION": "LIST",
    }

    if opcode in return_kinds:
        if insidefunction:
            # A definition while another one is still open is flagged only.
            problem = True
        else:
            insidefunction = True
            out[1] = words[1]
            out[2] = findword(listype, words[2])[0]
            thetype.append(out[2])
            # Register the new function so later lines can call it by name.
            signature = [out[1], return_kinds[opcode], lookuptype[out[2]]]
            listezfunc.append(signature[0])
            listfunctions.append(signature)
            listsimplefunc.append(tosimple(signature))
    elif shape[0] == "RETURN":
        if len(thetype) == 0:
            problem = True
        else:
            out[0] = "RETURN"
            # The returned variable is matched against names of the kind
            # recorded for the innermost open function.
            candidates = listall[listype.index(thetype[-1])]
            out[1] = findword(candidates, words[1])[0]
            out[2] = findword(liststate, words[2])[0]
            if out[2] == "BREAK":
                insidefunction = False
                thetype.pop()
                listofindents[counter] = -1
    else:
        first_bucket = shape[2]
        if shape[1] == 0:
            # New name: register verbatim, no correction.
            listall[first_bucket].append(words[1])
            out[1] = words[1]
        else:
            out[1] = findword(listall[first_bucket], words[1])[0]
            if first_bucket == 10:
                # Bucket 10 holds number names; emit the numeric value.
                out[1] = word_to_num[out[1]]

        second_bucket = shape[3]
        if second_bucket < len(listall):
            out[2] = findword(listall[second_bucket], words[2])[0]
            if second_bucket == 10:
                out[2] = word_to_num[out[2]]
        else:
            out[2] = words[2]

        # Only real control flow carries a line number in its second argument.
        block_openers = {
            "WHILE", "ITERATE", "COMPARE", "ELSECOMPARE",
            "WHILETRUE", "IFTRUE", "ELSEIF",
        }
        if opcode in block_openers:
            listofindents[counter] = 1
            listofindents[int(out[2])] = -1
        elif opcode == "DEFINE":
            listfunctions.append(opcode)
            listofindents[int(counter)] = 1

    counter += 1
    if len(out[1]) == 0 or len(out[2]) == 0:
        problem = True
    return out
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ErrorCorrection.py – Tzefa source-text parser and error-correcting compiler front-end.
|
| 3 |
+
|
| 4 |
+
TzefaParser converts raw text lines (e.g. from OCR) into validated 4-word
|
| 5 |
+
bytecode tuples consumed by topy.make_instruction().
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from typing import Any, Dict, List, Tuple
|
| 10 |
+
|
| 11 |
from language import Number2Name
|
| 12 |
+
from language.dialects import (
|
| 13 |
+
THREE_WORD, CAPS_ONLY,
|
| 14 |
+
normalize_line, words_per_line, ALU_VERBS,
|
| 15 |
+
)
|
| 16 |
+
from language import topy
|
| 17 |
from fast_edit_distance import edit_distance
|
| 18 |
|
| 19 |
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Instruction definitions — now in 4-word form
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Each entry: [VERB, TYPE, ARG1_KIND, ARG2_KIND]
|
| 24 |
+
#
|
| 25 |
+
# ARG_KIND values:
|
| 26 |
+
# "NEWINT" – declares a new integer name
|
| 27 |
+
# "NEWSTR" – declares a new string name
|
| 28 |
+
# "NEWBOOL" – declares a new boolean name
|
| 29 |
+
# "NEWLIST" – declares a new list name
|
| 30 |
+
# "NEWCOND" – declares a new condition name
|
| 31 |
+
# "NEWFUNC" – declares a new function name
|
| 32 |
+
# "INT" – existing integer var
|
| 33 |
+
# "STR" – existing string var
|
| 34 |
+
# "LIST" – existing list var
|
| 35 |
+
# "BOOL" – existing boolean var
|
| 36 |
+
# "COND" – existing condition
|
| 37 |
+
# "STATE" – STAY / BREAK
|
| 38 |
+
# "TYPE" – INTEGER / STRING / LIST / BOOLEAN
|
| 39 |
+
# "TRUTH" – TRUE / FALSE
|
| 40 |
+
# "COMPARE" – EQUALS / BIGEQUALS / BIGGER
|
| 41 |
+
# "NUMNAME" – numeric name (ZERO … ONEHUNDRED)
|
| 42 |
+
# "TEXT" – free text (no correction)
|
| 43 |
+
# "VALUE" – context-dependent (return var, resolved at parse time)
|
| 44 |
+
|
| 45 |
+
# One row per built-in instruction, written as "VERB TYPE ARG1_KIND ARG2_KIND"
# and split into a 4-element list. For the arithmetic verbs and COMBINE the
# last three columns are operand kinds laid out as DEST, SRC1, SRC2.
_BUILTIN_INSTRUCTIONS: List[List[str]] = [
    entry.split()
    for entry in (
        # Variable declarations
        "MAKE INTEGER NEWINT NUMNAME",
        "MAKE BOOLEAN NEWBOOL TRUTH",
        "MAKE STRING NEWSTR TEXT",
        "MAKE LIST NEWLIST NUMNAME",
        "MAKE CONDITION NEWCOND COMPARE",
        # Condition manipulation
        "SET LEFT COND INT",
        "SET RIGHT COND INT",
        "CHANGE COMPARE COND COMPARE",
        # Control flow
        "WHILE CONDITION COND NUMNAME",
        "IF CONDITION COND NUMNAME",
        "ELIF CONDITION COND NUMNAME",
        "ITERATE LIST LIST NUMNAME",
        "WHILE BOOLEAN BOOL NUMNAME",
        "IF BOOLEAN BOOL NUMNAME",
        "ELIF BOOLEAN BOOL NUMNAME",
        # Function definition
        "FUNCTION INTEGER NEWFUNC TYPE",
        "FUNCTION STRING NEWFUNC TYPE",
        "FUNCTION LIST NEWFUNC TYPE",
        # Return
        "RETURN VALUE VALUE STATE",
        # Print
        "PRINT STRING STR STATE",
        "PRINT INTEGER INT STATE",
        # Assignment / copy
        "SET INTEGER INT INT",
        "SET STRING STR STR",
        "SET LIST LIST LIST",
        "SET INDEX LIST NUMNAME",
        # Type introspection
        "TYPE TOINT STR INT",
        # List read
        "GET STRING LIST STR",
        "GET INTEGER LIST INT",
        "GET BOOLEAN LIST BOOL",
        "GET LIST LIST LIST",
        "GET TYPE LIST STR",
        "GET LENGTH LIST INT",
        # List write
        "WRITE INTEGER LIST INT",
        "WRITE STRING LIST STR",
        "WRITE BOOLEAN LIST BOOL",
        "WRITE LIST LIST LIST",
        # Arithmetic (DEST, SRC1, SRC2)
        "ADD INT INT INT",
        "MULTIPLY INT INT INT",
        "POWER INT INT INT",
        "DIVIDE INT INT INT",
        "SIMPLEDIVIDE INT INT INT",
        "SUBTRACT INT INT INT",
        "MODULO INT INT INT",
        # String ops (COMBINE is DEST, SRC1, SRC2)
        "COMBINE STR STR STR",
        "PAD STRING STR NUMNAME",
        # List resize: ADD SIZE <list> <int amount>
        "ADD SIZE LIST INT",
    )
]
|
| 119 |
+
|
| 120 |
+
# Argument kinds that introduce a brand-new name (all carry the "NEW" prefix).
_NEW_KINDS = set(
    ("NEWINT", "NEWSTR", "NEWBOOL", "NEWLIST", "NEWCOND", "NEWFUNC")
)

# Argument kind -> index of its name bucket in the parser's ``all_names`` list.
# A declaring kind shares the bucket of the kind it declares.
_KIND_TO_BUCKET: Dict[str, int] = dict(
    INT=0, NEWINT=0,
    STR=1, NEWSTR=1,
    LIST=2, NEWLIST=2,
    BOOL=3, NEWBOOL=3,
    COND=4, NEWCOND=4,
    STATE=5,
    TYPE=6,
    NEWFUNC=7,
    TRUTH=8,
    COMPARE=9,
    NUMNAME=10,
    TEXT=11,
    VALUE=-1,  # no fixed bucket: resolved dynamically
)

# Function return-type keyword -> argument kind.
_FUNC_TYPE_MAP: Dict[str, str] = dict(
    INTEGER="INT",
    STRING="STR",
    LIST="LIST",
    BOOLEAN="BOOL",
)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
class TzefaParser:
|
| 147 |
+
"""Parse and error-correct Tzefa source lines into 4-word bytecode."""
|
| 148 |
+
|
| 149 |
+
def __init__(
    self,
    dialect: str = THREE_WORD,
    casing: str = CAPS_ONLY,
) -> None:
    """Build the instruction table, name buckets and parser state.

    Args:
        dialect: Source dialect; stored and later handed to the
            ``language.dialects`` helpers.
        casing: Casing convention; stored and later handed to
            ``normalize_line``.
    """
    self.dialect = dialect
    self.casing = casing

    # Private copy of the static table: user functions are appended to it
    # while parsing, and the module-level definition must stay pristine.
    self.instructions: List[List[str]] = [row[:] for row in _BUILTIN_INSTRUCTIONS]

    # (VERB, TYPE) key per instruction row, index-aligned with the table.
    self.opcode_keys: List[Tuple[str, str]] = [(row[0], row[1]) for row in self.instructions]

    # Name buckets for fuzzy matching, indexed as in _KIND_TO_BUCKET.
    self.all_names: List[List[str]] = [
        ["TEMPORARY", "LOCALINT", "LOOPINTEGER"],       # 0: INT names
        ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING",
         "INTEGER", "STRING", "LIST", "BOOLEAN"],        # 1: STR names
        ["GLOBALLIST", "LOOPLIST"],                      # 2: LIST names
        ["LOOPBOOL"],                                    # 3: BOOL names
        ["THETRUTH"],                                    # 4: COND names
        ["STAY", "BREAK"],                               # 5: STATE
        ["INTEGER", "STRING", "LIST", "BOOLEAN"],        # 6: TYPE
        [],                                              # 7: opcode labels (filled below)
        ["TRUE", "FALSE"],                               # 8: TRUTH
        ["EQUALS", "BIGEQUALS", "BIGGER"],               # 9: COMPARE
        [],                                              # 10: NUMNAME (filled below)
        [],                                              # 11: TEXT (free, no correction)
    ]

    # Bucket 7: one "VERB_TYPE" label per distinct opcode, first-seen order.
    opcode_labels = self.all_names[7]
    for verb, type_word in self.opcode_keys:
        label = f"{verb}_{type_word}"
        if label not in opcode_labels:
            opcode_labels.append(label)

    # Bucket 10 and its reverse map: number name -> decimal string, 0..100.
    self.word_to_num: Dict[str, str] = {}
    for value in range(101):
        name = Number2Name.get_name(value)
        self.all_names[10].append(name)
        self.word_to_num[name] = str(value)

    # verb -> valid second words, used for sequential word matching.
    self._verb_to_types: Dict[str, List[str]] = {}
    for verb, type_word in self.opcode_keys:
        known_types = self._verb_to_types.setdefault(verb, [])
        if type_word not in known_types:
            known_types.append(type_word)

    # Deduplicated verb list in first-seen order. CALL is always present,
    # even before any user function has been registered.
    self._verb_list: List[str] = ["CALL"]
    for verb in self._verb_to_types:  # dicts keep insertion order
        if verb not in self._verb_list:
            self._verb_list.append(verb)

    # Indent-change table; sized by init_indent_table().
    self.indent_table: List[int] = []

    # Function-definition state.
    self.function_type_stack: List[str] = []
    self.inside_function: bool = False
    self.line_counter: int = 0
|
| 231 |
+
|
| 232 |
+
# ------------------------------------------------------------------
|
| 233 |
+
# Public interface
|
| 234 |
+
# ------------------------------------------------------------------
|
| 235 |
+
|
| 236 |
+
@property
def expected_words_per_line(self) -> int:
    """Word count per source line, as reported by ``words_per_line`` for this dialect."""
    dialect = self.dialect
    return words_per_line(dialect)
|
| 239 |
+
|
| 240 |
+
def normalize_source_line(self, raw_tokens: List[str]) -> List[str]:
    """Normalise raw tokens with this parser's dialect and casing.

    Thin wrapper around ``normalize_line``; the result is meant to be the
    canonical 4-word upper-case form that ``parse_line`` consumes.
    """
    dialect, casing = self.dialect, self.casing
    return normalize_line(raw_tokens, dialect, casing)
|
| 243 |
+
|
| 244 |
+
def init_indent_table(self, line_count: int) -> None:
    """Reset the indent-change table to zeros, sized for *line_count* lines.

    The table holds ``line_count + 2`` entries, but never fewer than 1002.
    """
    size = line_count + 2
    if size < 1002:
        size = 1002
    self.indent_table = [0 for _ in range(size)]
|
| 247 |
+
|
| 248 |
+
def get_indent_table(self) -> List[int]:
    """Return the indent-change table (the live list, not a copy)."""
    table = self.indent_table
    return table
|
| 250 |
+
|
| 251 |
+
def match_opcode(self, verb: str, type_word: str) -> Tuple[int, List[str]]:
    """Look up the instruction row whose key is exactly ``(verb, type_word)``.

    Returns:
        ``(index, row)`` of the first matching row. When nothing matches, the
        first row of the table is returned with index 0.
    """
    wanted = (verb, type_word)
    position = next(
        (pos for pos, candidate in enumerate(self.opcode_keys) if candidate == wanted),
        0,  # no exact match: fall back to the first instruction
    )
    return position, self.instructions[position]
|
| 258 |
+
|
| 259 |
+
def parse_line(self, quad: List[str]) -> List[str]:
    """Error-correct one normalised source line into a 4-word instruction.

    Words are corrected left to right:

    * W1 is fuzzy-matched against the known verbs.
    * ALU verbs (``ALU_VERBS``): W2..W4 are DEST, SRC1, SRC2, each matched
      against existing STR names for COMBINE and INT names otherwise.
      ``ADD SIZE`` is the exception and follows its table row.
    * CALL: W2 is matched against registered function names; W3 (input) and
      W4 (output) are matched against names of the function's declared
      parameter and return kinds.
    * Any other verb: W2 is matched against that verb's valid type words and
      W3/W4 are resolved by the kinds in the matching instruction row.

    Args:
        quad: Up to four words. Shorter lists are treated as if padded with
            empty strings; the list itself is not modified.

    Returns:
        A new 4-element list of corrected words.

    Side effects: increments ``line_counter``; control-flow verbs and
    ``RETURN ... BREAK`` write to ``indent_table`` (which must have been
    sized with ``init_indent_table``); FUNCTION registers the new function
    with ``topy`` and in this parser's lookup tables.
    """
    # Pad a private copy so the caller's list is left untouched.
    quad = list(quad) + [""] * (4 - len(quad))

    # -- W1: verb -----------------------------------------------------------
    verb = self.find_word(self._verb_list, quad[0], use_ocr_weights=True)[0]

    # -- ALU fast path (W2 = dest var, W3/W4 = sources) ----------------------
    if verb in ALU_VERBS:
        if verb == "ADD":
            # ADD SIZE is the non-ALU outlier. NOTE(review): W2 is compared
            # with the table's second-column words for ADD, so a destination
            # name that happens to sit closest to "SIZE" takes this branch.
            add_types = self._verb_to_types.get("ADD", [])
            second = self.find_word(add_types, quad[1], use_ocr_weights=True)[0]
            if second == "SIZE":
                _, spec = self.match_opcode("ADD", "SIZE")
                result = [
                    "ADD",
                    "SIZE",
                    self._resolve_arg(spec[2], quad[2]),
                    self._resolve_arg(spec[3], quad[3]),
                ]
                self.line_counter += 1
                return result

        operand_kind = "STR" if verb == "COMBINE" else "INT"
        dest = self._resolve_arg(operand_kind, quad[1])
        src1 = self._resolve_arg(operand_kind, quad[2])
        src2 = self._resolve_arg(operand_kind, quad[3])
        self.line_counter += 1
        return [verb, dest, src1, src2]

    # -- CALL (W2 = function name, W3 = input var, W4 = output var) ----------
    if verb == "CALL":
        known_funcs = [key[1] for key in self.opcode_keys if key[0] == "CALL"]
        if known_funcs:
            func_name = self.find_word(known_funcs, quad[1], use_ocr_weights=True)[0]
        else:
            func_name = quad[1]
        func_spec = next(
            (row for row in self.instructions
             if row[0] == "CALL" and row[1] == func_name),
            None,
        )
        # Use the kinds recorded when the function was declared; an unknown
        # function falls back to integer arguments.
        if func_spec is None:
            input_kind = output_kind = "INT"
        else:
            input_kind, output_kind = func_spec[2], func_spec[3]
        arg1 = self._resolve_arg(input_kind, quad[2])
        arg2 = self._resolve_arg(output_kind, quad[3])
        self.line_counter += 1
        return ["CALL", func_name, arg1, arg2]

    # -- W2: type keyword for this verb --------------------------------------
    valid_types = self._verb_to_types.get(verb, [])
    if valid_types:
        type_word = self.find_word(valid_types, quad[1], use_ocr_weights=True)[0]
    else:
        type_word = quad[1]

    # -- Full spec for (verb, type) ------------------------------------------
    _, spec = self.match_opcode(verb, type_word)
    arg1_kind, arg2_kind = spec[2], spec[3]

    result = [verb, type_word, "", ""]

    if verb == "FUNCTION":
        self.inside_function = True
        func_name = quad[2]  # new name: registered as written
        param_type = self.find_word(self.all_names[6], quad[3], use_ocr_weights=True)[0]
        result[2] = func_name
        result[3] = param_type
        self.function_type_stack.append(type_word)

        return_kind = _FUNC_TYPE_MAP.get(type_word, "INT")
        param_kind = _FUNC_TYPE_MAP.get(param_type, "INT")
        topy.register_user_function(func_name, return_kind, param_kind)

        # Make the function callable: CALL <name> <input var> <output var>,
        # with the argument kinds taken from the declaration.
        self.opcode_keys.append(("CALL", func_name))
        self.instructions.append(["CALL", func_name, param_kind, return_kind])
        call_names = self._verb_to_types.setdefault("CALL", [])
        if func_name not in call_names:
            call_names.append(func_name)
        label = f"CALL_{func_name}"
        if label not in self.all_names[7]:
            self.all_names[7].append(label)

    elif verb == "RETURN":
        if self.function_type_stack:
            # The returned variable must be of the enclosing function's kind.
            ret_kind = _FUNC_TYPE_MAP.get(self.function_type_stack[-1], "INT")
            bucket = _KIND_TO_BUCKET.get(ret_kind, 0)
            result[2] = self.find_word(self.all_names[bucket], quad[2], use_ocr_weights=True)[0]
        else:
            result[2] = quad[2]
        result[3] = self.find_word(self.all_names[5], quad[3], use_ocr_weights=True)[0]
        if result[3] == "BREAK" and self.function_type_stack:
            # RETURN ... BREAK closes the function body.
            self.inside_function = False
            self.function_type_stack.pop()
            self.indent_table[self.line_counter] = -1

    else:
        result[2] = self._resolve_arg(arg1_kind, quad[2])
        result[3] = self._resolve_arg(arg2_kind, quad[3])

        # Control flow: this line opens a block, and W4 names the line that
        # closes it.
        if verb in {"WHILE", "IF", "ELIF", "ITERATE"}:
            self.indent_table[self.line_counter] = 1
            try:
                self.indent_table[int(result[3])] = -1
            except (ValueError, IndexError):
                # Unusable target line: leave the table unchanged.
                pass

    self.line_counter += 1
    return result
|
| 373 |
+
|
| 374 |
+
# ------------------------------------------------------------------
|
| 375 |
+
# Argument resolution
|
| 376 |
+
# ------------------------------------------------------------------
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def _resolve_arg(self, kind: str, raw: str) -> str:
    """Correct one argument word according to its argument *kind*.

    * Empty kind, ``"VALUE"``, or a kind without a usable bucket: *raw* is
      returned unchanged.
    * Declaring kinds (``_NEW_KINDS``): *raw* is added to its bucket if it is
      non-empty and not already there, then returned unchanged.
    * ``"NUMNAME"``: a digit string passes through; any other word is
      fuzzy-matched to a number name and returned as its numeric string.
    * Everything else: the closest name in the kind's bucket.
    """
    if kind == "VALUE" or not kind:
        return raw

    bucket_idx = _KIND_TO_BUCKET.get(kind, -1)
    if not 0 <= bucket_idx < len(self.all_names):
        return raw
    bucket = self.all_names[bucket_idx]

    if kind in _NEW_KINDS:
        # A new name is taken as written and remembered for later lines.
        if raw and raw not in bucket:
            bucket.append(raw)
        return raw

    is_number = kind == "NUMNAME"
    if is_number and raw.isdigit():
        return raw

    closest = self.find_word(bucket, raw, use_ocr_weights=True)[0]
    if is_number:
        return self.word_to_num.get(closest, closest)
    return closest
|
| 405 |
+
|
| 406 |
+
# ------------------------------------------------------------------
|
| 407 |
+
# Edit distance helpers
|
| 408 |
+
# ------------------------------------------------------------------
|
| 409 |
+
|
| 410 |
+
@staticmethod
|
| 411 |
+
def ocr_edit_distance(word1: str, word2: str) -> float:
|
| 412 |
+
"""Levenshtein distance with reduced cost for common OCR confusions."""
|
| 413 |
+
word1, word2 = word1.upper(), word2.upper()
|
| 414 |
+
|
| 415 |
+
_LOW_COST: Dict[Tuple[str, str], float] = {
|
| 416 |
+
('O', '0'): 0.5, ('0', 'O'): 0.5,
|
| 417 |
+
('I', '1'): 0.5, ('1', 'I'): 0.5,
|
| 418 |
+
('I', 'L'): 0.5, ('L', 'I'): 0.5,
|
| 419 |
+
('S', '5'): 0.5, ('5', 'S'): 0.5,
|
| 420 |
+
('Z', '2'): 0.5, ('2', 'Z'): 0.5,
|
| 421 |
+
('C', 'O'): 0.5, ('O', 'C'): 0.5,
|
| 422 |
+
('C', 'G'): 0.5, ('G', 'C'): 0.5,
|
| 423 |
+
('B', '8'): 0.5, ('8', 'B'): 0.5,
|
| 424 |
+
('D', 'O'): 0.5, ('O', 'D'): 0.5,
|
| 425 |
+
('E', 'F'): 0.5, ('F', 'E'): 0.5,
|
| 426 |
+
('A', '4'): 0.5, ('4', 'A'): 0.5,
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
m, n = len(word1), len(word2)
|
| 430 |
+
dp = [[0.0] * (n + 1) for _ in range(m + 1)]
|
| 431 |
+
for i in range(m + 1):
|
| 432 |
+
dp[i][0] = float(i)
|
| 433 |
+
for j in range(n + 1):
|
| 434 |
+
dp[0][j] = float(j)
|
| 435 |
+
for i in range(1, m + 1):
|
| 436 |
+
for j in range(1, n + 1):
|
| 437 |
+
if word1[i - 1] == word2[j - 1]:
|
| 438 |
+
cost = 0.0
|
| 439 |
+
else:
|
| 440 |
+
cost = _LOW_COST.get((word1[i - 1], word2[j - 1]), 2.0)
|
| 441 |
+
dp[i][j] = min(
|
| 442 |
+
dp[i - 1][j] + 1.0,
|
| 443 |
+
dp[i][j - 1] + 1.0,
|
| 444 |
+
dp[i - 1][j - 1] + cost,
|
| 445 |
+
)
|
| 446 |
+
return dp[m][n]
|
| 447 |
+
|
| 448 |
+
@staticmethod
|
| 449 |
+
def find_word(
|
| 450 |
+
name_list: List[str],
|
| 451 |
+
word: str,
|
| 452 |
+
use_ocr_weights: bool = False,
|
| 453 |
+
) -> Tuple[str, int]:
|
| 454 |
+
"""Return the closest match to *word* in *name_list* and its index."""
|
| 455 |
+
if not name_list:
|
| 456 |
+
return word, 0
|
| 457 |
+
|
| 458 |
+
min_dist = 999.0
|
| 459 |
+
best: List[Any] = [word, 0]
|
| 460 |
+
best_len = 16
|
| 461 |
+
word_len = len(word)
|
| 462 |
+
|
| 463 |
+
for idx, item in enumerate(name_list):
|
| 464 |
+
if item == word:
|
| 465 |
+
return item, idx
|
| 466 |
+
|
| 467 |
if use_ocr_weights:
|
| 468 |
+
dist = TzefaParser.ocr_edit_distance(word, item)
|
| 469 |
else:
|
| 470 |
+
dist = float(edit_distance(word, item, 32))
|
| 471 |
+
|
| 472 |
+
item_len = len(item)
|
| 473 |
+
if dist < min_dist:
|
| 474 |
+
min_dist = dist
|
| 475 |
+
best = [item, idx]
|
| 476 |
+
best_len = item_len
|
| 477 |
+
elif dist == min_dist:
|
| 478 |
+
if abs(word_len - item_len) < abs(word_len - best_len):
|
| 479 |
+
best = [item, idx]
|
| 480 |
+
best_len = item_len
|
| 481 |
+
|
| 482 |
+
return tuple(best)
|
| 483 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
language/Number2Name.py
CHANGED
|
@@ -1,15 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
# Building blocks for the spelled-out names. Compound names are written
# without a separator (e.g. 'TWENTYONE').
_UNIT_NAMES: Tuple[str, ...] = (
    'ZERO', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE',
)
_TEEN_NAMES: Tuple[str, ...] = (
    'TEN', 'ELEVEN', 'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN',
    'SEVENTEEN', 'EIGHTEEN', 'NINETEEN',
)
_TENS_NAMES: Tuple[str, ...] = (
    'TWENTY', 'THIRTY', 'FORTY', 'FIFTY', 'SIXTY', 'SEVENTY', 'EIGHTY', 'NINETY',
)

# NUMBER_NAMES[n] is the uppercase English name of n, for n in 0..100.
NUMBER_NAMES: Tuple[str, ...] = (
    _UNIT_NAMES
    + _TEEN_NAMES
    + tuple(
        tens if unit == 0 else tens + _UNIT_NAMES[unit]
        for tens in _TENS_NAMES
        for unit in range(10)
    )
    + ('ONEHUNDRED',)
)
|
| 19 |
+
|
| 20 |
+
def get_name(number: int) -> str:
    """Look up the uppercase English name of *number* in ``NUMBER_NAMES``.

    Raises:
        ValueError: if *number* is negative or not a valid index into
            ``NUMBER_NAMES``.
    """
    in_range = 0 <= number < len(NUMBER_NAMES)
    if in_range:
        return NUMBER_NAMES[number]
    raise ValueError(f'number out of range: {number}')
|
language/__pycache__/topy.cpython-313.pyc
ADDED
|
Binary file (22.1 kB). View file
|
|
|
language/createdpython.py
CHANGED
|
@@ -1,760 +1,985 @@
|
|
| 1 |
-
### finally found the bug
|
| 2 |
-
### here it is:
|
| 3 |
-
### lets say function f calls a new instance of itself
|
| 4 |
-
### the new instance creates cond b and a new instance of itself
|
| 5 |
-
### the new instance closes and returns to the instance with cond b that also closes itself
|
| 6 |
-
### now the original instance still owns cond b since no function was called to clean the stack or dict between the exiting calls
|
| 7 |
-
dicte = {"EQUALS": 0, "BIGEQUALS": 1, "BIGGER": 2}
|
| 8 |
-
listfunctions = [(lambda x, y: x == y), (lambda x, y: x >= y), (lambda x, y: x > y)]
|
| 9 |
import sys
|
| 10 |
-
import
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return True
|
| 29 |
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
class Node:
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
self.value = value
|
| 34 |
-
self.next = None
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
|
|
|
|
| 38 |
|
| 39 |
-
def
|
| 40 |
-
|
|
|
|
| 41 |
|
| 42 |
-
def
|
|
|
|
| 43 |
return self.next
|
| 44 |
|
| 45 |
-
def
|
|
|
|
| 46 |
return self.value
|
| 47 |
|
| 48 |
|
| 49 |
class Stack:
|
|
|
|
|
|
|
| 50 |
def __init__(self):
|
| 51 |
-
self.top = None
|
| 52 |
-
self.list = []
|
| 53 |
|
| 54 |
-
def
|
|
|
|
| 55 |
return len(self.list) == 0
|
| 56 |
|
| 57 |
-
def push(self, value):
|
|
|
|
| 58 |
self.list.append(value)
|
| 59 |
|
| 60 |
-
def pop(self):
|
|
|
|
| 61 |
return self.list.pop()
|
| 62 |
|
| 63 |
|
| 64 |
-
|
| 65 |
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
errore.varexistserror(name)
|
| 71 |
-
else:
|
| 72 |
-
alltheconds[name] = COND(compare)
|
| 73 |
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
def
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
return allthelocalconds
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
# import dis
|
| 86 |
-
# print(dis.dis(addlocalcond))
|
| 87 |
-
def movetonewconds(localconds):
|
| 88 |
-
global stackoflocalconds
|
| 89 |
-
global alltheconds
|
| 90 |
-
global reserveconds
|
| 91 |
-
for i in localconds:
|
| 92 |
-
del alltheconds[i]
|
| 93 |
-
stackoflocalconds.push(localconds)
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def backtooldconds():
|
| 97 |
-
global stackoflocalconds
|
| 98 |
-
popped = stackoflocalconds.pop()
|
| 99 |
-
global alltheconds
|
| 100 |
-
global reserveconds
|
| 101 |
-
for i in reserveconds:
|
| 102 |
-
if i in alltheconds:
|
| 103 |
-
del alltheconds[i]
|
| 104 |
-
reserveconds = {}
|
| 105 |
-
for i in popped:
|
| 106 |
-
alltheconds[i] = popped[i]
|
| 107 |
-
reserveconds[i] = popped[i]
|
| 108 |
-
return popped
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
def getcond(name):
|
| 112 |
-
global alltheconds
|
| 113 |
-
if (name in alltheconds):
|
| 114 |
-
return alltheconds[name]
|
| 115 |
-
else:
|
| 116 |
-
errore.doesntexisterror(name)
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
def printvars():
|
| 120 |
-
global allthevars, printed
|
| 121 |
-
print("END OF PROGRAM")
|
| 122 |
-
print()
|
| 123 |
-
for i in allthevars:
|
| 124 |
-
print("All the vars used from type " + i)
|
| 125 |
-
for j in allthevars[i]:
|
| 126 |
-
if (allthevars[i][j].iswritable()):
|
| 127 |
-
print(j + " : " + allthevars[i][j].tostring())
|
| 128 |
-
else:
|
| 129 |
-
pass
|
| 130 |
-
print("")
|
| 131 |
-
print("All that was printed during the program")
|
| 132 |
-
print(printed)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
allthevars[type][name] = VALUE(name, value, True, True, type)
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
def
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
-
def
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
allthelocalrvars[type][name] = allthevars[type][name]
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
-
def
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
stack.push(dict2)
|
| 172 |
-
return {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
def
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
del dict1[i][j]
|
| 183 |
-
reserveloc = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
|
| 184 |
-
for i in lastcall:
|
| 185 |
-
for j in lastcall[i]:
|
| 186 |
-
dict1[i][j] = lastcall[i][j]
|
| 187 |
-
reserveloc[i][j] = lastcall[i][j]
|
| 188 |
-
return lastcall
|
| 189 |
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
print(toprint, end='')
|
| 195 |
-
printed = printed + toprint
|
| 196 |
|
|
|
|
|
|
|
| 197 |
|
| 198 |
-
|
| 199 |
-
def __init__(self, name, value, readable, writable, TYPE):
|
| 200 |
self.name = name
|
| 201 |
self.value = value
|
| 202 |
self.readable = readable
|
| 203 |
self.writable = writable
|
| 204 |
-
self.type =
|
| 205 |
|
| 206 |
-
def write(self, value):
|
| 207 |
-
if
|
|
|
|
| 208 |
self.value = value
|
| 209 |
else:
|
| 210 |
-
|
| 211 |
|
| 212 |
-
def
|
|
|
|
| 213 |
self.value = value
|
| 214 |
|
| 215 |
-
def read(self):
|
| 216 |
-
if
|
|
|
|
| 217 |
return self.value
|
| 218 |
else:
|
| 219 |
-
|
| 220 |
|
| 221 |
-
def
|
|
|
|
| 222 |
return self.value
|
| 223 |
|
| 224 |
-
def
|
| 225 |
-
|
|
|
|
| 226 |
|
| 227 |
-
def
|
| 228 |
-
|
|
|
|
| 229 |
|
| 230 |
-
def
|
|
|
|
| 231 |
return self.name
|
| 232 |
|
| 233 |
-
def
|
|
|
|
| 234 |
return self.writable
|
| 235 |
|
| 236 |
-
def
|
|
|
|
| 237 |
return self.readable
|
| 238 |
|
| 239 |
-
def
|
|
|
|
| 240 |
return str(self.value)
|
| 241 |
|
| 242 |
-
def
|
|
|
|
| 243 |
return self.type
|
| 244 |
|
| 245 |
-
def override(self, value):
|
|
|
|
| 246 |
self.value = value
|
| 247 |
|
| 248 |
-
def
|
| 249 |
-
|
|
|
|
| 250 |
|
| 251 |
-
def
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
else:
|
| 255 |
-
if
|
| 256 |
-
|
| 257 |
else:
|
| 258 |
-
self.value =
|
| 259 |
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
|
|
|
| 263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
-
def
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
-
def
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
-
def
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
| 276 |
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
-
def
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() / getvar("INT", vali2).read())
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
-
def
|
| 285 |
-
|
|
|
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
def
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
getvar("INT", "TEMPORARY").forcewrite(int(getvar("INT", Vali1).read() % getvar("INT", vali2).read()))
|
| 292 |
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
-
def
|
| 295 |
-
|
|
|
|
| 296 |
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
-
def
|
| 299 |
-
|
|
|
|
| 300 |
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
-
def
|
| 303 |
-
|
|
|
|
| 304 |
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
-
def
|
| 307 |
-
|
|
|
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
-
def
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
| 312 |
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
def
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
getvar('INT', vali2).write(lookuptable[getvar('STR', vali1).read()])
|
| 322 |
-
else:
|
| 323 |
-
errore.typetointerror(getvar('STR', vali1).read())
|
| 324 |
|
|
|
|
|
|
|
| 325 |
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
self.
|
| 329 |
-
self.
|
| 330 |
-
self.
|
| 331 |
|
| 332 |
-
def
|
| 333 |
-
|
|
|
|
| 334 |
|
| 335 |
-
def
|
|
|
|
| 336 |
self.left = left
|
| 337 |
|
| 338 |
-
def
|
|
|
|
| 339 |
self.right = right
|
| 340 |
|
| 341 |
-
def
|
| 342 |
-
return
|
|
|
|
| 343 |
|
| 344 |
-
def
|
|
|
|
| 345 |
return self.type
|
| 346 |
|
| 347 |
|
| 348 |
-
|
| 349 |
-
|
|
|
|
| 350 |
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
-
def nameerror(self, type, name):
|
| 355 |
-
global currentline
|
| 356 |
-
globalline = currentline
|
| 357 |
-
print("Error Line: " + str(globalline))
|
| 358 |
-
print("Var of name " + name + " doesn't exist as type " + type)
|
| 359 |
-
print(" ")
|
| 360 |
-
printvars()
|
| 361 |
-
sys.exit(0)
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
-
def DIVZEROERROR(self, name):
|
| 374 |
-
global currentline
|
| 375 |
-
globalline = currentline
|
| 376 |
-
print("Error Line: " + str(globalline))
|
| 377 |
-
print("Cant divide by zero and var " + name + " has value of zero")
|
| 378 |
-
print(" ")
|
| 379 |
-
printvars()
|
| 380 |
-
sys.exit(0)
|
| 381 |
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
|
| 390 |
-
def writeerror(self, name, value):
|
| 391 |
-
global currentline
|
| 392 |
-
globalline = currentline
|
| 393 |
-
print("Error Line: " + str(globalline))
|
| 394 |
-
print("Tried to write value of " + str(value) + " to unwritable variable " + name)
|
| 395 |
-
print(" ")
|
| 396 |
-
printvars()
|
| 397 |
-
sys.exit(0)
|
| 398 |
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
print("Error Line: " + str(globalline))
|
| 403 |
-
print("No type such as " + str(value))
|
| 404 |
-
print(" ")
|
| 405 |
-
printvars()
|
| 406 |
-
sys.exit(0)
|
| 407 |
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
-
def linelimiterror(self):
|
| 418 |
-
global currentline
|
| 419 |
-
globalline = currentline
|
| 420 |
-
print("Error Line: " + str(currentline))
|
| 421 |
-
print("Program ran for too long")
|
| 422 |
-
print(" ")
|
| 423 |
-
printvars()
|
| 424 |
-
sys.exit(0)
|
| 425 |
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
print("Error Line: " + str(globalline))
|
| 430 |
-
print("Executing too many function calls ")
|
| 431 |
-
print("List of function calls")
|
| 432 |
-
for i in functioncalls:
|
| 433 |
-
pass
|
| 434 |
-
print(" ")
|
| 435 |
-
printvars()
|
| 436 |
-
sys.exit(0)
|
| 437 |
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
print("Error Line: " + str(globalline))
|
| 448 |
-
print("Tried to create object with name " + name + " but var already exists")
|
| 449 |
-
print(" ")
|
| 450 |
-
printvars()
|
| 451 |
-
sys.exit(0)
|
| 452 |
|
| 453 |
-
def typeerror(self, name, type1, type2):
|
| 454 |
-
global currentline
|
| 455 |
-
globalline = currentline
|
| 456 |
-
print("Error Line: " + str(globalline))
|
| 457 |
-
print("Mismatch of types " + type1 + " and " + type2 + " in list " + name)
|
| 458 |
-
print(" ")
|
| 459 |
-
printvars()
|
| 460 |
-
sys.exit(0)
|
| 461 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
-
class LIST:
|
| 464 |
-
def __init__(self, name, size, readable, writable, TYPE):
|
| 465 |
-
self.size = size
|
| 466 |
-
self.index = 0
|
| 467 |
-
self.values = [VALUE(name=(str(name) + " " + str(i)), value=0, writable=True, readable=True, TYPE="INT") for i
|
| 468 |
-
in range(size)]
|
| 469 |
-
self.types = ["INT" for i in range(size)]
|
| 470 |
-
self.readable = readable
|
| 471 |
-
self.writable = writable
|
| 472 |
-
self.name = name
|
| 473 |
-
self.type = TYPE
|
| 474 |
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
| 484 |
|
| 485 |
-
def forceaddsize(self, added):
|
| 486 |
-
self.size = self.size + added
|
| 487 |
-
self.values = [
|
| 488 |
-
self.values[i] if listfunctions[2](self.size, i) else VALUE(name=(str(i) + " " + str(i)), value=0,
|
| 489 |
-
writable=True, readable=True, TYPE="INT") for i
|
| 490 |
-
in range(self.size + added)]
|
| 491 |
-
|
| 492 |
-
def changeindex(self, newindex):
|
| 493 |
-
if (self.readable):
|
| 494 |
-
if (newindex >= self.size):
|
| 495 |
-
errore.makeeindexrror(newindex, self.size, self.name)
|
| 496 |
-
else:
|
| 497 |
-
self.index = newindex
|
| 498 |
-
else:
|
| 499 |
-
errore.readerror(self.name)
|
| 500 |
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
if (self.writable):
|
| 509 |
-
thevar = getvar(type, name)
|
| 510 |
-
if (thevar.isreadable()):
|
| 511 |
-
self.types[self.index] = type
|
| 512 |
-
self.values[self.index] = thevar.makecopy()
|
| 513 |
-
else:
|
| 514 |
-
errore.readerror(name)
|
| 515 |
-
else:
|
| 516 |
-
errore.writeerror(self.name, name)
|
| 517 |
|
| 518 |
-
def forceplacevalue(self, name, type):
|
| 519 |
-
thevar = getvar(type, name)
|
| 520 |
-
if (thevar.isreadable()):
|
| 521 |
-
self.types[self.index] = type
|
| 522 |
-
self.values[self.index] = thevar.makecopy()
|
| 523 |
-
else:
|
| 524 |
-
errore.readerror()
|
| 525 |
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
-
def read(self):
|
| 533 |
-
if (self.readable):
|
| 534 |
-
return self.values[self.index]
|
| 535 |
-
else:
|
| 536 |
-
errore.readerror(self.name)
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
|
|
|
| 540 |
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
|
|
|
| 546 |
|
| 547 |
-
def returntype(self):
|
| 548 |
-
if (self.readable):
|
| 549 |
-
return self.types[self.index]
|
| 550 |
-
else:
|
| 551 |
-
errore.readerror(self.name)
|
| 552 |
|
| 553 |
-
|
| 554 |
-
|
|
|
|
| 555 |
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
strei = strei + str(i.tostring()) + " "
|
| 560 |
-
return "[ " + strei + " ]"
|
| 561 |
-
|
| 562 |
-
def tostringoftypes(self):
|
| 563 |
-
if (self.readable):
|
| 564 |
-
stre = ""
|
| 565 |
-
for i in self.types:
|
| 566 |
-
stre = stre + i[0]
|
| 567 |
-
return stre
|
| 568 |
-
else:
|
| 569 |
-
errore.readerror(self.name)
|
| 570 |
|
| 571 |
-
def forcetostringoftypes(self):
|
| 572 |
-
stre = ""
|
| 573 |
-
for i in self.types:
|
| 574 |
-
stre = stre + i[0]
|
| 575 |
-
return stre
|
| 576 |
|
| 577 |
-
|
| 578 |
-
|
|
|
|
| 579 |
|
| 580 |
-
def changewrite(self, writestatus):
|
| 581 |
-
self.writable = writestatus
|
| 582 |
|
| 583 |
-
|
| 584 |
-
|
|
|
|
| 585 |
|
| 586 |
-
def iswritable(self):
|
| 587 |
-
return self.writable
|
| 588 |
|
| 589 |
-
|
| 590 |
-
|
|
|
|
|
|
|
|
|
|
| 591 |
|
| 592 |
-
def getvalues(self):
|
| 593 |
-
return self.values
|
| 594 |
|
| 595 |
-
|
| 596 |
-
|
|
|
|
|
|
|
|
|
|
| 597 |
|
| 598 |
-
def getsize(self):
|
| 599 |
-
return self.size
|
| 600 |
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
thelist.values = [val.makecopy() for val in self.values]
|
| 605 |
-
return thelist
|
| 606 |
|
| 607 |
-
def override(self, values, types, size):
|
| 608 |
-
self.values = values
|
| 609 |
-
self.types = types
|
| 610 |
-
self.size = size
|
| 611 |
|
| 612 |
-
|
| 613 |
-
|
|
|
|
|
|
|
|
|
|
| 614 |
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
def
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
if (linecount == linelimit):
|
| 658 |
-
errore.linelimiterror()
|
| 659 |
-
else:
|
| 660 |
-
return True
|
| 661 |
|
| 662 |
|
| 663 |
-
def
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
else:
|
| 669 |
return True
|
| 670 |
|
| 671 |
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
else:
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
else:
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
output =
|
| 700 |
-
|
| 701 |
-
output.
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
def
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
# Number names (ZERO..ONEHUNDRED) are compile-time constants only.
|
| 747 |
# They are resolved to plain integer literals by the compiler (toline/word_to_num)
|
| 748 |
-
# and must NOT live in
|
| 749 |
# their own variables ONE, ZERO, etc.
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
+
from typing import Dict, List, Any, Callable, Optional
|
| 3 |
+
|
| 4 |
+
# --- Comparison operator tables ---
|
| 5 |
+
def _is_equal(x: Any, y: Any) -> bool:
    """Predicate behind the EQUALS operator."""
    return x == y


def _is_big_equal(x: Any, y: Any) -> bool:
    """Predicate behind the BIGEQUALS operator."""
    return x >= y


def _is_bigger(x: Any, y: Any) -> bool:
    """Predicate behind the BIGGER operator."""
    return x > y


# COMPARE_OPS[COMPARE_OP_INDEX[name]] is the predicate for operator `name`,
# so the two tables must list the operators in the same order.
COMPARE_OP_INDEX: Dict[str, int] = {
    op_name: position
    for position, op_name in enumerate(("EQUALS", "BIGEQUALS", "BIGGER"))
}
COMPARE_OPS: List[Callable[[Any, Any], bool]] = [
    _is_equal,
    _is_big_equal,
    _is_bigger,
]
|
| 11 |
+
|
| 12 |
+
# --- VM execution counters and limits ---
|
| 13 |
+
line_count: int = 0
|
| 14 |
+
current_line: int = 0
|
| 15 |
+
line_limit: int = 1000
|
| 16 |
+
function_limit: int = 25
|
| 17 |
+
function_count: int = 0
|
| 18 |
+
printed_output: str = ""
|
| 19 |
+
|
| 20 |
+
# --- Forward declarations for types ---
|
| 21 |
+
class Value: pass
|
| 22 |
+
class VmList: pass
|
| 23 |
+
class Condition: pass
|
| 24 |
+
|
| 25 |
+
# --- Condition registries ---
|
| 26 |
+
cond_registry: Dict[str, Condition] = {}
|
| 27 |
+
saved_conds: Dict[str, Condition] = {}
|
| 28 |
+
|
| 29 |
+
# --- Local variable save slots ---
|
| 30 |
+
saved_locals: Dict[str, Any] = {}
|
| 31 |
+
saved_globals: Dict[str, Dict[str, Any]] = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
# Line tracking
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
|
| 38 |
+
def set_current_line(line_num: int) -> bool:
    """Store *line_num* in the module-level ``current_line``.

    The error reporters print ``current_line`` as "Error Line: ...", so this
    is what ties a runtime failure back to a program line.

    Returns:
        Always ``True``.
    """
    global current_line
    current_line = line_num  # read later when an error is reported
    return True
|
| 43 |
|
| 44 |
|
| 45 |
+
# ---------------------------------------------------------------------------
|
| 46 |
+
# Node / Stack
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
|
| 49 |
class Node:
    """One link of a singly-linked chain: a payload plus the following node."""

    def __init__(self, value: Any):
        self.value = value
        # No successor until set_next / set_next_node is called.
        self.next: Optional['Node'] = None

    def set_next(self, next_value: Any) -> None:
        """Wrap *next_value* in a fresh Node and make it this node's successor."""
        successor = Node(next_value)
        self.next = successor

    def set_next_node(self, next_node: 'Node') -> None:
        """Make the already-built *next_node* this node's successor."""
        self.next = next_node

    def get_next(self) -> Optional['Node']:
        """Return the successor node, or None when there is none."""
        return self.next

    def get_value(self) -> Any:
        """Return this node's payload."""
        return self.value
|
| 71 |
|
| 72 |
|
| 73 |
class Stack:
    """Last-in, first-out container whose items live in ``self.list``."""

    def __init__(self):
        # `top` is initialised here but none of the methods below update it;
        # all storage goes through the list.
        self.top: Optional[Node] = None
        self.list: List[Any] = []

    def is_empty(self) -> bool:
        """Tell whether the stack currently holds no items."""
        return not self.list

    def push(self, value: Any) -> None:
        """Place *value* on top of the stack."""
        items = self.list
        items.append(value)

    def pop(self) -> Any:
        """Remove and return the most recently pushed item.

        Raises:
            IndexError: if the stack is empty.
        """
        items = self.list
        return items.pop()
|
| 91 |
|
| 92 |
|
| 93 |
+
function_call_stack = Stack()
|
| 94 |
|
| 95 |
|
| 96 |
+
# ---------------------------------------------------------------------------
|
| 97 |
+
# Error Handler
|
| 98 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
class ErrorHandler(Exception):
    """Centralised VM error reporter: prints diagnostics and terminates execution.

    Every public method prints the failing line (the module-level
    ``current_line``), a one-line description of the problem, dumps the VM
    state through ``print_vars()`` and then ends the run with ``sys.exit(0)``.

    NOTE(review): the exit status is 0 even though these are error paths;
    it is kept as-is because callers may rely on it.
    """

    def _report_and_exit(self, message: str, separator: bool = True) -> None:
        """Print the standard error report for *message* and stop the program.

        Args:
            message: Human-readable description of the failure.
            separator: When true, print a one-space line between the message
                and the variable dump.

        Raises:
            SystemExit: always, via ``sys.exit(0)``.
        """
        print(f"Error Line: {current_line}")
        print(message)
        if separator:
            print(" ")
        print_vars()
        sys.exit(0)

    def name_error(self, var_type: str, name: str) -> None:
        """Report that a variable of the given type and name does not exist."""
        self._report_and_exit(f"Var of name {name} doesn't exist as type {var_type}")

    def index_error(self, list_name: str, bad_index: int, list_size: int) -> None:
        """Report an out-of-bounds list index access."""
        self._report_and_exit(
            f"Tried to change index of list {list_name} with size of {list_size} to out-of-bounds index {bad_index}"
        )

    def div_zero_error(self, var_name: str) -> None:
        """Report a division-by-zero attempt."""
        self._report_and_exit(f"Cant divide by zero and var {var_name} has value of zero")

    def doesnt_exist_error(self, name: str) -> None:
        """Report that no object with the given name exists."""
        # This report has never printed the separator line; keep its output unchanged.
        self._report_and_exit(f"No object with name {name} exists", separator=False)

    def write_error(self, name: str, value: Any) -> None:
        """Report an attempt to write to an unwritable variable."""
        self._report_and_exit(f"Tried to write value of {value} to unwritable variable {name}")

    def type_to_int_error(self, value: str) -> None:
        """Report that a string does not correspond to a known type name."""
        self._report_and_exit(f"No type such as {value}")

    def read_error(self, name: str) -> None:
        """Report an attempt to read from an unreadable variable."""
        self._report_and_exit(f"Tried to read from unreadable variable {name}")

    def line_limit_error(self) -> None:
        """Report that the program exceeded the maximum allowed line count."""
        self._report_and_exit("Program ran for too long")

    def overflow_error(self, call_stack: Stack) -> None:
        """Report a function call stack overflow.

        *call_stack* is accepted for interface compatibility; its contents
        are not printed.
        """
        self._report_and_exit("Executing too many function calls")

    def cant_change_index_error(self, name: str, added: int) -> None:
        """Report an attempt to resize an unwritable list.

        Terminates like every other reporter. Previously this method only
        printed its message and returned, letting the program continue
        after the error.
        """
        self._report_and_exit(
            f"Tried to change indexes of list {name} and add size {added} but list is unwritable"
        )

    def var_exists_error(self, name: str) -> None:
        """Report an attempt to create a variable whose name is already taken."""
        self._report_and_exit(f"Tried to create object with name {name} but var already exists")

    def type_error(self, name: str, type1: str, type2: str) -> None:
        """Report a type mismatch during a variable assignment."""
        self._report_and_exit(f"Mismatch of types {type1} and {type2} in variable {name}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
+
# ---------------------------------------------------------------------------
|
| 197 |
+
# Value
|
| 198 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
class Value:
    """Typed, named VM variable guarded by separate read and write flags.

    The checked accessors (``read`` / ``write``) report through
    ``error_handler`` when the matching flag is off; ``force_*`` and
    ``override`` skip the flags entirely.
    """

    def __init__(self, name: str, value: Any, readable: bool, writable: bool, type_name: str):
        self.name = name
        self.value = value
        self.readable = readable
        self.writable = writable
        self.type = type_name  # VM type tag, e.g. "INT" or "STR"

    def write(self, value: Any) -> None:
        """Store *value*, or report a write error when the variable is read-only."""
        if not self.writable:
            error_handler.write_error(self.name, value)
            return
        self.value = value

    def force_write(self, value: Any) -> None:
        """Store *value* regardless of the writable flag."""
        self.value = value

    def read(self) -> Any:
        """Return the stored value, or report a read error when it is not readable."""
        if not self.readable:
            error_handler.read_error(self.name)
            return None
        return self.value

    def force_read(self) -> Any:
        """Return the stored value regardless of the readable flag."""
        return self.value

    def set_readable(self, readable: bool) -> None:
        """Turn the read permission on or off."""
        self.readable = readable

    def set_writable(self, writable: bool) -> None:
        """Turn the write permission on or off."""
        self.writable = writable

    def get_name(self) -> str:
        """Return the name this variable was created with."""
        return self.name

    def is_writable(self) -> bool:
        """Tell whether checked writes are currently allowed."""
        return self.writable

    def is_readable(self) -> bool:
        """Tell whether checked reads are currently allowed."""
        return self.readable

    def to_string(self) -> str:
        """Return ``str()`` of the stored value."""
        return str(self.value)

    def give_type(self) -> str:
        """Return the VM type tag of this variable."""
        return self.type

    def override(self, value: Any) -> None:
        """Replace the stored value with no permission check."""
        self.value = value

    def make_copy(self) -> 'Value':
        """Return a new Value with the same name, value, type and readable flag.

        The copy is always writable, whatever this variable's own flag is.
        """
        return Value(self.name, self.value, self.readable, True, self.type)

    def copy_var(self, source: 'Value') -> None:
        """Take over *source*'s value after checking its type and readability.

        This variable's own writable flag is not consulted.
        """
        if self.type != source.type:
            error_handler.type_error(self.name, self.type, source.type)
            return
        if not source.is_readable():
            error_handler.read_error(source.get_name())
            return
        self.value = source.value
|
| 277 |
|
| 278 |
|
| 279 |
+
# ---------------------------------------------------------------------------
|
| 280 |
+
# VmList
|
| 281 |
+
# ---------------------------------------------------------------------------
|
| 282 |
|
| 283 |
+
class VmList:
    """A named, resizable VM list whose slots hold typed ``Value`` objects.

    Each slot starts as a readable, writable INT ``Value`` holding 0 and named
    ``"<list name> <position>"``. The list keeps a cursor (``index``), and the
    element-level operations act on the slot under that cursor. The
    ``readable`` / ``writable`` flags gate the checked methods; every
    ``force_*`` variant skips the permission check.
    """

    def __init__(self, name: str, size: int, readable: bool, writable: bool, type_name: str):
        self.size = size
        self.index = 0  # cursor used by the element-level operations
        self.values: List[Value] = [self._new_slot(name, i) for i in range(size)]
        self.types: List[str] = ["INT" for _ in range(size)]  # per-slot type tags
        self.readable = readable
        self.writable = writable
        self.name = name
        self.type = type_name

    @staticmethod
    def _new_slot(list_name: str, position: int) -> 'Value':
        """Build the default content of a slot: a readable, writable INT 0."""
        return Value(
            name=(str(list_name) + " " + str(position)),
            value=0,
            writable=True,
            readable=True,
            type_name="INT",
        )

    def add_size(self, added: int) -> None:
        """Grow the list by `added` default slots, or report an error if unwritable."""
        if self.writable:
            self.force_add_size(added)
        else:
            error_handler.cant_change_index_error(self.name, added)

    def force_add_size(self, added: int) -> None:
        """Grow the list by `added` default slots without a permission check."""
        old_size = self.size
        self.size = old_size + added
        # Existing slots are kept as-is; positions past the old end get defaults.
        self.values = [
            self.values[i] if i < old_size else self._new_slot(self.name, i)
            for i in range(self.size)
        ]
        self.types = self.types + ["INT" for _ in range(added)]

    def change_index(self, new_index: int) -> None:
        """Move the cursor to `new_index` if the list is readable and the index is in bounds."""
        if self.readable:
            self.force_change_index(new_index)
        else:
            error_handler.read_error(self.name)

    def force_change_index(self, new_index: int) -> None:
        """Move the cursor without a permission check; bounds are still enforced.

        Negative indexes are rejected as well: Python would otherwise wrap
        them around to the end of the list instead of reporting an error.
        """
        if not 0 <= new_index < self.size:
            error_handler.index_error(self.name, new_index, self.size)
        else:
            self.index = new_index

    def place_value(self, name: str, var_type: str) -> None:
        """Copy variable `name` of type `var_type` into the cursor slot if writable."""
        if self.writable:
            self.force_place_value(name, var_type)
        else:
            error_handler.write_error(self.name, name)

    def force_place_value(self, name: str, var_type: str) -> None:
        """Copy a variable into the cursor slot, ignoring this list's writable flag.

        The source variable must still be readable.
        """
        source = get_var(var_type, name)
        if source.is_readable():
            self.types[self.index] = var_type
            self.values[self.index] = source.make_copy()
        else:
            error_handler.read_error(name)

    def read_value(self) -> Value:
        """Return the Value under the cursor if the list is readable."""
        if self.readable:
            return self.values[self.index]
        error_handler.read_error(self.name)
        return self.values[0]  # Should be unreachable due to sys.exit

    def read(self) -> Value:
        """Alias for ``read_value``."""
        return self.read_value()

    def force_read_value(self) -> Value:
        """Return the Value under the cursor without a permission check."""
        return self.values[self.index]

    def copy_element_to(self, dest_value: Value) -> None:
        """Write the cursor element's value into `dest_value` if the types match."""
        if self.types[self.index] == dest_value.give_type():
            dest_value.write(self.values[self.index].read())
        else:
            error_handler.type_error(
                name=self.name,
                type1=self.types[self.index],
                type2=dest_value.give_type()
            )

    def read_type(self) -> str:
        """Return the type tag of the cursor slot if the list is readable."""
        if self.readable:
            return self.force_read_type()
        error_handler.read_error(self.name)
        return ""

    def force_read_type(self) -> str:
        """Return the type tag of the cursor slot without a permission check."""
        return self.types[self.index]

    def to_string(self) -> str:
        """Return the element values, each followed by a space, wrapped in brackets."""
        parts = "".join(str(val.to_string()) + " " for val in self.values)
        return "[ " + parts + " ]"

    def to_type_string(self) -> str:
        """Return the first letter of every slot's type tag if the list is readable."""
        if self.readable:
            return self.force_to_type_string()
        error_handler.read_error(self.name)
        return ""

    def force_to_type_string(self) -> str:
        """Return the first letter of every slot's type tag without a permission check."""
        return "".join(t[0] for t in self.types)

    def set_readable(self, readable: bool) -> None:
        """Set the readable permission flag."""
        self.readable = readable

    def set_writable(self, writable: bool) -> None:
        """Set the writable permission flag."""
        self.writable = writable

    def get_name(self) -> str:
        """Return the list's name."""
        return self.name

    def is_writable(self) -> bool:
        """Return True if the list is writable."""
        return self.writable

    def is_readable(self) -> bool:
        """Return True if the list is readable."""
        return self.readable

    def get_values(self) -> List[Value]:
        """Return the underlying list of slot objects (not a copy)."""
        return self.values

    def get_types(self) -> List[str]:
        """Return the underlying list of per-slot type tags (not a copy)."""
        return self.types

    def get_size(self) -> int:
        """Return the current number of slots."""
        return self.size

    def make_copy(self) -> 'VmList':
        """Return a writable copy with copied slots and type tags.

        The readable flag is carried over; the copy's cursor starts at 0.
        """
        copy = VmList(self.name, self.size, self.readable, True, self.type)
        copy.types = self.types.copy()
        copy.values = [val.make_copy() for val in self.values]
        return copy

    def override(self, values: List[Value], types: List[str], size: int) -> None:
        """Replace slots, type tags and size directly, with no checks."""
        self.values = values
        self.types = types
        self.size = size

    def give_type(self) -> str:
        """Return the VM type tag of the list object itself."""
        return self.type

    def copy_var(self, source_list: 'VmList') -> None:
        """Replace this list's content with copies of `source_list`'s slots.

        Reports a type error if the two type tags differ and a read error if
        the source is not readable. This list's own writable flag is not
        consulted.
        """
        if self.type != source_list.type:
            error_handler.type_error(self.name, self.type, source_list.type)
        elif not source_list.is_readable():
            error_handler.read_error(source_list.get_name())
        else:
            self.type = 'LIST'
            self.types = source_list.types.copy()
            self.size = source_list.size
            self.values = [var.make_copy() for var in source_list.values]
            # NOTE(review): the cursor is left untouched, so it may point past
            # the new size until the caller changes the index - confirm intent.
|
| 484 |
|
| 485 |
|
| 486 |
+
# ---------------------------------------------------------------------------
|
| 487 |
+
# Condition
|
| 488 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
+
class Condition:
    """Comparison of two ``Value`` operands using an operator chosen by name.

    The operator name is translated to an index through ``COMPARE_OP_INDEX``;
    the callable is looked up in ``COMPARE_OPS`` at evaluation time.
    """

    def __init__(self, compare: str):
        self.compare_index = COMPARE_OP_INDEX[compare]
        # Both operands start as unreadable placeholders until set_left /
        # set_right install real ones.
        self.left = Value("0", 0, False, False, "INT")
        self.right = Value("0", 0, False, False, "INT")
        self.type = "COND"

    def set_compare(self, compare: str) -> None:
        """Select a different comparison operator by name."""
        self.compare_index = COMPARE_OP_INDEX[compare]

    def set_left(self, left: Value) -> None:
        """Install the left-hand operand."""
        self.left = left

    def set_right(self, right: Value) -> None:
        """Install the right-hand operand."""
        self.right = right

    def evaluate(self) -> bool:
        """Apply the selected operator to both operands' checked reads."""
        operator_fn = COMPARE_OPS[self.compare_index]
        lhs = self.left.read()
        rhs = self.right.read()
        return operator_fn(lhs, rhs)

    def give_type(self) -> str:
        """Return the VM type tag of this object ("COND")."""
        return self.type
|
| 518 |
|
| 519 |
|
| 520 |
+
# ---------------------------------------------------------------------------
|
| 521 |
+
# Condition helpers
|
| 522 |
+
# ---------------------------------------------------------------------------
|
| 523 |
|
| 524 |
+
def add_cond(name, compare):
    """Register a new condition called *name* that uses operator *compare*.

    Reports a var-exists error when the name is already registered.
    """
    if name in cond_registry:
        error_handler.var_exists_error(name)
        return
    cond_registry[name] = Condition(compare)
|
| 531 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
|
| 533 |
+
def add_local_cond(name, compare):
    """Register a condition and also record it as local to the current call.

    Reports a var-exists error when the name is already registered. Returns
    the dict of current local conditions.
    """
    if name in cond_registry:
        error_handler.var_exists_error(name)
    else:
        condition = Condition(compare)
        cond_registry[name] = condition
        current_local_conds[name] = condition
    return current_local_conds
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def push_local_conds(local_conds):
    """Unregister every condition named in *local_conds*, then push the dict on the stack."""
    for cond_name in local_conds:
        del cond_registry[cond_name]
    local_conds_stack.push(local_conds)
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
def pop_local_conds():
    """Pop the most recent local-condition dict and put it back in the registry.

    Conditions recorded in ``saved_conds`` by the previous pop are removed
    from the registry first; ``saved_conds`` is then rebuilt from the popped
    dict. Returns the popped dict.
    """
    global saved_conds
    restored = local_conds_stack.pop()
    for stale_name in saved_conds:
        cond_registry.pop(stale_name, None)
    saved_conds = {}
    for cond_name, condition in restored.items():
        cond_registry[cond_name] = condition
        saved_conds[cond_name] = condition
    return restored
|
| 564 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
|
| 566 |
+
def get_cond(name):
    """Return the registered condition called *name*.

    Reports a doesn't-exist error when no such condition is registered.
    """
    if name not in cond_registry:
        error_handler.doesnt_exist_error(name)
        return None
    return cond_registry[name]
|
| 573 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
|
| 575 |
+
# ---------------------------------------------------------------------------
|
| 576 |
+
# Debug / print helpers
|
| 577 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
|
| 579 |
+
def print_vars():
    """Dump every writable variable, grouped by type, then the output buffer."""
    print("END OF PROGRAM")
    print()
    for type_name, variables in var_registry.items():
        print("All the vars used from type " + type_name)
        for var_name, variable in variables.items():
            # Unwritable variables are left out of the dump.
            if variable.is_writable():
                print(var_name + " : " + variable.to_string())
        print("")
    print("All that was printed during the program")
    print(printed_output)
|
| 592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
|
| 594 |
+
# ---------------------------------------------------------------------------
|
| 595 |
+
# Variable helpers
|
| 596 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
|
| 598 |
+
def add_var(var_type, name, value):
    """Create a readable, writable variable in the registry.

    For ``"LIST"`` the *value* argument is the list size; for any other type
    it is the initial value. Reports a var-exists error when the name is
    already taken within that type.
    """
    bucket = var_registry[var_type]
    if name in bucket:
        error_handler.var_exists_error(name)
    factory = VmList if var_type == "LIST" else Value
    bucket[name] = factory(name, value, True, True, var_type)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
+
def get_var(var_type, name):
    """Return the registered variable *name* of type *var_type*.

    Reports a doesn't-exist error when the name is not registered under that
    type.
    """
    bucket = var_registry[var_type]
    if name not in bucket:
        error_handler.doesnt_exist_error(name)
        return None
    return bucket[name]
|
| 616 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
+
def add_local_var(var_type, name, value):
    """Create a variable and also record it as local to the current call.

    For ``"LIST"`` the *value* argument is the list size; for any other type
    it is the initial value. Reports a var-exists error when the name is
    already taken within that type.
    """
    bucket = var_registry[var_type]
    if name in bucket:
        error_handler.var_exists_error(name)
    factory = VmList if var_type == "LIST" else Value
    created = factory(name, value, True, True, var_type)
    bucket[name] = created
    current_local_vars[var_type][name] = created
|
| 629 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
|
| 631 |
+
def push_local_vars(var_registry_ref: dict, local_vars: dict, stack: Stack):
    """Remove the given locals from the registry and push them on *stack*.

    Returns a fresh, empty per-type dict for the caller to use as the new set
    of local variables.
    """
    for var_type, names in local_vars.items():
        for name in names:
            del var_registry_ref[var_type][name]
    stack.push(local_vars)
    return {type_name: {} for type_name in ("INT", "STR", "LIST", "BOOLEAN")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
+
def pop_local_vars(var_registry_ref: dict):
    """Pop the most recent local-variable frame and put it back in the registry.

    Variables recorded in ``saved_locals`` by the previous pop are removed
    from the registry first; ``saved_locals`` is then rebuilt from the popped
    frame. Returns the popped frame.
    """
    global saved_locals
    restored = local_vars_stack.pop()
    for var_type, names in saved_locals.items():
        for name in names:
            var_registry_ref[var_type].pop(name, None)
    saved_locals = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
    for var_type, variables in restored.items():
        for name, variable in variables.items():
            var_registry_ref[var_type][name] = variable
            saved_locals[var_type][name] = variable
    return restored
|
| 655 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
|
| 657 |
+
# ---------------------------------------------------------------------------
|
| 658 |
+
# VM print
|
| 659 |
+
# ---------------------------------------------------------------------------
|
| 660 |
|
| 661 |
+
def vm_print(var, newline):
    """Print *var* and record the same text in the output buffer.

    With ``newline`` equal to 1 the text ends in a line break; with 0 it ends
    in a single space.
    """
    global printed_output
    rendered = var.to_string()
    terminator = '\n' * newline + ' ' * (1 - newline)
    text = rendered + terminator
    print(text, end='')
    printed_output += text
|
| 667 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
+
# ---------------------------------------------------------------------------
|
| 670 |
+
# Arithmetic / string operations
|
| 671 |
+
# ---------------------------------------------------------------------------
|
| 672 |
|
| 673 |
+
def vm_add(var1, var2):
    """Store the sum of INT variables *var1* and *var2* in TEMPORARY."""
    target = get_var("INT", "TEMPORARY")
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs + rhs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
|
| 678 |
+
def vm_sub(var1, var2):
    """Store *var1* minus *var2* (both INT variables) in TEMPORARY."""
    target = get_var("INT", "TEMPORARY")
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs - rhs)
|
| 681 |
|
|
|
|
|
|
|
| 682 |
|
| 683 |
+
def vm_mul(var1, var2):
    """Store the product of INT variables *var1* and *var2* in TEMPORARY."""
    target = get_var("INT", "TEMPORARY")
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs * rhs)
|
| 686 |
|
|
|
|
|
|
|
| 687 |
|
| 688 |
+
def vm_div(var1, var2):
    """Store the floor division of *var1* by *var2* in TEMPORARY.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(dividend // divisor)
|
| 693 |
|
|
|
|
|
|
|
| 694 |
|
| 695 |
+
def vm_float_div(var1, var2):
    """Store the true division of *var1* by *var2* in TEMPORARY.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(dividend / divisor)
|
| 700 |
|
|
|
|
|
|
|
| 701 |
|
| 702 |
+
def vm_pow(var1, var2):
    """Store *var1* raised to the power *var2* in TEMPORARY."""
    target = get_var("INT", "TEMPORARY")
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    target.force_write(base ** exponent)
|
|
|
|
|
|
|
| 705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
|
| 707 |
+
def vm_mod(var1, var2):
    """Store *var1* modulo *var2*, converted with ``int()``, in TEMPORARY.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", "TEMPORARY")
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(int(dividend % divisor))
|
| 712 |
|
| 713 |
+
|
| 714 |
+
def vm_concat(var1, var2):
    """Store the concatenation of STR variables *var1* and *var2* in TEMPSTRING."""
    target = get_var("STR", "TEMPSTRING")
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    target.force_write(head + tail)
|
| 717 |
+
|
| 718 |
+
|
| 719 |
+
# ---------------------------------------------------------------------------
|
| 720 |
+
# Explicit-destination ALU operations (4-word dialect)
|
| 721 |
+
# ---------------------------------------------------------------------------
|
| 722 |
+
|
| 723 |
+
def vm_add_to(dest: str, var1: str, var2: str) -> None:
    """Store the sum of INT variables *var1* and *var2* in INT variable *dest*."""
    target = get_var("INT", dest)
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs + rhs)
|
| 726 |
+
|
| 727 |
+
|
| 728 |
+
def vm_sub_to(dest: str, var1: str, var2: str) -> None:
    """Store *var1* minus *var2* (both INT variables) in INT variable *dest*."""
    target = get_var("INT", dest)
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs - rhs)
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
def vm_mul_to(dest: str, var1: str, var2: str) -> None:
    """Store the product of INT variables *var1* and *var2* in INT variable *dest*."""
    target = get_var("INT", dest)
    lhs = get_var("INT", var1).read()
    rhs = get_var("INT", var2).read()
    target.force_write(lhs * rhs)
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
def vm_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the floor division of *var1* by *var2* in INT variable *dest*.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(dividend // divisor)
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
def vm_float_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the true division of *var1* by *var2* in INT variable *dest*.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(dividend / divisor)
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
def vm_pow_to(dest: str, var1: str, var2: str) -> None:
    """Store *var1* raised to the power *var2* in INT variable *dest*."""
    target = get_var("INT", dest)
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    target.force_write(base ** exponent)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 755 |
|
| 756 |
|
| 757 |
+
def vm_mod_to(dest: str, var1: str, var2: str) -> None:
    """Store *var1* modulo *var2*, converted with ``int()``, in INT variable *dest*.

    Reports a division-by-zero error when *var2* holds 0.
    """
    if get_var("INT", var2).read() == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    dividend = get_var("INT", var1).read()
    divisor = get_var("INT", var2).read()
    target.force_write(int(dividend % divisor))
|
| 762 |
+
|
| 763 |
+
|
| 764 |
+
def vm_concat_to(dest: str, var1: str, var2: str) -> None:
    """Store the concatenation of STR variables *var1* and *var2* in STR variable *dest*."""
    target = get_var("STR", dest)
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    target.force_write(head + tail)
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
def vm_list_grow(list_name, size_var):
    """Grow LIST variable *list_name* by the amount held in INT variable *size_var*."""
    target_list = get_var("LIST", list_name)
    extra_slots = get_var("INT", size_var).read()
    target_list.add_size(extra_slots)
|
| 772 |
+
|
| 773 |
+
|
| 774 |
+
def vm_assign_list(dest, src):
    """Copy the content of LIST variable *src* into LIST variable *dest*."""
    destination = get_var("LIST", dest)
    origin = get_var("LIST", src)
    destination.copy_var(origin)
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
def vm_assign_str(dest, src):
    """Copy the value of STR variable *src* into STR variable *dest*."""
    destination = get_var("STR", dest)
    origin = get_var("STR", src)
    destination.copy_var(origin)
|
| 782 |
+
|
| 783 |
+
|
| 784 |
+
def vm_assign_int(dest, src):
    """Copy the value of INT variable *src* into INT variable *dest*."""
    destination = get_var("INT", dest)
    origin = get_var("INT", src)
    destination.copy_var(origin)
|
| 787 |
+
|
| 788 |
+
|
| 789 |
+
def vm_pad_str(var_name, num_spaces):
    """Append *num_spaces* space characters to STR variable *var_name*."""
    target = get_var("STR", var_name)
    padding = ' ' * num_spaces
    target.write(target.read() + padding)
|
| 792 |
+
|
| 793 |
+
|
| 794 |
+
def vm_type_to_int(type_str_var, dest_int_var):
    """Translate a type name held in a STR variable into its numeric code.

    The codes are INT=0, STR=1, BOOLEAN=2, LIST=3; the code is written to INT
    variable *dest_int_var*. Any other string is reported through
    ``error_handler.type_to_int_error``.
    """
    type_codes = {"INT": 0, "STR": 1, "BOOLEAN": 2, "LIST": 3}
    type_str = get_var('STR', type_str_var).read()
    if type_str not in type_codes:
        error_handler.type_to_int_error(type_str)
        return
    get_var('INT', dest_int_var).write(type_codes[type_str])
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
# ---------------------------------------------------------------------------
|
| 805 |
+
# Program-locals save/restore
|
| 806 |
+
# ---------------------------------------------------------------------------
|
| 807 |
+
|
| 808 |
+
def restore_program_locals(global_vars: dict, saved_program_locals: dict):
    """Swap the caller's program-local variables back into *global_vars*.

    Entries recorded in ``saved_globals`` by the previous restore are deleted
    from *global_vars* first; ``saved_globals`` is then rebuilt from
    *saved_program_locals* while those entries are written back.
    """
    global saved_globals
    for var_type, names in saved_globals.items():
        for name in names:
            del global_vars[var_type][name]
    saved_globals = {"INT": {}, "STR": {}, "LIST": {}}
    for var_type, variables in saved_program_locals.items():
        for name, variable in variables.items():
            global_vars[var_type][name] = variable
            saved_globals[var_type][name] = variable
|
| 819 |
+
|
| 820 |
+
|
| 821 |
+
# ---------------------------------------------------------------------------
|
| 822 |
+
# Line tick
|
| 823 |
+
# ---------------------------------------------------------------------------
|
| 824 |
+
|
| 825 |
+
def tick_line():
    """Count one executed line and report an error when the limit is hit.

    Returns True while the counter differs from ``line_limit``; otherwise
    hands over to ``error_handler.line_limit_error()``.
    """
    global line_count
    line_count += 1
    if line_count != line_limit:
        return True
    error_handler.line_limit_error()
    return None
|
| 833 |
|
| 834 |
|
| 835 |
+
# ---------------------------------------------------------------------------
|
| 836 |
+
# Function call enter / exit
|
| 837 |
+
# ---------------------------------------------------------------------------
|
| 838 |
+
|
| 839 |
+
def enter_function_call(input_type, input_var_name, function, output_type, output_var_name, call_line):
    """Push a new function call frame, execute the function, and copy its return value.

    Steps, in order:
      1. Record *call_line* as the current line.
      2. Push the caller's program-local variables on ``program_locals_stack``.
      3. Replace every program-local slot in the registry with a fresh,
         unreadable and unwritable default.
      4. Copy the input variable into the slot matching *input_type*.
      5. Increment the call counter; if it equals ``function_limit`` report an
         overflow error, otherwise push the local variables and conditions,
         call *function* and copy its result into the output variable.

    Args:
        input_type: Type key of the argument variable; must also be a key of
            ``program_local_names``.
        input_var_name: Name of the argument variable in the registry.
        function: Zero-argument callable; its return value is handed to the
            output variable's ``copy_var``.
        output_type: Type key of the variable receiving the result.
        output_var_name: Name of the variable receiving the result.
        call_line: Line number passed to ``set_current_line``.
    """
    set_current_line(call_line)
    global var_registry, function_count, program_locals_stack, current_program_locals
    global program_local_names, current_local_vars, function_limit, current_local_conds

    # Save the caller's program locals; the argument is looked up before the
    # slots are reset below.
    program_locals_stack.push(current_program_locals)
    var_input = get_var(input_type, input_var_name)

    # Reset program-local slots to unreadable/unwritable defaults
    for slot_type in program_local_names:
        slot_name = program_local_names[slot_type]
        if slot_type == 'STR':
            var_registry[slot_type][slot_name] = Value(slot_name, '', False, False, 'STR')
        elif slot_type == 'INT':
            var_registry[slot_type][slot_name] = Value(slot_name, 0, False, False, 'INT')
        else:
            var_registry[slot_type][slot_name] = VmList(slot_name, 8, False, False, 'LIST')

    # NOTE(review): the keys below are hard-coded, while the loop above takes the
    # slot names from program_local_names - presumably the same names; confirm.
    current_program_locals = {
        "INT": {"LOCALINT": var_registry["INT"]["LOCALINT"]},
        "STR": {"LOCALSTR": var_registry["STR"]["LOCALSTR"]},
        "LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
    }

    # copy_var does not consult the destination's writable flag, so the
    # still-locked slot can receive the argument here.
    var_registry[input_type][program_local_names[input_type]].copy_var(var_input)
    var_to_send = var_registry[input_type][program_local_names[input_type]]

    function_count += 1
    if function_count == function_limit:
        error_handler.overflow_error(function_call_stack)
    else:
        current_local_vars = push_local_vars(var_registry, current_local_vars, local_vars_stack)
        # Only the slot carrying the argument is opened up for the callee.
        var_to_send.set_readable(True)
        var_to_send.set_writable(True)
        push_local_conds(current_local_conds)
        current_local_conds = {}
        # The output variable is resolved before the callee runs.
        output = get_var(output_type, output_var_name)
        result = function()
        output.copy_var(result)
        tick_line()
|
| 880 |
+
|
| 881 |
+
|
| 882 |
+
def exit_function_call(return_type, return_var_name):
    """Pop the current function call frame and return the named output variable.

    Args:
        return_type: Type key passed to ``get_var`` to locate the return variable.
        return_var_name: Name of the variable whose value object is returned.

    Returns:
        The object returned by ``get_var(return_type, return_var_name)``,
        looked up *before* any frame state is restored.
    """
    global var_registry, program_locals_stack, function_count, current_local_conds
    global current_local_vars, local_vars_stack

    # Resolve the return variable first, while the callee's frame is still
    # active (presumably so a function-local variable can be returned --
    # confirm against get_var).
    return_var = get_var(return_type, return_var_name)
    function_count -= 1

    # Restore the program-local slots saved when the call was entered.
    saved_program_locals = program_locals_stack.pop()
    restore_program_locals(var_registry, saved_program_locals)

    # Restore call-level locals.
    current_local_vars = pop_local_vars(var_registry)

    # Restore conditions.
    current_local_conds = pop_local_conds()

    tick_line()
    return return_var
|
| 899 |
+
|
| 900 |
+
|
| 901 |
+
# ---------------------------------------------------------------------------
|
| 902 |
+
# VM initialisation
|
| 903 |
+
# ---------------------------------------------------------------------------
|
| 904 |
+
|
| 905 |
+
local_vars_stack = Stack() # stack of local variable dicts per function call
|
| 906 |
+
local_vars_stack.push({"INT": {}, "STR": {}, "LIST": {}})
|
| 907 |
+
program_locals_stack = Stack() # stack of program-local variable dicts
|
| 908 |
+
|
| 909 |
+
LOOP_INTEGER = Value(name="LOOPINTEGER", value=0, readable=True, writable=False, type_name="INT")
|
| 910 |
+
LOOP_STRING = Value(name="LOOPSTRING", value="", readable=True, writable=False, type_name="STR")
|
| 911 |
+
LOOP_BOOL = Value(name="LOOPBOOL", value=True, readable=True, writable=False, type_name="BOOL")
|
| 912 |
+
LOOP_LIST = VmList( name="LOOPLIST", size=8, readable=True, writable=False, type_name="LIST")
|
| 913 |
+
TEMPORARY = Value(name="TEMPORARY", value=0, readable=True, writable=True, type_name="INT")
|
| 914 |
+
LOCAL_INT = Value(name="LOCALINT", value=0, readable=False, writable=False, type_name="INT")
|
| 915 |
+
TEMP_STRING = Value(name="TEMPSTRING", value="", readable=True, writable=False, type_name="STR")
|
| 916 |
+
LOCAL_STR = Value(name="LOCALSTR", value="", readable=False, writable=False, type_name="STR")
|
| 917 |
+
LOCAL_LIST = VmList( name="LOCALLIST", size=8, readable=False, writable=False, type_name="LIST")
|
| 918 |
+
|
| 919 |
+
TYPE_INT_VAL = Value(name="INTEGER", value="INT", readable=True, writable=False, type_name="STR")
|
| 920 |
+
TYPE_STR_VAL = Value(name="STRING", value="STR", readable=True, writable=False, type_name="STR")
|
| 921 |
+
TYPE_LIST_VAL = Value(name="LIST", value="LIST", readable=True, writable=False, type_name="STR")
|
| 922 |
+
TYPE_BOOLEAN_VAL = Value(name="BOOLEAN", value="BOOLEAN", readable=True, writable=False, type_name="STR")
|
| 923 |
+
|
| 924 |
+
loop_var_registry = {
|
| 925 |
+
"INT": {"LOOPINTEGER": LOOP_INTEGER},
|
| 926 |
+
"STR": {"LOOPSTRING": LOOP_STRING},
|
| 927 |
+
"LIST": {"LOOPLIST": LOOP_LIST},
|
| 928 |
+
"BOOLEAN": {"LOOPBOOL": LOOP_BOOL},
|
| 929 |
+
}
|
| 930 |
+
loop_var_by_type = {
|
| 931 |
+
"INT": LOOP_INTEGER,
|
| 932 |
+
"STR": LOOP_STRING,
|
| 933 |
+
"LIST": LOOP_LIST,
|
| 934 |
+
"BOOLEAN": LOOP_BOOL,
|
| 935 |
+
}
|
| 936 |
+
|
| 937 |
+
THE_TRUTH = Condition('EQUALS')
|
| 938 |
+
THE_TRUTH.set_left(TEMPORARY)
|
| 939 |
+
THE_TRUTH.set_right(TEMPORARY)
|
| 940 |
+
|
| 941 |
+
var_registry = {
|
| 942 |
+
"INT": {
|
| 943 |
+
"LOOPINTEGER": LOOP_INTEGER,
|
| 944 |
+
"TEMPORARY": TEMPORARY,
|
| 945 |
+
"LOCALINT": LOCAL_INT,
|
| 946 |
+
},
|
| 947 |
+
"STR": {
|
| 948 |
+
"LOOPSTRING": LOOP_STRING,
|
| 949 |
+
"TEMPSTRING": TEMP_STRING,
|
| 950 |
+
"LOCALSTR": LOCAL_STR,
|
| 951 |
+
"INTEGER": TYPE_INT_VAL,
|
| 952 |
+
"STRING": TYPE_STR_VAL,
|
| 953 |
+
"LIST": TYPE_LIST_VAL,
|
| 954 |
+
"BOOLEAN": TYPE_BOOLEAN_VAL,
|
| 955 |
+
},
|
| 956 |
+
"LIST": {
|
| 957 |
+
"LOOPLIST": LOOP_LIST,
|
| 958 |
+
"LOCALLIST": LOCAL_LIST,
|
| 959 |
+
},
|
| 960 |
+
"BOOLEAN": {
|
| 961 |
+
"LOOPBOOL": LOOP_BOOL,
|
| 962 |
+
},
|
| 963 |
+
}
|
| 964 |
+
|
| 965 |
# Number names (ZERO..ONEHUNDRED) are compile-time constants only.
|
| 966 |
# They are resolved to plain integer literals by the compiler (toline/word_to_num)
|
| 967 |
+
# and must NOT live in var_registry["INT"] — that would prevent users from naming
|
| 968 |
# their own variables ONE, ZERO, etc.
|
| 969 |
+
|
| 970 |
+
# Template of the per-type local-variable tables (one empty dict per type key).
empty_local_vars = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
# Build fresh inner dicts instead of using dict.copy(): a shallow copy would
# share the inner per-type dicts with the template, so adding a local variable
# would silently mutate empty_local_vars as well.
current_local_vars = {type_name: {} for type_name in empty_local_vars}
|
| 972 |
+
current_program_locals = {
|
| 973 |
+
"INT": {"LOCALINT": var_registry["INT"]["LOCALINT"]},
|
| 974 |
+
"STR": {"LOCALSTR": var_registry["STR"]["LOCALSTR"]},
|
| 975 |
+
"LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
|
| 976 |
+
}
|
| 977 |
+
program_local_names = {"INT": "LOCALINT", "STR": "LOCALSTR", "LIST": "LOCALLIST"}
|
| 978 |
+
|
| 979 |
+
local_conds_stack = Stack()
|
| 980 |
+
current_local_conds = {}
|
| 981 |
+
|
| 982 |
+
program_locals_stack.push(current_program_locals)
|
| 983 |
+
|
| 984 |
+
error_handler = ErrorHandler()
|
| 985 |
+
cond_registry['THETRUTH'] = THE_TRUTH
|
language/dialects.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
dialects.py – Dialect normalisation for Tzefa source lines.
|
| 3 |
+
|
| 4 |
+
The canonical internal bytecode is a **4-word tuple**::
|
| 5 |
+
|
| 6 |
+
[VERB, TYPE, ARG1, ARG2]
|
| 7 |
+
|
| 8 |
+
Two source dialects produce these tuples:
|
| 9 |
+
|
| 10 |
+
THREE_WORD – ``OPCODE ARG1 ARG2`` (classic, expanded to 4-word internally)
|
| 11 |
+
FOUR_WORD – ``VERB TYPE ARG1 ARG2`` (verbose, already native)
|
| 12 |
+
|
| 13 |
+
Two casing modes:
|
| 14 |
+
|
| 15 |
+
CAPS_ONLY – every token is UPPERCASE
|
| 16 |
+
MIXED_CASE – commands Titlecase, user vars lowercase; all uppercased internally
|
| 17 |
+
"""
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
from typing import Dict, List, Tuple
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
# Public constants
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
THREE_WORD: str = "three_word"
|
| 28 |
+
FOUR_WORD: str = "four_word"
|
| 29 |
+
|
| 30 |
+
CAPS_ONLY: str = "caps_only"
|
| 31 |
+
MIXED_CASE: str = "mixed_case"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
# 3-word → 4-word expansion table
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
# Every classic 3-word opcode maps to a (VERB, TYPE) pair.
|
| 38 |
+
|
| 39 |
+
THREE_TO_FOUR: Dict[str, Tuple[str, str]] = {
|
| 40 |
+
# Variable declarations
|
| 41 |
+
"MAKEINTEGER": ("MAKE", "INTEGER"),
|
| 42 |
+
"MAKESTR": ("MAKE", "STRING"),
|
| 43 |
+
"MAKEBOOLEAN": ("MAKE", "BOOLEAN"),
|
| 44 |
+
"NEWLIST": ("NEW", "LIST"),
|
| 45 |
+
"BASICCONDITION": ("NEW", "CONDITION"),
|
| 46 |
+
|
| 47 |
+
# Assignment / copy
|
| 48 |
+
"ASSSIGNINT": ("SET", "INTEGER"),
|
| 49 |
+
"STRINGASSIGN": ("SET", "STRING"),
|
| 50 |
+
"COPYLIST": ("SET", "LIST"),
|
| 51 |
+
"SETINDEX": ("SET", "INDEX"),
|
| 52 |
+
"LEFTSIDE": ("SET", "LEFT"),
|
| 53 |
+
"RIGHTSIDE": ("SET", "RIGHT"),
|
| 54 |
+
|
| 55 |
+
# Condition
|
| 56 |
+
"CHANGECOMPARE": ("CHANGE", "COMPARE"),
|
| 57 |
+
|
| 58 |
+
# Control flow
|
| 59 |
+
"WHILE": ("WHILE", "CONDITION"),
|
| 60 |
+
"WHILETRUE": ("WHILE", "BOOLEAN"),
|
| 61 |
+
"COMPARE": ("IF", "CONDITION"),
|
| 62 |
+
"IFTRUE": ("IF", "BOOLEAN"),
|
| 63 |
+
"ELSECOMPARE": ("ELIF", "CONDITION"),
|
| 64 |
+
"ELSEIF": ("ELIF", "BOOLEAN"),
|
| 65 |
+
"ITERATE": ("ITERATE", "LIST"),
|
| 66 |
+
|
| 67 |
+
# Print
|
| 68 |
+
"PRINTSTRING": ("PRINT", "STRING"),
|
| 69 |
+
"PRINTINTEGER": ("PRINT", "INTEGER"),
|
| 70 |
+
|
| 71 |
+
# List read
|
| 72 |
+
"GETINTEGER": ("GET", "INTEGER"),
|
| 73 |
+
"GETSTRING": ("GET", "STRING"),
|
| 74 |
+
"GETBOOL": ("GET", "BOOLEAN"),
|
| 75 |
+
"GETLIST": ("GET", "LIST"),
|
| 76 |
+
"GETTYPE": ("GET", "TYPE"),
|
| 77 |
+
"LENGTH": ("GET", "LENGTH"),
|
| 78 |
+
|
| 79 |
+
# List write
|
| 80 |
+
"WRITEINTEGER": ("WRITE", "INTEGER"),
|
| 81 |
+
"WRITESTRING": ("WRITE", "STRING"),
|
| 82 |
+
"WRITEBOOL": ("WRITE", "BOOLEAN"),
|
| 83 |
+
"WRITELIST": ("WRITE", "LIST"),
|
| 84 |
+
|
| 85 |
+
# List resize
|
| 86 |
+
"ADDSIZE": ("ADD", "SIZE"),
|
| 87 |
+
|
| 88 |
+
# String utilities
|
| 89 |
+
"BLANKSPACES": ("PAD", "STRING"),
|
| 90 |
+
|
| 91 |
+
# Type introspection
|
| 92 |
+
"TYPETOINT": ("TYPE", "TOINT"),
|
| 93 |
+
|
| 94 |
+
# Functions
|
| 95 |
+
"INTEGERFUNCTION": ("FUNCTION", "INTEGER"),
|
| 96 |
+
"STRINGFUNCTION": ("FUNCTION", "STRING"),
|
| 97 |
+
"LISTFUNCTION": ("FUNCTION", "LIST"),
|
| 98 |
+
"RETURN": ("RETURN", "VALUE"),
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
# 3-word ALU opcodes that implicitly write to TEMPORARY (or TEMPSTRING for COMBINE).
|
| 102 |
+
# They expand differently: OPCODE A B → [VERB, DEST, A, B]
|
| 103 |
+
_THREE_WORD_ALU: Dict[str, Tuple[str, str]] = {
|
| 104 |
+
"ADDVALUES": ("ADD", "TEMPORARY"),
|
| 105 |
+
"SUBTRACT": ("SUBTRACT", "TEMPORARY"),
|
| 106 |
+
"MULTIPLY": ("MULTIPLY", "TEMPORARY"),
|
| 107 |
+
"DIVIDE": ("DIVIDE", "TEMPORARY"),
|
| 108 |
+
"SIMPLEDIVIDE": ("SIMPLEDIVIDE","TEMPORARY"),
|
| 109 |
+
"MODULO": ("MODULO", "TEMPORARY"),
|
| 110 |
+
"MATHPOW": ("POWER", "TEMPORARY"),
|
| 111 |
+
"COMBINE": ("COMBINE", "TEMPSTRING"),
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
# Reverse lookup: (VERB, TYPE) → old 3-word opcode (only for non-ALU ops)
|
| 115 |
+
FOUR_TO_THREE: Dict[Tuple[str, str], str] = {v: k for k, v in THREE_TO_FOUR.items()}
|
| 116 |
+
|
| 117 |
+
# Set of ALU verbs that use the [VERB, DEST, SRC1, SRC2] layout
|
| 118 |
+
ALU_VERBS = frozenset(_THREE_WORD_ALU[k][0] for k in _THREE_WORD_ALU)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def words_per_line(dialect: str) -> int:
    """Return the expected token count for the given dialect.

    ``FOUR_WORD`` lines carry 4 tokens; any other dialect value is treated
    as the 3-word form.
    """
    if dialect == FOUR_WORD:
        return 4
    return 3
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# ---------------------------------------------------------------------------
|
| 127 |
+
# Normalisation — always produces a 4-word CAPS tuple
|
| 128 |
+
# ---------------------------------------------------------------------------
|
| 129 |
+
|
| 130 |
+
def normalize_line(tokens: List[str], dialect: str, casing: str) -> List[str]:
    """
    Convert a raw token list into a canonical 4-word UPPERCASE tuple.

    For most instructions the layout is [VERB, TYPE, ARG1, ARG2].
    For ALU operations the layout is [VERB, DEST, SRC1, SRC2].

    In the 3-word dialect ALU ops have no explicit dest:
        ADDVALUES A B   →  [ADD, TEMPORARY, A, B]
        MODULO A B      →  [MODULO, TEMPORARY, A, B]

    In the 4-word dialect the dest is already present:
        ADD RESULT A B  →  [ADD, RESULT, A, B]

    Parameters
    ----------
    tokens : list[str]
        Raw tokens of one source line. Missing tokens are padded with ``""``
        and surplus tokens are dropped.
    dialect : str
        ``FOUR_WORD`` for the verbose dialect; any other value is handled as
        the classic 3-word dialect.
    casing : str
        Accepted for interface symmetry but not consulted here: every token
        is upper-cased regardless of the casing mode.

    Returns
    -------
    list[str]
        Exactly 4 UPPERCASE tokens.
    """
    upper = [t.upper() for t in tokens]

    if dialect == FOUR_WORD:
        # Already native: pad to four tokens, then drop any extras.
        return (upper + [""] * 4)[:4]

    # THREE_WORD → expand to 4-word (pad to three tokens, drop any extras).
    opcode, arg1, arg2 = (upper + [""] * 3)[:3]

    # 3-word ALU: inject the implicit destination.
    alu = _THREE_WORD_ALU.get(opcode)
    if alu is not None:
        verb, dest = alu
        return [verb, dest, arg1, arg2]

    # Standard verb+type expansion.
    pair = THREE_TO_FOUR.get(opcode)
    if pair is not None:
        verb, type_word = pair
        return [verb, type_word, arg1, arg2]

    # Unknown opcode — treat as user-defined function call: FUNCNAME INPUT OUTPUT
    return ["CALL", opcode, arg1, arg2]
|
| 175 |
+
|
language/topy.py
CHANGED
|
@@ -1,454 +1,384 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
for i in range(len(listofvals) - 1):
|
| 4 |
-
stri = stri + " " + str(listofvals[i]) + " " + ","
|
| 5 |
-
stri = stri + " " + str(listofvals[-1]) + " )"
|
| 6 |
-
return stri
|
| 7 |
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
return "getvar" + makeparenthasis([tostri(type), tostri(name)]) + ".read()"
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
infunction = False
|
| 15 |
-
dictoffunct = {i[0]: i for i in [[0]]}
|
| 16 |
-
dictofinstructions = {i: "thetext" for i in dictoffunct}
|
| 17 |
-
listfunctionswithtypes = {i[0]: i for i in [[0]]}
|
| 18 |
-
listfunctionswithtypes["GREATESTDIV"] = ["GREATESTDIV", "LIST", "LIST"]
|
| 19 |
-
for i in listfunctionswithtypes:
|
| 20 |
-
for j in range(len(listfunctionswithtypes[i])):
|
| 21 |
-
if (listfunctionswithtypes[i][j] == "BOOL"):
|
| 22 |
-
listfunctionswithtypes[i][j] = "BOOLEAN"
|
| 23 |
|
| 24 |
-
listofindentchanges = [0 for i in range(1, 1000 + 1)]
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
listfunctionswithtypes = {i[0]: i for i in listfunctions}
|
| 31 |
|
|
|
|
| 32 |
|
| 33 |
-
|
| 34 |
-
return "'" + str(value) + "'"
|
| 35 |
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
if (infunction):
|
| 41 |
-
declarestr = "addlocalvar" + inparan
|
| 42 |
-
else:
|
| 43 |
-
declarestr = "addvar" + inparan
|
| 44 |
-
stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
|
| 45 |
-
return stri
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def MAKESTR(name, value, linenum):
|
| 49 |
-
global infunction
|
| 50 |
-
inparan = makeparenthasis(['"STR"', tostri(name), "'" + str(value) + "'"])
|
| 51 |
-
if (infunction):
|
| 52 |
-
declarestr = "addlocalvar" + inparan
|
| 53 |
-
else:
|
| 54 |
-
declarestr = "addvar" + inparan
|
| 55 |
-
stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
|
| 56 |
-
return stri
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
def MAKEBOOLEAN(name, value, linenum):
|
| 60 |
-
global infunction
|
| 61 |
-
if value == "TRUE":
|
| 62 |
-
value = "True"
|
| 63 |
-
elif value == "FALSE":
|
| 64 |
-
value = "False"
|
| 65 |
-
inparan = makeparenthasis(['"BOOLEAN"', tostri(name), value])
|
| 66 |
-
if (infunction):
|
| 67 |
-
declarestr = "addlocalvar" + inparan
|
| 68 |
-
else:
|
| 69 |
-
declarestr = "addvar" + inparan
|
| 70 |
-
stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
|
| 71 |
-
return stri
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
def NEWLIST(name, value, linenum):
|
| 76 |
-
global infunction
|
| 77 |
-
# value is already a plain integer string (e.g. '6') resolved at compile time
|
| 78 |
-
inparan = makeparenthasis(['"LIST"', tostri(name), str(int(value))])
|
| 79 |
-
if (infunction):
|
| 80 |
-
declarestr = "addlocalvar" + inparan
|
| 81 |
-
else:
|
| 82 |
-
declarestr = "addvar" + inparan
|
| 83 |
-
stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
|
| 84 |
-
return stri
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def BASICCONDITION(name, compare, linenum):
|
| 88 |
-
global infunction
|
| 89 |
-
if (infunction == False):
|
| 90 |
-
declarestr = "addcond" + makeparenthasis([tostri(name), tostri(compare)])
|
| 91 |
-
else:
|
| 92 |
-
declarestr = "addlocalcond" + makeparenthasis([tostri(name), tostri(compare)])
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
-
def
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
"getcond" + makeparenthasis([tostri(name)]) + ".changeleft(" + thegetvar + ")" + "; " + lineupdate
|
| 102 |
|
| 103 |
-
return (stri)
|
| 104 |
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
def RIGHTSIDE(name, othername, linenum):
|
| 107 |
-
thegetvar = "getvar" + makeparenthasis(['"INT"', tostri(othername)])
|
| 108 |
-
stri = "line(" + str(linenum) + ")" + "; " + \
|
| 109 |
-
"getcond" + makeparenthasis([tostri(name)]) + ".changeright(" + thegetvar + ")" + "; " + lineupdate
|
| 110 |
-
return (stri)
|
| 111 |
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
def CHANGECOMPARE(name, valuecompare, linenum):
|
| 114 |
-
stri = "line(" + str(linenum) + ")" + "; " + \
|
| 115 |
-
"getcond" + makeparenthasis([tostri(name)]) + ".changecompare(" + tostri(
|
| 116 |
-
valuecompare) + ")" + "; " + lineupdate
|
| 117 |
-
return (stri)
|
| 118 |
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
def WHILE(compare, endline, linenum):
|
| 121 |
-
global listofindentchanges
|
| 122 |
-
lineofwhile = "while" + makeparenthasis(["line(" + str(linenum) + ") and " + (
|
| 123 |
-
"getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
|
| 124 |
-
listofindentchanges[linenum + 1] = 1
|
| 125 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 126 |
-
return (lineofwhile)
|
| 127 |
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
-
def
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
listofindentchanges[linenum + 1] = 1
|
| 133 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 134 |
-
return lineofwhile
|
| 135 |
|
| 136 |
|
| 137 |
-
def
|
| 138 |
-
|
| 139 |
-
lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
|
| 140 |
-
"getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
|
| 141 |
-
listofindentchanges[linenum + 1] = 1
|
| 142 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 143 |
-
return (lineofwhile)
|
| 144 |
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
"getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
|
| 150 |
-
listofindentchanges[linenum + 1] = 1
|
| 151 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 152 |
-
return (lineofwhile)
|
| 153 |
|
|
|
|
| 154 |
|
| 155 |
-
def
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
|
| 164 |
-
|
| 165 |
-
global listofindentchanges
|
| 166 |
-
lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
|
| 167 |
-
"getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
|
| 168 |
-
listofindentchanges[linenum + 1] = 1
|
| 169 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 170 |
-
return (lineofwhile)
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
def ELSEIF(bool, endline, linenum):
|
| 174 |
-
global listofindentchanges
|
| 175 |
-
lineofwhile = "elif" + makeparenthasis(["line(" + str(linenum) + ") and " + (
|
| 176 |
-
"getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
|
| 177 |
-
listofindentchanges[linenum + 1] = 1
|
| 178 |
-
listofindentchanges[int(endline) + 1] = -1
|
| 179 |
-
return (lineofwhile)
|
| 180 |
|
|
|
|
| 181 |
|
| 182 |
-
def
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
thetype = "INT"
|
| 186 |
-
listofindentchanges[linenum + 1] = 1
|
| 187 |
-
return "def " + name + "" + '():'
|
| 188 |
|
| 189 |
|
| 190 |
-
|
| 191 |
-
global thetype, infunction
|
| 192 |
-
infunction = True
|
| 193 |
-
thetype = "STR"
|
| 194 |
-
listofindentchanges[linenum + 1] = 1
|
| 195 |
-
return "def " + name + "" + '():'
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
-
def LISTFUNCTION(name, type, linenum):
|
| 199 |
-
global thetype, infunction
|
| 200 |
-
infunction = True
|
| 201 |
-
thetype = "LIST"
|
| 202 |
-
listofindentchanges[linenum + 1] = 1
|
| 203 |
-
return "def " + name + "" + '():'
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
infunction = False
|
| 211 |
-
return ("line(" + str(linenum) + "); " + "return(updatelineexitingcall" + makeparenthasis(
|
| 212 |
-
[tostri(thetype), tostri(name)]) + ")")
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
def PRINTSTRING(name, state, linenum):
|
| 216 |
-
if (state == "BREAK"):
|
| 217 |
-
state = "True"
|
| 218 |
else:
|
| 219 |
-
|
| 220 |
-
return "
|
| 221 |
|
| 222 |
|
| 223 |
-
def
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
| 226 |
else:
|
| 227 |
-
|
| 228 |
-
return "
|
| 229 |
-
name) + ")," + state + "); " + "endline()"
|
| 230 |
|
| 231 |
|
| 232 |
-
def
|
| 233 |
-
|
| 234 |
-
|
|
|
|
| 235 |
|
| 236 |
|
| 237 |
-
|
| 238 |
-
name = tostri(name)
|
| 239 |
-
listname = tostri(listname)
|
| 240 |
-
return ("line(" + str(
|
| 241 |
-
linenum) + ");getvar('STR'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
def GETINTEGER(listname, name, linenum):
|
| 245 |
-
name = tostri(name)
|
| 246 |
-
listname = tostri(listname)
|
| 247 |
-
return ("line(" + str(
|
| 248 |
-
linenum) + ");getvar('INT'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
|
| 249 |
|
|
|
|
| 250 |
|
| 251 |
-
|
| 252 |
-
name = tostri(name)
|
| 253 |
-
listname = tostri(listname)
|
| 254 |
-
return ("line(" + str(
|
| 255 |
-
linenum) + ");getvar('LIST'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
|
| 256 |
|
| 257 |
|
| 258 |
-
def
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
| 263 |
|
| 264 |
|
| 265 |
-
|
| 266 |
-
name = tostri(name)
|
| 267 |
-
listname = tostri(listname)
|
| 268 |
-
return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"STR"'"); endline()")
|
| 269 |
|
|
|
|
| 270 |
|
| 271 |
-
def WRITEINTEGER(listname, name, linenum):
|
| 272 |
-
name = tostri(name)
|
| 273 |
-
listname = tostri(listname)
|
| 274 |
-
return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"INT"'"); endline()")
|
| 275 |
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
-
def WRITEBOOL(listname, name, linenum):
|
| 278 |
-
name = tostri(name)
|
| 279 |
-
listname = tostri(listname)
|
| 280 |
-
return ("line(" + str(
|
| 281 |
-
linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"BOOLEAN"'"); endline()")
|
| 282 |
|
|
|
|
| 283 |
|
| 284 |
-
def
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
-
|
| 291 |
-
strname = tostri(strname)
|
| 292 |
-
listname = tostri(listname)
|
| 293 |
-
return ("line(" + str(
|
| 294 |
-
linenum) + ");getvar('STR'," + strname + ").write(getvar('LIST'," + listname + ").returntype()); endline()")
|
| 295 |
|
|
|
|
|
|
|
| 296 |
|
| 297 |
-
def LENGTH(listname, intname, linenum):
|
| 298 |
-
intname = tostri(intname)
|
| 299 |
-
listname = tostri(listname)
|
| 300 |
-
return ("line(" + str(
|
| 301 |
-
linenum) + ");getvar('INT'," + intname + ").write(getvar('LIST'," + listname + ").getsize()); endline()")
|
| 302 |
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
def ADDVALUES(vali, vali2, linenum):
|
| 305 |
-
return ("line(" + str(linenum) + "); " + "add" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 306 |
|
|
|
|
|
|
|
| 307 |
|
| 308 |
-
def MULTIPLY(vali, vali2, linenum):
|
| 309 |
-
return ("line(" + str(linenum) + "); " + "mult" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 310 |
|
|
|
|
|
|
|
| 311 |
|
| 312 |
-
def MATHPOW(vali, vali2, linenum):
|
| 313 |
-
return ("line(" + str(linenum) + "); " + "pow" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 314 |
|
|
|
|
|
|
|
| 315 |
|
| 316 |
-
def DIVIDE(vali, vali2, linenum):
|
| 317 |
-
return ("line(" + str(linenum) + "); " + "betterdiv" + makeparenthasis(
|
| 318 |
-
[tostri(vali), tostri(vali2)]) + "; endline()")
|
| 319 |
|
|
|
|
|
|
|
| 320 |
|
| 321 |
-
def SIMPLEDIVIDE(vali, vali2, linenum):
|
| 322 |
-
return ("line(" + str(linenum) + "); " + "div" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 323 |
|
|
|
|
|
|
|
| 324 |
|
| 325 |
-
def SUBTRACT(vali, vali2, linenum):
|
| 326 |
-
return ("line(" + str(linenum) + "); " + "dec" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 327 |
|
|
|
|
| 328 |
|
| 329 |
-
def
|
| 330 |
-
return (
|
| 331 |
|
| 332 |
|
| 333 |
-
|
| 334 |
-
return ("line(" + str(linenum) + "); " + "comb" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 335 |
|
|
|
|
|
|
|
| 336 |
|
| 337 |
-
def ADDSIZE(vali, vali2, linenum):
|
| 338 |
-
return ("line(" + str(linenum) + "); " + "addsize" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
|
| 339 |
|
|
|
|
| 340 |
|
| 341 |
-
def
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
|
| 346 |
-
|
| 347 |
-
return ("line(" + str(linenum) + "); " + "assignstr" + makeparenthasis(
|
| 348 |
-
[tostri(vali), tostri(vali2)]) + "; endline()")
|
| 349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
-
def COPYLIST(vali, vali2, linenum):
|
| 352 |
-
return ("line(" + str(linenum) + "); " + "assignlist" + makeparenthasis(
|
| 353 |
-
[tostri(vali), tostri(vali2)]) + "; endline()")
|
| 354 |
|
|
|
|
| 355 |
|
| 356 |
-
def
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
-
dictofinstructions["MAKEINTEGER"] = MAKEINTEGER
|
| 366 |
-
dictofinstructions["MAKESTR"] = MAKESTR
|
| 367 |
-
dictofinstructions["MAKEBOOLEAN"] = MAKEBOOLEAN
|
| 368 |
-
dictofinstructions["NEWLIST"] = NEWLIST
|
| 369 |
-
dictofinstructions["BASICCONDITION"] = BASICCONDITION
|
| 370 |
-
dictofinstructions["LEFTSIDE"] = LEFTSIDE
|
| 371 |
-
dictofinstructions["RIGHTSIDE"] = RIGHTSIDE
|
| 372 |
-
dictofinstructions["CHANGECOMPARE"] = CHANGECOMPARE
|
| 373 |
-
dictofinstructions["WHILE"] = WHILE
|
| 374 |
-
dictofinstructions["ITERATE"] = ITERATE
|
| 375 |
-
dictofinstructions["COMPARE"] = COMPARE
|
| 376 |
-
dictofinstructions["ELSECOMPARE"] = ELSECOMPARE
|
| 377 |
-
dictofinstructions["WHILETRUE"] = WHILETRUE
|
| 378 |
-
dictofinstructions["IFTRUE"] = IFTRUE
|
| 379 |
-
dictofinstructions["ELSEIF"] = ELSEIF
|
| 380 |
-
dictofinstructions["SETINDEX"] = SETINDEX
|
| 381 |
-
dictofinstructions["INTEGERFUNCTION"] = INTEGERFUNCTION
|
| 382 |
-
dictofinstructions["STRINGFUNCTION"] = STRINGFUNCTION
|
| 383 |
-
dictofinstructions["LISTFUNCTION"] = LISTFUNCTION
|
| 384 |
-
dictofinstructions["PRINTSTRING"] = PRINTSTRING
|
| 385 |
-
dictofinstructions["PRINTINTEGER"] = PRINTINTEGER
|
| 386 |
-
dictofinstructions["GETSTRING"] = GETSTRING
|
| 387 |
-
dictofinstructions["GETINTEGER"] = GETINTEGER
|
| 388 |
-
dictofinstructions["GETLIST"] = GETLIST
|
| 389 |
-
dictofinstructions["GETBOOL"] = GETBOOL
|
| 390 |
-
dictofinstructions["WRITESTRING"] = WRITESTRING
|
| 391 |
-
dictofinstructions["WRITEINTEGER"] = WRITEINTEGER
|
| 392 |
-
dictofinstructions["WRITEBOOL"] = WRITEBOOL
|
| 393 |
-
dictofinstructions["WRITELIST"] = WRITELIST
|
| 394 |
-
dictofinstructions["GETTYPE"] = GETTYPE
|
| 395 |
-
dictofinstructions["LENGTH"] = LENGTH
|
| 396 |
-
dictofinstructions["ASSSIGNINT"] = ASSSIGNINT
|
| 397 |
-
dictofinstructions["ADDSIZE"] = ADDSIZE
|
| 398 |
-
dictofinstructions["STRINGASSIGN"] = STRINGASSIGN
|
| 399 |
-
dictofinstructions["COPYLIST"] = COPYLIST
|
| 400 |
-
dictofinstructions["ADDVALUES"] = ADDVALUES
|
| 401 |
-
dictofinstructions["MULTIPLY"] = MULTIPLY
|
| 402 |
-
dictofinstructions["MATHPOW"] = MATHPOW
|
| 403 |
-
dictofinstructions["DIVIDE"] = DIVIDE
|
| 404 |
-
dictofinstructions["SIMPLEDIVIDE"] = SIMPLEDIVIDE
|
| 405 |
-
dictofinstructions["SUBTRACT"] = SUBTRACT
|
| 406 |
-
dictofinstructions["MODULO"] = MODULO
|
| 407 |
-
dictofinstructions["COMBINE"] = COMBINE
|
| 408 |
-
dictofinstructions["BLANKSPACES"] = BLANKSPACES
|
| 409 |
-
dictofinstructions["RETURN"] = RETURN
|
| 410 |
-
dictofinstructions["TYPETOINT"] = TYPETOINT
|
| 411 |
|
|
|
|
|
|
|
|
|
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
|
| 422 |
|
| 423 |
-
def
|
|
|
|
| 424 |
from pathlib import Path
|
| 425 |
|
| 426 |
-
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
f.write("from Tzefa_Language.createdpython import *\n")
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
topy.py – Tzefa IR → Python code generator.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
The bytecode is a 4-element tuple::
|
| 5 |
|
| 6 |
+
[VERB, TYPE, ARG1, ARG2]
|
|
|
|
| 7 |
|
| 8 |
+
Each handler receives (type_word, arg1, arg2, line_num) and returns a
|
| 9 |
+
Python source-code string that is later assembled by make_py_file().
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
|
| 13 |
+
from typing import Any, Callable, Dict, List, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
|
|
|
| 15 |
|
| 16 |
+
# ---------------------------------------------------------------------------
|
| 17 |
+
# Globals
|
| 18 |
+
# ---------------------------------------------------------------------------
|
| 19 |
|
| 20 |
+
_TICK: str = "tick_line() ;"
|
| 21 |
+
_in_function: bool = False
|
| 22 |
+
_current_return_type: str = ""
|
|
|
|
| 23 |
|
| 24 |
+
_user_functions: Dict[str, List[str]] = {}
|
| 25 |
|
| 26 |
+
_indent_changes: List[int] = [0] * 1001
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# Tiny code-gen helpers
|
| 31 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
def _args(*values: Any) -> str:
|
| 34 |
+
"""Parenthesised, comma-separated argument list."""
|
| 35 |
+
return "( " + ", ".join(str(v) for v in values) + " )" if values else "()"
|
| 36 |
|
| 37 |
|
| 38 |
+
def _q(value: Any) -> str:
|
| 39 |
+
"""Single-quote a value for generated code."""
|
| 40 |
+
return f"'{value}'"
|
|
|
|
| 41 |
|
|
|
|
| 42 |
|
| 43 |
+
def _gv(var_type: str, name: str) -> str:
    """Return the source text of a ``get_var(<type>, <name>)`` call.

    Both arguments are emitted as quoted string literals via ``_q``.
    """
    quoted_type = _q(var_type)
    quoted_name = _q(name)
    return f"get_var({quoted_type}, {quoted_name})"
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
+
def _lp(n: int) -> str:
|
| 49 |
+
"""set_current_line() prefix."""
|
| 50 |
+
return f"set_current_line({n})"
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
def _stmt(line_num: int, *parts: str) -> str:
    """Assemble one generated statement line.

    The result is the ``set_current_line`` prefix for *line_num*, then the
    given *parts*, then the ``_TICK`` call, all separated by ``"; "``.
    """
    body = "; ".join(parts)
    # The body is joined as a single element so that an empty *parts* still
    # yields the same "prefix; ; tick" shape.
    return "; ".join((_lp(line_num), body, _TICK))
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
# ---------------------------------------------------------------------------
|
| 59 |
+
# Register user-defined functions (called by ErrorCorrection after parsing)
|
| 60 |
+
# ---------------------------------------------------------------------------
|
| 61 |
|
| 62 |
+
def register_user_function(name: str, input_type: str, output_type: str) -> None:
    """Record a user-defined function in the module registry.

    The entry is stored under *name* as ``[name, input_type, output_type]``
    and replaces any earlier entry with the same name. ``_call`` reads the
    two type fields when it emits a call to the function.
    """
    spec = [name, input_type, output_type]
    _user_functions[name] = spec
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
+
def get_user_functions() -> Dict[str, List[str]]:
    """Return the registry of user-defined functions.

    The live module-level dictionary is returned (not a copy), so changes
    made by the caller are visible to the code generator.
    """
    registry = _user_functions
    return registry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
+
# ---------------------------------------------------------------------------
|
| 72 |
+
# Handlers — each takes (type_word, arg1, arg2, line_num) -> str
|
| 73 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
# -- MAKE: declare variables -----------------------------------------------
|
| 76 |
|
| 77 |
+
def _make(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the declaration statement for a MAKE instruction.

    *type_word* selects what is declared, *arg1* is the name and *arg2* the
    initial value (or the comparison word for a CONDITION). Inside a
    function body the ``add_local_*`` calls are emitted instead of the
    global ones. An unrecognised *type_word* yields an empty string.
    """
    name = _q(arg1)

    if type_word == "CONDITION":
        declare_cond = "add_local_cond" if _in_function else "add_cond"
        return _stmt(ln, declare_cond + _args(name, _q(arg2)))

    declare = "add_local_var" if _in_function else "add_var"

    if type_word == "BOOLEAN":
        # TRUE/FALSE become Python literals; anything else passes through raw.
        if arg2 == "TRUE":
            literal = "True"
        elif arg2 == "FALSE":
            literal = "False"
        else:
            literal = arg2
        call_args = _args(_q("BOOLEAN"), name, literal)
    elif type_word == "STRING":
        call_args = _args(_q("STR"), name, _q(arg2))
    elif type_word == "INTEGER":
        # The integer value is emitted unquoted.
        call_args = _args(_q("INT"), name, arg2)
    elif type_word == "LIST":
        # int() raises ValueError if the value is not an integer.
        call_args = _args(_q("LIST"), name, int(arg2))
    else:
        return ""

    return _stmt(ln, declare + call_args)
|
| 92 |
|
| 93 |
|
| 94 |
+
# -- SET: assignment / index / condition sides -----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
def _set(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a SET instruction.

    INTEGER/STRING/LIST emit the matching ``vm_assign_*`` call, INDEX emits
    a ``change_index`` call on list *arg1* with *arg2* converted to ``int``,
    and LEFT/RIGHT set one side of condition *arg1* to the INT variable
    *arg2*. An unrecognised *type_word* yields an empty string.
    """
    assigners = {
        "INTEGER": "vm_assign_int",
        "STRING": "vm_assign_str",
        "LIST": "vm_assign_list",
    }
    target = _q(arg1)

    if type_word in assigners:
        return _stmt(ln, assigners[type_word] + _args(target, _q(arg2)))

    if type_word == "INDEX":
        position = int(arg2)
        return _stmt(ln, f"get_var('LIST',{target}).change_index({position})")

    if type_word in ("LEFT", "RIGHT"):
        setter = "set_left" if type_word == "LEFT" else "set_right"
        operand = _gv("INT", arg2)
        return _stmt(ln, f"get_cond({target}).{setter}({operand})")

    return ""
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
# -- CHANGE ----------------------------------------------------------------
|
| 113 |
|
| 114 |
+
def _change(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a CHANGE instruction.

    *type_word* is ignored: the only form emitted is a ``set_compare`` call
    on condition *arg1* with *arg2* as its argument.
    """
    condition = _q(arg1)
    comparator = _q(arg2)
    return _stmt(ln, "get_cond(" + condition + ").set_compare(" + comparator + ")")
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
|
| 119 |
+
# -- Control flow ----------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
def _while(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the header of a ``while`` loop.

    *arg1* names the condition (for CONDITION) or the boolean variable (for
    any other *type_word*) that guards the loop; *arg2* is the last line of
    the loop body. The indentation table is updated so that the body starts
    indenting on the line after *ln* and dedents after line *arg2*.

    NOTE(review): the table entries are assigned rather than accumulated,
    so two blocks closing on the same line dedent only once -- confirm that
    the language forbids this.
    """
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    if type_word == "CONDITION":
        test = f"get_cond({_q(arg1)}).evaluate()"
    else:  # BOOLEAN
        test = f"get_var('BOOLEAN',{_q(arg1)}).read()"

    return f"while( set_current_line({ln}) and {test} and tick_line() ):"
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
def _if(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the header of an ``if`` block.

    *arg1* names the condition (for CONDITION) or the boolean variable (for
    any other *type_word*) being tested; *arg2* is the last line of the
    block. The indentation table is set to indent after *ln* and dedent
    after line *arg2*.
    """
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    subject = _q(arg1)
    test = (
        f"get_cond({subject}).evaluate()"
        if type_word == "CONDITION"
        else f"get_var('BOOLEAN',{subject}).read()"
    )
    pieces = (f"set_current_line({ln})", test, "tick_line()")
    return "if( " + " and ".join(pieces) + " ):"
|
| 139 |
|
| 140 |
|
| 141 |
+
def _elif(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the header of an ``elif`` block.

    Same shape as the ``if`` header: *arg1* names the condition (for
    CONDITION) or boolean variable (otherwise), *arg2* is the last line of
    the block, and the indentation table is set to indent after *ln* and
    dedent after line *arg2*.
    """
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    subject = _q(arg1)
    if type_word == "CONDITION":
        test = "get_cond(" + subject + ").evaluate()"
    else:
        test = "get_var('BOOLEAN'," + subject + ").read()"

    return "elif( set_current_line({}) and {} and tick_line() ):".format(ln, test)
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
+
def _iterate(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the header of a ``for`` loop over list *arg1*.

    *type_word* is ignored. *arg2* is the last line of the loop body; the
    indentation table is set to indent after *ln* and dedent after it. The
    loop iterates over ``vm_loop_list(<list>, <ln>)``.
    """
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    list_expr = _gv("LIST", arg1)
    return "for i in vm_loop_list({}, {}):".format(list_expr, ln)
|
| 155 |
|
| 156 |
|
| 157 |
+
# -- PRINT -----------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
def _print(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a PRINT instruction.

    The variable *arg1* is looked up as ``STR`` when *type_word* is STRING
    and as ``INT`` otherwise. The second argument of the emitted
    ``vm_print`` call is ``True`` only when *arg2* is ``"BREAK"``.
    """
    if type_word == "STRING":
        vm_type = "STR"
    else:
        vm_type = "INT"

    newline = "True" if arg2 == "BREAK" else "False"
    variable = f"get_var({_q(vm_type)},{_q(arg1)})"
    return _stmt(ln, "vm_print(" + variable + "," + newline + ")")
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
+
# -- GET: read from list ---------------------------------------------------
|
| 166 |
|
| 167 |
+
# Maps the type word of a GET instruction to the VM type tag that _get()
# passes to the generated get_var() call for the destination variable.
_GET_TYPE_MAP = {"INTEGER": "INT", "STRING": "STR", "BOOLEAN": "BOOLEAN", "LIST": "LIST"}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
|
| 170 |
+
def _get(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a GET instruction (read from list *arg1*).

    TYPE writes the list's ``read_type()`` result into STR variable *arg2*;
    LENGTH writes its ``get_size()`` result into INT variable *arg2*; any
    other *type_word* is looked up in ``_GET_TYPE_MAP`` and the list's
    ``read()`` result is copied into variable *arg2* of that type.

    Raises:
        KeyError: if *type_word* is neither TYPE, LENGTH nor a map key.
    """
    source = f"get_var('LIST',{_q(arg1)})"
    dest_name = _q(arg2)

    if type_word == "TYPE":
        body = f"get_var('STR',{dest_name}).write({source}.read_type())"
    elif type_word == "LENGTH":
        body = f"get_var('INT',{dest_name}).write({source}.get_size())"
    else:
        vm_type = _GET_TYPE_MAP[type_word]
        body = f"get_var({_q(vm_type)},{dest_name}).copy_var({source}.read())"

    return _stmt(ln, body)
|
| 177 |
|
| 178 |
|
| 179 |
+
# -- WRITE: write to list --------------------------------------------------
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
+
# Maps the type word of a WRITE instruction to the VM type tag that _write()
# passes as the second argument of the generated place_value() call.
_WRITE_TYPE_MAP = {"INTEGER": "INT", "STRING": "STR", "BOOLEAN": "BOOLEAN", "LIST": "LIST"}
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
def _write(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a WRITE instruction (write to list *arg1*).

    Emits a ``place_value`` call on list *arg1* with the quoted name *arg2*
    and the VM type tag looked up from ``_WRITE_TYPE_MAP``.

    Raises:
        KeyError: if *type_word* is not a key of ``_WRITE_TYPE_MAP``.
    """
    vm_type = _WRITE_TYPE_MAP[type_word]
    target_list = "get_var('LIST'," + _q(arg1) + ")"
    # The type tag is emitted in double quotes, unlike the other arguments.
    return _stmt(ln, target_list + ".place_value(" + _q(arg2) + ',"' + vm_type + '")')
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
+
# -- ADD (dual purpose: list resize / arithmetic with explicit dest) --------
|
| 190 |
|
| 191 |
+
def _add(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate the statement for an ADD instruction.

    ADD has two forms. When *dest* is the literal word ``SIZE`` the
    instruction is ``ADD SIZE listname amount`` and a ``vm_list_grow`` call
    is emitted; otherwise it is ``ADD DEST SRC1 SRC2`` and a ``vm_add_to``
    call is emitted.
    """
    if dest == "SIZE":
        call = "vm_list_grow" + _args(_q(src1), _q(src2))
    else:
        call = "vm_add_to" + _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, call)
|
| 197 |
|
| 198 |
|
| 199 |
+
# -- Arithmetic verbs — all take (dest, src1, src2, ln) --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
def _subtract(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_sub_to(dest, src1, src2)`` statement for SUBTRACT."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_sub_to" + operands)
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
+
def _multiply(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_mul_to(dest, src1, src2)`` statement for MULTIPLY."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_mul_to" + operands)
|
| 207 |
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
def _divide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_float_div_to(dest, src1, src2)`` statement for DIVIDE."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_float_div_to" + operands)
|
| 211 |
|
|
|
|
|
|
|
| 212 |
|
| 213 |
+
def _simpledivide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_div_to(dest, src1, src2)`` statement for SIMPLEDIVIDE."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_div_to" + operands)
|
| 215 |
|
|
|
|
|
|
|
| 216 |
|
| 217 |
+
def _modulo(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_mod_to(dest, src1, src2)`` statement for MODULO."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_mod_to" + operands)
|
| 219 |
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
+
def _power(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_pow_to(dest, src1, src2)`` statement for POWER."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_pow_to" + operands)
|
| 223 |
|
|
|
|
|
|
|
| 224 |
|
| 225 |
+
def _combine(dest: str, src1: str, src2: str, ln: int) -> str:
    """Generate a ``vm_concat_to(dest, src1, src2)`` statement for COMBINE."""
    operands = _args(*map(_q, (dest, src1, src2)))
    return _stmt(ln, "vm_concat_to" + operands)
|
| 227 |
|
|
|
|
|
|
|
| 228 |
|
| 229 |
+
# -- PAD -------------------------------------------------------------------
|
| 230 |
|
| 231 |
+
def _pad(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``vm_pad_str`` statement for PAD.

    *type_word* is ignored. *arg1* is emitted quoted and *arg2* is emitted
    as-is (unquoted).
    """
    call_args = _args(_q(arg1), arg2)
    return _stmt(ln, "vm_pad_str" + call_args)
|
| 233 |
|
| 234 |
|
| 235 |
+
# -- TYPE ------------------------------------------------------------------
|
|
|
|
| 236 |
|
| 237 |
+
def _type(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate a ``vm_type_to_int`` statement for TYPE.

    *type_word* is ignored; *arg1* and *arg2* are both emitted quoted.
    """
    call_args = _args(_q(arg1), _q(arg2))
    return _stmt(ln, "vm_type_to_int" + call_args)
|
| 239 |
|
|
|
|
|
|
|
| 240 |
|
| 241 |
+
# -- FUNCTION: define ------------------------------------------------------
|
| 242 |
|
| 243 |
+
def _function(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the ``def`` header for a FUNCTION instruction.

    Marks the generator as being inside a function, records the VM return
    type derived from *type_word* (STRING -> STR, LIST -> LIST, anything
    else -> INT) and sets the line after *ln* to indent. *arg2* is not
    used. The emitted function takes no parameters.
    """
    global _in_function, _current_return_type

    _in_function = True

    if type_word == "STRING":
        return_type = "STR"
    elif type_word == "LIST":
        return_type = "LIST"
    else:
        # INTEGER and every unrecognised type word fall back to INT.
        return_type = "INT"
    _current_return_type = return_type

    _indent_changes[ln + 1] = 1
    return "def {}():".format(arg1)
|
| 250 |
|
| 251 |
|
| 252 |
+
# -- RETURN ----------------------------------------------------------------
|
|
|
|
|
|
|
| 253 |
|
| 254 |
+
def _return(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate the statement for a RETURN instruction.

    When *arg2* is ``"BREAK"`` this return also closes the function: the
    line after *ln* dedents and the generator leaves function mode. The
    emitted line returns ``exit_function_call(<return type>, <arg1>)``
    using the return type recorded by the FUNCTION header. *type_word* is
    not used. No ``tick_line`` call is appended to this statement.
    """
    global _in_function

    closes_function = arg2 == "BREAK"
    if closes_function:
        _indent_changes[ln + 1] = -1
        _in_function = False

    exit_call = f"exit_function_call({_q(_current_return_type)}, {_q(arg1)})"
    return f"set_current_line({ln}); return({exit_call})"
|
| 260 |
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
+
# -- CALL: user-defined function -------------------------------------------
|
| 263 |
|
| 264 |
+
def _call(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Generate an ``enter_function_call`` line for a user-defined function.

    *type_word* is the function name, *arg1* the input variable and *arg2*
    the output variable. The input/output types come from the function's
    registry entry; when the function is not registered both default to
    ``INT``. The function name is emitted unquoted, as an identifier.
    """
    func_name = type_word
    # An unregistered function shouldn't happen if ErrorCorrection registered
    # everything; fall back to INT -> INT in that case.
    spec = _user_functions.get(func_name) or [func_name, "INT", "INT"]
    input_type = _q(spec[1])
    output_type = _q(spec[2])
    call_args = ", ".join(
        (input_type, _q(arg1), func_name, output_type, _q(arg2), str(ln))
    )
    return "enter_function_call(" + call_args + ")"
|
| 275 |
|
| 276 |
|
| 277 |
+
# ---------------------------------------------------------------------------
|
| 278 |
+
# Dispatch table — keyed by VERB
|
| 279 |
+
# ---------------------------------------------------------------------------
|
| 280 |
|
| 281 |
+
# Maps each instruction verb (the first word of a 4-word instruction) to the
# handler that generates its Python source line. Every handler receives the
# remaining three words plus the line number. make_instruction() treats any
# verb missing from this table as a call to a user-defined function.
_DISPATCH: Dict[str, Callable[[str, str, str, int], str]] = {
    "MAKE": _make,
    "SET": _set,
    "CHANGE": _change,
    "WHILE": _while,
    "IF": _if,
    "ELIF": _elif,
    "ITERATE": _iterate,
    "PRINT": _print,
    "GET": _get,
    "WRITE": _write,
    "ADD": _add,
    "SUBTRACT": _subtract,
    "MULTIPLY": _multiply,
    "DIVIDE": _divide,
    "SIMPLEDIVIDE": _simpledivide,
    "MODULO": _modulo,
    "POWER": _power,
    "COMBINE": _combine,
    "PAD": _pad,
    "TYPE": _type,
    "FUNCTION": _function,
    "RETURN": _return,
    "CALL": _call,
}
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
+
# ---------------------------------------------------------------------------
|
| 309 |
+
# Code generation
|
| 310 |
+
# ---------------------------------------------------------------------------
|
| 311 |
|
| 312 |
+
def make_instruction(quad: List[str], line_num: int) -> str:
    """Translate one 4-word instruction into a line of Python source.

    The first word selects a handler from ``_DISPATCH``, which receives the
    other three words and *line_num*. A verb with no handler is treated as
    the name of a user-defined function, with ``quad[1]`` as its input
    variable and ``quad[2]`` as its output variable.
    """
    opcode = quad[0]
    handler = _DISPATCH.get(opcode)
    if not handler:
        # Unknown verb: interpret it as a user-defined function call.
        return _call(opcode, quad[1], quad[2], line_num)
    return handler(quad[1], quad[2], quad[3], line_num)
|
| 320 |
|
| 321 |
|
| 322 |
+
def make_py_file(instruction_list: List[List[str]]) -> None:
    """Compile *instruction_list* to Python and write it to test.py.

    Each instruction is a 4-word list; instruction *i* (1-based) is passed
    to ``make_instruction`` and written on its own line, indented according
    to the indentation table that the control-flow handlers fill in as
    compilation proceeds. The output file is ``test.py`` next to this
    module and is overwritten on every call.

    Generator state (the indentation table, the "inside a function" flag
    and the current return type) is reset at the start of each call so that
    one compilation cannot affect the next, and the indentation table is
    grown when the program has more lines than it currently holds.
    """
    global _in_function, _current_return_type
    from pathlib import Path

    # Reset in place (slice assignment) so every reference to the shared
    # table sees the cleared contents. Lines are 1-based and a handler on the
    # last line may write to index len + 1, hence the "+ 2".
    table_size = max(len(_indent_changes), len(instruction_list) + 2)
    _indent_changes[:] = [0] * table_size
    _in_function = False
    _current_return_type = ""

    out_path = Path(__file__).parent / "test.py"
    indent_unit = " "

    with out_path.open("w", encoding="utf-8") as f:
        # Preamble: make the parent directory importable, then pull in the
        # VM runtime that the generated statements call into.
        f.write("import sys\nimport os\n")
        f.write("sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n")
        f.write("from Tzefa_Language.createdpython import *\n")
        f.write("print('VM TEST START')\n")

        indent_level = 0
        for i, quad in enumerate(instruction_list, start=1):
            # Apply the delta for this line BEFORE generating it: handlers
            # only ever schedule changes for later lines.
            indent_level += _indent_changes[i]
            f.write(indent_unit * indent_level + make_instruction(quad, i) + "\n")

        f.write("print_vars()\nprint('VM TEST END')\n")
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
# ---------------------------------------------------------------------------
|
| 344 |
+
# Self-test
|
| 345 |
+
# ---------------------------------------------------------------------------
|
| 346 |
+
|
| 347 |
+
# Self-test: compile a small sample program to test.py when this module is
# run as a script.
if __name__ == "__main__":
    # The sample calls GREATESTDIV, so it must be registered before code
    # generation; both its input and output are declared as LIST.
    register_user_function("GREATESTDIV", "LIST", "LIST")
    # Each entry is one instruction: [VERB, word1, word2, word3].
    _sample = [
        ["MAKE", "INTEGER", "THEINT", "2769"],
        ["MAKE", "INTEGER", "THEINTI", "1065"],
        ["MAKE", "INTEGER", "THROWONE", "1065"],
        ["MAKE", "INTEGER", "THROWTWO", "1065"],
        ["MAKE", "LIST", "LISTOFTWO", "2"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THEINT"],
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THEINTI"],
        ["MAKE", "INTEGER", "ZERO", "0"],
        ["ADD", "TEMPORARY", "THEINT", "THEINTI"],
        ["PRINT", "INTEGER", "TEMPORARY", "BREAK"],
        ["FUNCTION", "LIST", "GREATESTDIV", "LIST"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["GET", "INTEGER", "LISTOFTWO", "THROWONE"],
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["GET", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["MAKE", "CONDITION", "EUCLIDCOMPARE", "EQUALS"],
        ["SET", "LEFT", "EUCLIDCOMPARE", "THROWTWO"],
        ["SET", "RIGHT", "EUCLIDCOMPARE", "ZERO"],
        ["IF", "CONDITION", "EUCLIDCOMPARE", "23"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["RETURN", "VALUE", "LISTOFTWO", "STAY"],
        ["SET", "RIGHT", "EUCLIDCOMPARE", "THROWTWO"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["MODULO", "TEMPORARY", "THROWONE", "THROWTWO"],  # DEST=TEMPORARY
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["WRITE", "INTEGER", "LISTOFTWO", "TEMPORARY"],
        ["CALL", "GREATESTDIV","LISTOFTWO", "LISTOFTWO"],
        ["RETURN", "VALUE", "LISTOFTWO", "BREAK"],
        ["CALL", "GREATESTDIV","LISTOFTWO", "LISTOFTWO"],
    ]
    make_py_file(_sample)
|
| 384 |
+
|
requirements.txt
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
-
torch
|
| 2 |
-
torchvision
|
| 3 |
-
transformers
|
| 4 |
-
segmentation-models-pytorch
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
sentencepiece
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
transformers
|
| 4 |
+
segmentation-models-pytorch
|
| 5 |
+
ultralytics
|
| 6 |
+
timm
|
| 7 |
+
opencv-python-headless
|
| 8 |
+
pillow
|
| 9 |
+
numpy
|
| 10 |
+
gradio
|
| 11 |
+
huggingface_hub
|
| 12 |
+
fast_edit_distance
|
| 13 |
+
pydantic
|
| 14 |
+
einops
|
| 15 |
+
safetensors
|
| 16 |
+
surya-ocr
|
| 17 |
+
sentencepiece
|
| 18 |
+
protobuf
|