WARAJA committed on
Commit
35febfb
·
verified ·
1 Parent(s): 40e0d16

Update Tzefa Space (full pipeline)

Browse files
app.py CHANGED
@@ -1,488 +1,434 @@
1
- """
2
- Tzefa - Complete Pipeline Demo Space
3
- Image -> Binarization -> Line Segmentation -> Word Segmentation -> OCR ->
4
- Error Correction -> Compilation -> Execution
5
-
6
- All models loaded from their HF repos. Modular: swap weights and this updates.
7
- Language files (ErrorCorrection, topy, createdpython, Number2Name) are bundled in language/
8
- """
9
- import os
10
- import gc
11
- import sys
12
- import subprocess
13
- import importlib
14
- import traceback
15
- import cv2
16
- import torch
17
- import numpy as np
18
- from PIL import Image
19
- import gradio as gr
20
- from huggingface_hub import hf_hub_download
21
- import segmentation_models_pytorch as smp
22
- import torch.nn as nn
23
- import torch.nn.functional as F
24
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
25
- from ultralytics import YOLO
26
-
27
- # Add language/ to path so ErrorCorrection can import Number2Name etc.
28
- SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
29
- sys.path.insert(0, SPACE_DIR)
30
-
31
- from language import ErrorCorrection, topy
32
-
33
- # ══════════════════════════════════════════════════════════════
34
- # CONFIG
35
- # ══════════════════════════════════════════════════════════════
36
- # Fetches the token from your Space's Secrets for downloading private models
37
- HF_TOKEN = os.environ.get("HF_TOKEN")
38
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
39
-
40
- BIN_B3_REPO = "WARAJA/Model"
41
- BIN_B3_FILE = "b3_model.pth"
42
- BIN_B5_REPO = "WARAJA/b5_model"
43
- BIN_B5_FILE = "b5_model.pth"
44
- YOLO_REPO = "WARAJA/Tzefa-Line-Segmentation-YOLO"
45
- YOLO_FILE = "best.pt"
46
- TROCR_REPO = "WARAJA/Tzefa-Word-OCR-TrOCR"
47
- TROCR_BASE_PROC = "microsoft/trocr-small-stage1"
48
-
49
- TILE_SIZE = 640
50
- YOLO_IMGSZ = 640
51
- TARGET_WORDS = 3
52
- MAX_DILATE_ITERS = 200
53
-
54
-
55
- # ══════════════════════════════════════════════════════════════
56
- # 1. BINARIZATION
57
- # ══════════════════════════════════════════════════════════════
58
- class HighResMAnet(nn.Module):
59
- def __init__(self, encoder_name="mit_b5", classes=1):
60
- super().__init__()
61
- self.base_model = smp.MAnet(
62
- encoder_name=encoder_name, encoder_weights=None,
63
- in_channels=3, classes=classes, encoder_depth=5,
64
- decoder_channels=(256, 128, 64, 32, 16),
65
- )
66
- self.high_res_stem = nn.Sequential(
67
- nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(True),
68
- nn.Conv2d(16, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(True),
69
- )
70
- self.final_fusion = nn.Sequential(
71
- nn.Conv2d(48, 16, 3, padding=1), nn.ReLU(True),
72
- nn.Conv2d(16, classes, 1),
73
- )
74
-
75
- def forward(self, x):
76
- hr = self.high_res_stem(x)
77
- feat = self.base_model.encoder(x)
78
- dec = self.base_model.decoder(feat)
79
- return self.final_fusion(torch.cat([dec, hr], dim=1))
80
-
81
-
82
- def _load_bin_models():
83
- models = {}
84
- b3_path = hf_hub_download(BIN_B3_REPO, BIN_B3_FILE, token=HF_TOKEN, repo_type="space")
85
- m3 = smp.Unet(encoder_name="mit_b3", encoder_weights=None, in_channels=3, classes=1)
86
- ckpt3 = torch.load(b3_path, map_location=DEVICE)
87
- m3.load_state_dict(ckpt3.get("model_state_dict", ckpt3))
88
- models["mit_b3 (Standard)"] = m3.to(DEVICE).eval()
89
- b5_path = hf_hub_download(BIN_B5_REPO, BIN_B5_FILE, token=HF_TOKEN, repo_type="model")
90
- m5 = HighResMAnet(encoder_name="mit_b5")
91
- ckpt5 = torch.load(b5_path, map_location=DEVICE)
92
- m5.load_state_dict(ckpt5.get("model_state_dict", ckpt5))
93
- models["mit_b5 (HighRes)"] = m5.to(DEVICE).eval()
94
- return models
95
-
96
-
97
- def _preprocess_tile(pil_img):
98
- arr = np.array(pil_img).astype(np.float32) / 255.0
99
- mean, std = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])
100
- return torch.from_numpy(((arr - mean) / std).transpose(2, 0, 1))
101
-
102
-
103
- def binarize(pil_img, model):
104
- orig_w, orig_h = pil_img.size
105
- pad_w = (TILE_SIZE - orig_w % TILE_SIZE) % TILE_SIZE
106
- pad_h = (TILE_SIZE - orig_h % TILE_SIZE) % TILE_SIZE
107
- padded = Image.new("RGB", (orig_w + pad_w, orig_h + pad_h), (255, 255, 255))
108
- padded.paste(pil_img, (0, 0))
109
- nw, nh = padded.size
110
- canvas = Image.new("L", (nw, nh), 255)
111
- for y in range(0, nh, TILE_SIZE):
112
- for x in range(0, nw, TILE_SIZE):
113
- tile = padded.crop((x, y, x + TILE_SIZE, y + TILE_SIZE))
114
- t = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
115
- with torch.no_grad():
116
- logits = model(t)
117
- if logits.shape[-2:] != (TILE_SIZE, TILE_SIZE):
118
- logits = F.interpolate(logits, (TILE_SIZE, TILE_SIZE), mode="bilinear")
119
- mask = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
120
- canvas.paste(Image.fromarray(((1.0 - mask) * 255).astype(np.uint8)), (x, y))
121
- return canvas.crop((0, 0, orig_w, orig_h))
122
-
123
-
124
- # ══════════════════════════════════════════════════════════════
125
- # 2. LINE SEGMENTATION
126
- # ══════════════════════════════════════════════════════════════
127
- def _load_yolo():
128
- path = hf_hub_download(YOLO_REPO, YOLO_FILE, token=HF_TOKEN, repo_type="model")
129
- return YOLO(path)
130
-
131
-
132
- def segment_lines(bin_arr, yolo_model):
133
- img_rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
134
- orig_h, orig_w = img_rgb.shape[:2]
135
- results = yolo_model.predict(img_rgb, imgsz=YOLO_IMGSZ, conf=0.18, iou=0.18, verbose=False)
136
- truelines = []
137
- if len(results) > 0 and results[0].obb is not None:
138
- obbs = sorted(results[0].obb.xyxyxyxy.cpu().numpy(), key=lambda p: np.min(p[:, 1]))
139
- for pts in obbs:
140
- rx0, rx1 = np.min(pts[:, 0]), np.max(pts[:, 0])
141
- ry0, ry1 = np.min(pts[:, 1]), np.max(pts[:, 1])
142
- pad = (rx1 - rx0) * 0.12
143
- x0 = int(np.clip(rx0 - pad, 0, orig_w))
144
- x1 = int(np.clip(rx1 + pad, 0, orig_w))
145
- y0, y1 = int(np.clip(ry0, 0, orig_h)), int(np.clip(ry1, 0, orig_h))
146
- if x1 - x0 > 0 and y1 - y0 > 0:
147
- truelines.append((x0, y0, x1 - x0, y1 - y0))
148
- return truelines
149
-
150
-
151
- # ══════════════════════════════════════════════════════════════
152
- # 3. WORD SEGMENTATION
153
- # ══════════════════════════════════════════════════════════════
154
- def _get_word_boxes(dilated, min_w, min_h):
155
- contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
156
- boxes = sorted([b for b in [cv2.boundingRect(c) for c in contours] if b[2] >= min_w and b[3] >= min_h],
157
- key=lambda b: b[0])
158
- return boxes
159
-
160
-
161
- def segment_words(bin_arr, lines):
162
- words_dict = {}
163
- for i, (lx, ly, lw, lh) in enumerate(lines):
164
- ih, iw = bin_arr.shape[:2]
165
- ly, lx = max(0, ly), max(0, lx)
166
- lh, lw = min(lh, ih - ly), min(lw, iw - lx)
167
- if lw <= 0 or lh <= 0:
168
- continue
169
- crop = bin_arr[ly:ly+lh, lx:lx+lw]
170
- inv = cv2.bitwise_not(crop)
171
- min_ww, min_wh = max(5, int(lw * 0.02)), max(5, int(lh * 0.25))
172
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
173
- dilated, prev, found = inv.copy(), None, False
174
- for _ in range(MAX_DILATE_ITERS):
175
- dilated = cv2.dilate(dilated, kernel, iterations=1)
176
- boxes = _get_word_boxes(dilated, min_ww, min_wh)
177
- if len(boxes) == TARGET_WORDS:
178
- prev = boxes; found = True; break
179
- elif len(boxes) < TARGET_WORDS:
180
- break
181
- else:
182
- prev = boxes
183
- if not found and prev and len(prev) > TARGET_WORDS:
184
- while len(prev) > TARGET_WORDS:
185
- gaps = [(prev[j+1][0] - (prev[j][0]+prev[j][2]), j) for j in range(len(prev)-1)]
186
- _, mi = min(gaps)
187
- b1, b2 = prev[mi], prev[mi+1]
188
- merged = (min(b1[0],b2[0]), min(b1[1],b2[1]),
189
- max(b1[0]+b1[2],b2[0]+b2[2])-min(b1[0],b2[0]),
190
- max(b1[1]+b1[3],b2[1]+b2[3])-min(b1[1],b2[1]))
191
- prev = list(prev); prev[mi] = merged; prev.pop(mi+1)
192
- found = True
193
- if not found or not prev or len(prev) != TARGET_WORDS:
194
- continue
195
- line_words = {}
196
- for wi, (wx, wy, ww, wh) in enumerate(prev):
197
- line_words[wi+1] = (wx, wx+ww)
198
- words_dict[i+1] = line_words
199
- return words_dict
200
-
201
-
202
- # ══════════════════════════════════════════════════════════════
203
- # 4. OCR
204
- # ══════════════════════════════════════════════════════════════
205
- def _load_trocr():
206
- proc = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
207
- model = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN).to(DEVICE).eval()
208
- return proc, model
209
-
210
-
211
- def _pad_aspect(img, max_ratio=4.0):
212
- w, h = img.size
213
- if w <= max_ratio * h:
214
- return img
215
- th = int(w / max_ratio)
216
- pad = th - h
217
- from PIL import ImageOps
218
- return ImageOps.expand(img, (0, pad//2, 0, pad - pad//2), fill=(255,255,255))
219
-
220
-
221
- def ocr_word(img_pil, proc, model):
222
- if img_pil.mode != "RGB":
223
- img_pil = img_pil.convert("RGB")
224
- img_pil = _pad_aspect(img_pil)
225
- pv = proc(img_pil, return_tensors="pt").pixel_values.to(DEVICE)
226
- with torch.no_grad():
227
- ids = model.generate(pv)
228
- txt = proc.batch_decode(ids, skip_special_tokens=True)[0]
229
- parts = txt.split()
230
- return max(parts, key=len) if parts else txt
231
-
232
-
233
- # ══════════════════════════════════════════════════════════════
234
- # 5. VISUALIZATION
235
- # ══════════════════════════════════════════════════════════════
236
- def draw_line_bboxes(img_arr, bboxes):
237
- vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
238
- for i, (x, y, w, h) in enumerate(bboxes):
239
- cv2.rectangle(vis, (x, y), (x+w, y+h), (255, 50, 50), 2)
240
- cv2.putText(vis, str(i+1), (x, max(y-5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
241
- return vis
242
-
243
-
244
- def draw_word_bboxes(img_arr, word_tuples):
245
- vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
246
- colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50)]
247
- for lt in word_tuples:
248
- for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
249
- c = colors[wi % len(colors)]
250
- cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
251
- cv2.putText(vis, text, (x1, max(y1-4, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
252
- return vis
253
-
254
-
255
- # ══════════════════════════════════════════════════════════════
256
- # 6. CLEAR VRAM
257
- # ══════════════════════════════════════════════════════════════
258
- def clear_vram():
259
- gc.collect()
260
- if torch.cuda.is_available():
261
- torch.cuda.empty_cache()
262
-
263
-
264
- # ══════════════════════════════════════════════════════════════
265
- # 7. CODE EXECUTION
266
- # ══════════════════════════════════════════════════════════════
267
- def execute_code(compiled_code):
268
- try:
269
- result = subprocess.run(
270
- [sys.executable, "-c", compiled_code],
271
- capture_output=True, text=True, timeout=15,
272
- cwd=SPACE_DIR,
273
- )
274
- output = result.stdout
275
- if result.stderr:
276
- output += "\n--- STDERR ---\n" + result.stderr
277
- if result.returncode != 0:
278
- output += f"\n[Process exited with code {result.returncode}]"
279
- return output.strip() if output.strip() else "(no output)"
280
- except subprocess.TimeoutExpired:
281
- return "[Execution timed out after 15 seconds]"
282
- except Exception as e:
283
- return f"[Execution error: {e}]"
284
-
285
-
286
- # ══════════════════════════════════════════════════════════════
287
- # 8. FULL PIPELINE
288
- # ══════════════════════════════════════════════════════════════
289
- def run_full_pipeline(input_image, bin_model_choice):
290
- """Returns: binarized, line_vis, word_vis, raw_ocr, corrected, compiled, execution, status"""
291
- if input_image is None:
292
- return None, None, None, "", "", "", "", "No image provided."
293
-
294
- if isinstance(input_image, np.ndarray):
295
- pil_img = Image.fromarray(input_image).convert("RGB")
296
- else:
297
- pil_img = input_image.convert("RGB")
298
-
299
- status = []
300
-
301
- # Reset language global state between runs
302
- importlib.reload(ErrorCorrection)
303
- importlib.reload(topy)
304
-
305
- # ── Stage 1: Binarization ──
306
- try:
307
- status.append("[1/6] Binarization...")
308
- bin_models = _load_bin_models()
309
- model = bin_models[bin_model_choice]
310
- bin_pil = binarize(pil_img, model)
311
- bin_arr = np.array(bin_pil)
312
- del bin_models; clear_vram()
313
- status.append(" OK")
314
- except Exception as e:
315
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
316
- return None, None, None, "", "", "", "", "\n".join(status)
317
-
318
- # ── Stage 2: Line Segmentation ──
319
- try:
320
- status.append("[2/6] Line Segmentation...")
321
- yolo_model = _load_yolo()
322
- truelines = segment_lines(bin_arr, yolo_model)
323
- del yolo_model; clear_vram()
324
- status.append(f" Found {len(truelines)} lines")
325
- line_vis = draw_line_bboxes(bin_arr, truelines)
326
- except Exception as e:
327
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
328
- return bin_arr, None, None, "", "", "", "", "\n".join(status)
329
-
330
- # ── Stage 3: Word Seg + OCR ──
331
- try:
332
- status.append("[3/6] Word Segmentation + OCR...")
333
- words = segment_words(bin_arr, truelines)
334
- proc, trocr_model = _load_trocr()
335
- all_line_tuples, raw_lines = [], []
336
- for ln in sorted(words.keys()):
337
- if ln - 1 >= len(truelines):
338
- continue
339
- lx, ly, lw, lh = truelines[ln - 1]
340
- line_tuples = []
341
- for wn in sorted(words[ln].keys()):
342
- wx1, wx2 = words[ln][wn]
343
- ax1, ax2 = max(0, int(lx + wx1)), min(bin_arr.shape[1], int(lx + wx2))
344
- ay1, ay2 = max(0, ly - 20), min(bin_arr.shape[0], ly + lh + 20)
345
- crop_pil = Image.fromarray(bin_arr[ay1:ay2, ax1:ax2])
346
- text = ocr_word(crop_pil, proc, trocr_model)
347
- line_tuples.append((text, (ax1, ay1, ax2, ay2)))
348
- raw_lines.append(" ".join(t[0] for t in line_tuples))
349
- all_line_tuples.append(line_tuples)
350
- del proc, trocr_model; clear_vram()
351
- word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
352
- raw_text = "\n".join(raw_lines)
353
- status.append(f" {len(raw_lines)} lines recognized")
354
- except Exception as e:
355
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
356
- return bin_arr, line_vis, None, "", "", "", "", "\n".join(status)
357
-
358
- # ── Stage 4: Error Correction ──
359
- try:
360
- status.append("[4/6] Error Correction...")
361
- ErrorCorrection.sendlines(len(truelines))
362
- index_list, corrected_lines = [], []
363
- for line_entries in all_line_tuples:
364
- if not line_entries:
365
- corrected_lines.append(""); index_list.append(0); continue
366
- raw_tokens = [t[0].upper() for t in line_entries]
367
- while len(raw_tokens) < 3:
368
- raw_tokens.append("")
369
- raw_tokens = raw_tokens[:3]
370
- cleaned_first, index, _ = ErrorCorrection.handelfirstword(raw_tokens[0])
371
- index_list.append(index)
372
- simpler = ErrorCorrection.listsimplefunc[index]
373
- if simpler[1] == 0:
374
- bucket_idx = simpler[2]
375
- if isinstance(bucket_idx, int) and bucket_idx < len(ErrorCorrection.listall):
376
- bucket = ErrorCorrection.listall[bucket_idx]
377
- if raw_tokens[1] and raw_tokens[1] not in bucket:
378
- bucket.append(raw_tokens[1])
379
- corrected_lines.append(f"{cleaned_first} {raw_tokens[1]} {raw_tokens[2]}")
380
- corrected_text = "\n".join(corrected_lines)
381
- status.append(" OK")
382
- except Exception as e:
383
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
384
- return bin_arr, line_vis, word_vis, raw_text, "", "", "", "\n".join(status)
385
-
386
- # ── Stage 5: Compilation ──
387
- try:
388
- status.append("[5/6] Compilation...")
389
- linelist = []
390
- for i in range(len(corrected_lines)):
391
- idx = index_list[i] if i < len(index_list) else 0
392
- line_obj = ErrorCorrection.toline(corrected_lines[i], idx, ErrorCorrection.giveindents())
393
- linelist.append(line_obj)
394
- listfunctions_out, listezfunctions_out = ErrorCorrection.giveinstructions()
395
- topy.getinstructions(listfunctions_out, listezfunctions_out)
396
- compiled = ["from language.createdpython import *"]
397
- counterindent = 0
398
- for i in range(1, len(linelist) + 1):
399
- counterindent += topy.listofindentchanges[i]
400
- compiled.append(" " * counterindent + topy.makepredict(linelist[i - 1], i))
401
- compiled.append("printvars()")
402
- compiled_code = "\n".join(compiled)
403
- status.append(" OK")
404
- except Exception as e:
405
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
406
- return bin_arr, line_vis, word_vis, raw_text, corrected_text, "", "", "\n".join(status)
407
-
408
- # ── Stage 6: Execution ──
409
- try:
410
- status.append("[6/6] Execution...")
411
- exec_output = execute_code(compiled_code)
412
- # If the pipeline successfully made it here, overwrite the status logs
413
- # with the actual output of the code so it displays in the main terminal box.
414
- final_status = exec_output
415
- except Exception as e:
416
- exec_output = f"Execution error: {e}"
417
- status.append(f" Failed!\n\n--- ERROR LOG ---\n{traceback.format_exc()}")
418
- final_status = "\n".join(status)
419
-
420
- return bin_arr, line_vis, word_vis, raw_text, corrected_text, compiled_code, exec_output, final_status
421
-
422
-
423
- # ══════════════════════════════════════════════════════════════
424
- # 9. GRADIO UI
425
- # ══════════════════════════════════════════════════════════════
426
- with gr.Blocks(title="Tzefa - Handwritten Code to Execution", theme=gr.themes.Soft()) as demo:
427
- gr.Markdown(
428
- "# Tzefa - Handwritten Code to Execution\n"
429
- "Upload a photo of handwritten Tzefa code. The pipeline runs binarization, "
430
- "line detection, word OCR, error correction, compilation, and execution."
431
- )
432
-
433
- with gr.Row():
434
- with gr.Column(scale=1):
435
- input_image = gr.Image(type="pil", label="Upload Image")
436
- bin_choice = gr.Dropdown(
437
- choices=["mit_b3 (Standard)", "mit_b5 (HighRes)"],
438
- value="mit_b5 (HighRes)",
439
- label="Binarization Model",
440
- )
441
- run_btn = gr.Button("Run Full Pipeline", variant="primary", size="lg")
442
- with gr.Column(scale=1):
443
- status_box = gr.Textbox(label="Terminal / Pipeline Status", lines=12, interactive=False)
444
-
445
- with gr.Tabs():
446
- with gr.Tab("Binarized"):
447
- bin_out = gr.Image(label="Binarized Image")
448
- with gr.Tab("Line Detection"):
449
- line_out = gr.Image(label="Line Bounding Boxes")
450
- with gr.Tab("Word Detection + OCR"):
451
- word_out = gr.Image(label="Word Bboxes with OCR Labels")
452
- with gr.Tab("Raw OCR"):
453
- raw_out = gr.Textbox(label="Raw OCR (before correction)", lines=15, interactive=False)
454
- with gr.Tab("Error Corrected"):
455
- corrected_out = gr.Textbox(label="After Error Correction", lines=15, interactive=False)
456
- with gr.Tab("Compiled Python"):
457
- compiled_out = gr.Code(language="python", label="Generated Python Code")
458
- with gr.Tab("Execution Output"):
459
- exec_out = gr.Textbox(label="Program Output", lines=10, interactive=False)
460
-
461
- run_btn.click(
462
- fn=run_full_pipeline,
463
- inputs=[input_image, bin_choice],
464
- outputs=[bin_out, line_out, word_out, raw_out, corrected_out, compiled_out, exec_out, status_box],
465
- api_name="predict"
466
- )
467
-
468
- gr.Examples(
469
- examples=[["demo.png", "mit_b5 (HighRes)"]],
470
- inputs=[input_image, bin_choice],
471
- label="Example Images"
472
- )
473
-
474
- gr.Markdown(
475
- "### Resources\n"
476
- "| Component | Link |\n"
477
- "|-----------|------|\n"
478
- "| Binarization Demo | [WARAJA/Tzefa-Binarization](https://huggingface.co/spaces/WARAJA/Tzefa-Binarization) |\n"
479
- "| b5 Model | [WARAJA/b5_model](https://huggingface.co/WARAJA/b5_model) |\n"
480
- "| YOLO Model | [WARAJA/Tzefa-Line-Segmentation-YOLO](https://huggingface.co/WARAJA/Tzefa-Line-Segmentation-YOLO) |\n"
481
- "| TrOCR Model | [WARAJA/Tzefa-Word-OCR-TrOCR](https://huggingface.co/WARAJA/Tzefa-Word-OCR-TrOCR) |\n"
482
- "| Binarization Dataset | [WARAJA/Tzefa-Binarization-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Binarization-Dataset) |\n"
483
- "| Line Seg Dataset | [WARAJA/Tzefa-Line-Segmentation-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Line-Segmentation-Dataset) |\n"
484
- "| Word OCR Dataset | [WARAJA/Tzefa-Word-OCR-Dataset](https://huggingface.co/datasets/WARAJA/Tzefa-Word-OCR-Dataset) |"
485
- )
486
-
487
- if __name__ == "__main__":
488
- demo.queue().launch()
 
1
+ """
2
+ Tzefa - Complete Pipeline Demo Space
3
+ Image -> Binarization -> Line Segmentation -> Word Segmentation -> OCR ->
4
+ Error Correction -> Compilation -> Execution
5
+
6
+ Supports:
7
+ - Dialect toggle: 3-word (classic) / 4-word (verbose)
8
+ - Line segmentation toggle: YOLO (trained model) / Surya (general detector)
9
+ - Binarization model toggle: mit_b3 / mit_b5
10
+ """
11
+ import os
12
+ import gc
13
+ import sys
14
+ import subprocess
15
+ import importlib
16
+ import traceback
17
+
18
+ import cv2
19
+ import torch
20
+ import numpy as np
21
+ from PIL import Image
22
+ import gradio as gr
23
+ from huggingface_hub import hf_hub_download
24
+ import segmentation_models_pytorch as smp
25
+ import torch.nn as nn
26
+ import torch.nn.functional as F
27
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
28
+ from ultralytics import YOLO
29
+
30
+ SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
31
+ sys.path.insert(0, SPACE_DIR)
32
+
33
+ from language.dialects import THREE_WORD, FOUR_WORD, CAPS_ONLY, MIXED_CASE
34
+ from language.ErrorCorrection import TzefaParser
35
+ from language import topy
36
+
37
+ # ══════════════════════════════════════════════════════════════
38
+ # CONFIG
39
+ # ══════════════════════════════════════════════════════════════
40
# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Use the GPU when torch reports CUDA support, otherwise the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Binarization checkpoints: (repo id, file name inside that repo).
BIN_B3_REPO, BIN_B3_FILE = "WARAJA/Model", "b3_model.pth"
BIN_B5_REPO, BIN_B5_FILE = "WARAJA/b5_model", "b5_model.pth"
# Line-detector weights.
YOLO_REPO, YOLO_FILE = "WARAJA/Tzefa-Line-Segmentation-YOLO", "best.pt"
# Word recogniser weights and the repo its processor is loaded from.
TROCR_REPO = "WARAJA/Tzefa-Word-OCR-TrOCR"
TROCR_BASE_PROC = "microsoft/trocr-small-stage1"

# Side length, in pixels, of the square tiles fed to the binarization model.
TILE_SIZE = 640
# `imgsz` passed to the YOLO predictor.
YOLO_IMGSZ = 640
# Upper bound on dilation rounds tried per line during word segmentation.
MAX_DILATE_ITERS = 200

# UI label -> dialect / casing object imported from language.dialects.
_DIALECT_MAP = {
    "4-word (verbose)": FOUR_WORD,
    "3-word (classic)": THREE_WORD,
}
_CASING_MAP = {
    "CAPS only": CAPS_ONLY,
    "Mixed case": MIXED_CASE,
}
58
+
59
+
60
+ # ══════════════════════════════════════════════════════════════
61
+ # 1. BINARIZATION
62
+ # ══════════════════════════════════════════════════════════════
63
class HighResMAnet(nn.Module):
    """MAnet segmentation network with an additional stride-1 convolutional branch.

    The input is run through the MAnet encoder/decoder and, in parallel,
    through a small two-convolution stem that keeps the input's spatial size.
    The decoder output (16 channels) and the stem output (32 channels) are
    concatenated on the channel axis and reduced to ``classes`` channels.

    The attribute names and the order of layers inside each ``nn.Sequential``
    determine the ``state_dict`` keys, so they must not change if existing
    checkpoints are to keep loading.
    """

    def __init__(self, encoder_name="mit_b5", classes=1):
        """Build the network.

        Args:
            encoder_name: encoder identifier forwarded to ``smp.MAnet``.
            classes: number of output channels.
        """
        super().__init__()
        self.base_model = smp.MAnet(
            encoder_name=encoder_name,
            encoder_weights=None,
            in_channels=3,
            classes=classes,
            encoder_depth=5,
            decoder_channels=(256, 128, 64, 32, 16),
        )
        stem_layers = [
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        ]
        self.high_res_stem = nn.Sequential(*stem_layers)
        # 48 input channels = 16 (last decoder stage) + 32 (stem output).
        fusion_layers = [
            nn.Conv2d(48, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, classes, kernel_size=1),
        ]
        self.final_fusion = nn.Sequential(*fusion_layers)

    def forward(self, x):
        """Return the fused logits for the input batch ``x``."""
        stem_features = self.high_res_stem(x)
        encoder_features = self.base_model.encoder(x)
        decoded = self.base_model.decoder(encoder_features)
        fused = torch.cat([decoded, stem_features], dim=1)
        return self.final_fusion(fused)
85
+
86
+
87
def _load_bin_models(names=None):
    """Download and instantiate the binarization models.

    Args:
        names: optional label or iterable of labels to load, chosen from
            ``"mit_b3 (Standard)"`` and ``"mit_b5 (HighRes)"``. ``None``
            (the default) loads both, which is the original behaviour.
            Passing only the label that will be used avoids downloading
            and building a second network for nothing.

    Returns:
        dict mapping each requested label to its model, moved to ``DEVICE``
        and switched to eval mode.

    Raises:
        KeyError: if a requested label is not one of the known models.
    """
    # label -> (repo id, file name, Hub repo type, zero-argument constructor)
    specs = {
        "mit_b3 (Standard)": (
            BIN_B3_REPO, BIN_B3_FILE, "space",
            lambda: smp.Unet(encoder_name="mit_b3", encoder_weights=None,
                             in_channels=3, classes=1),
        ),
        "mit_b5 (HighRes)": (
            BIN_B5_REPO, BIN_B5_FILE, "model",
            lambda: HighResMAnet(encoder_name="mit_b5"),
        ),
    }
    if names is None:
        wanted = list(specs)
    elif isinstance(names, str):
        # A bare string would otherwise be iterated character by character.
        wanted = [names]
    else:
        wanted = list(names)

    models = {}
    for label in wanted:
        repo_id, filename, repo_type, build = specs[label]
        weights_path = hf_hub_download(repo_id, filename, token=HF_TOKEN, repo_type=repo_type)
        net = build()
        checkpoint = torch.load(weights_path, map_location=DEVICE)
        # Checkpoints may be a bare state dict or wrap it under "model_state_dict".
        net.load_state_dict(checkpoint.get("model_state_dict", checkpoint))
        models[label] = net.to(DEVICE).eval()
    return models
100
+
101
+
102
+ def _preprocess_tile(pil_img):
103
+ arr = np.array(pil_img).astype(np.float32) / 255.0
104
+ mean = np.array([0.485, 0.456, 0.406])
105
+ std = np.array([0.229, 0.224, 0.225])
106
+ return torch.from_numpy(((arr - mean) / std).transpose(2, 0, 1))
107
+
108
+
109
def binarize(pil_img, model):
    """Binarize an image tile by tile and return a single-channel PIL image.

    The image is padded with white on the right/bottom up to a multiple of
    ``TILE_SIZE``, each tile is run through ``model``, and the result is
    cropped back to the original size. Pixels whose sigmoid output exceeds
    0.5 are written as 0 (black); all others as 255 (white).

    Args:
        pil_img: RGB PIL image.
        model: callable mapping a 1 x 3 x TILE_SIZE x TILE_SIZE tensor to logits.

    Returns:
        Mode "L" PIL image with the same size as ``pil_img``.
    """
    width, height = pil_img.size
    # Amount needed to reach the next multiple of TILE_SIZE (0 if already aligned).
    extra_w = (TILE_SIZE - width % TILE_SIZE) % TILE_SIZE
    extra_h = (TILE_SIZE - height % TILE_SIZE) % TILE_SIZE

    padded = Image.new("RGB", (width + extra_w, height + extra_h), (255, 255, 255))
    padded.paste(pil_img, (0, 0))
    full_w, full_h = padded.size
    output = Image.new("L", (full_w, full_h), 255)

    tile_shape = (TILE_SIZE, TILE_SIZE)
    origins = [
        (left, top)
        for top in range(0, full_h, TILE_SIZE)
        for left in range(0, full_w, TILE_SIZE)
    ]
    for left, top in origins:
        tile = padded.crop((left, top, left + TILE_SIZE, top + TILE_SIZE))
        batch = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
        with torch.no_grad():
            logits = model(batch)
        if logits.shape[-2:] != tile_shape:
            # Bring the prediction back to tile resolution when the model's output size differs.
            logits = F.interpolate(logits, tile_shape, mode="bilinear")
        foreground = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
        tile_out = ((1.0 - foreground) * 255).astype(np.uint8)
        output.paste(Image.fromarray(tile_out), (left, top))
    return output.crop((0, 0, width, height))
128
+
129
+
130
+ # ══════════════════════════════════════════════════════════════
131
+ # 2. LINE SEGMENTATION
132
+ # ══════════════════════════════════════════════════════════════
133
def _load_yolo():
    """Download the line-detector weights from the Hub and wrap them in a ``YOLO`` model."""
    weights_path = hf_hub_download(
        YOLO_REPO,
        YOLO_FILE,
        token=HF_TOKEN,
        repo_type="model",
    )
    detector = YOLO(weights_path)
    return detector
136
+
137
+
138
def segment_lines_yolo(bin_arr, yolo_model, conf=0.2, iou=0.2, pad_ratio=0.12):
    """Detect text lines with the YOLO oriented-box model.

    Each oriented box is reduced to its axis-aligned bounding rectangle,
    widened horizontally by ``pad_ratio`` of its width on both sides (no
    vertical padding) and clipped to the image.

    Args:
        bin_arr: binarized image as a 2-D (grayscale) or 3-D array.
        yolo_model: object exposing ``predict`` like an ultralytics ``YOLO``.
        conf: confidence threshold forwarded to ``predict``.
        iou: IoU threshold forwarded to ``predict``.
        pad_ratio: horizontal padding as a fraction of the detected box width.

    Returns:
        List of ``(x, y, w, h)`` integer tuples ordered by the top-most
        corner of each detection; empty when nothing is detected.
    """
    image = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
    img_h, img_w = image.shape[:2]
    results = yolo_model.predict(image, imgsz=YOLO_IMGSZ, conf=conf, iou=iou, verbose=False)
    if len(results) == 0 or results[0].obb is None:
        return []

    # One polygon per detection; indexed below as rows of (x, y) corners.
    polygons = results[0].obb.xyxyxyxy.cpu().numpy()
    lines = []
    for corners in sorted(polygons, key=lambda p: np.min(p[:, 1])):
        xs, ys = corners[:, 0], corners[:, 1]
        margin = (np.max(xs) - np.min(xs)) * pad_ratio
        left = int(np.clip(np.min(xs) - margin, 0, img_w))
        right = int(np.clip(np.max(xs) + margin, 0, img_w))
        top = int(np.clip(np.min(ys), 0, img_h))
        bottom = int(np.clip(np.max(ys), 0, img_h))
        # Clipping can collapse a box that lies on the border; drop those.
        if right > left and bottom > top:
            lines.append((left, top, right - left, bottom - top))
    return lines
156
+
157
+
158
# Lazily created surya DetectionPredictor, reused across calls.
_surya_predictor = None


def _merge_overlapping_rows(boxes):
    """Union ``[x1, y1, x2, y2]`` boxes whose vertical extents overlap, transitively."""
    rows = []
    for box in boxes:
        current = list(box)
        absorbed = True
        # Absorbing a row enlarges `current`, which may make it reach rows it
        # did not touch before, so rescan until nothing else overlaps.
        while absorbed:
            absorbed = False
            for row in rows:
                if row[1] < current[3] and current[1] < row[3]:
                    current = [
                        min(row[0], current[0]), min(row[1], current[1]),
                        max(row[2], current[2]), max(row[3], current[3]),
                    ]
                    rows.remove(row)
                    absorbed = True
                    break
        rows.append(current)
    return rows


def segment_lines_surya(bin_arr, conf_threshold=0.6):
    """Detect text lines with surya's general-purpose detector.

    Detections below ``conf_threshold`` or not larger than 5 px in either
    dimension are discarded. The remaining boxes are grouped into rows: any
    boxes whose vertical ranges overlap are replaced by their bounding
    rectangle. Unlike a single greedy pass, the grouping is transitive, so
    no two returned rows overlap vertically.

    Args:
        bin_arr: binarized image as a 2-D (grayscale) or 3-D array.
        conf_threshold: minimum detection confidence to keep a box.

    Returns:
        List of ``(x, y, w, h)`` integer tuples sorted by top edge.

    Raises:
        RuntimeError: if the ``surya`` package cannot be imported.
    """
    global _surya_predictor
    # Set before importing surya — presumably its settings are read from the
    # environment at import time; confirm against the surya version in use.
    os.environ.setdefault("DETECTOR_TEXT_THRESHOLD", "0.75")
    os.environ.setdefault("DETECTOR_BLANK_THRESHOLD", "0.45")
    try:
        from surya.detection import DetectionPredictor
    except ImportError as err:
        raise RuntimeError(
            "surya-ocr not installed. Add 'surya-ocr' to requirements.txt."
        ) from err
    if _surya_predictor is None:
        _surya_predictor = DetectionPredictor()

    image = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if len(bin_arr.shape) == 2 else bin_arr
    predictions = _surya_predictor([Image.fromarray(image)])

    candidates = []
    if predictions and predictions[0].bboxes:
        for detection in predictions[0].bboxes:
            confidence = getattr(detection, "confidence", 1.0)
            # A missing or None confidence is treated as "keep".
            if confidence is not None and confidence < conf_threshold:
                continue
            x1, y1, x2, y2 = detection.bbox
            if (x2 - x1) > 5 and (y2 - y1) > 5:
                candidates.append([float(x1), float(y1), float(x2), float(y2)])

    # Process boxes top to bottom by vertical centre.
    candidates.sort(key=lambda b: (b[1] + b[3]) / 2)
    rows = _merge_overlapping_rows(candidates)
    rows.sort(key=lambda b: b[1])
    return [(int(b[0]), int(b[1]), int(b[2] - b[0]), int(b[3] - b[1])) for b in rows]
203
+
204
+
205
+ # ══════════════════════════════════════════════════════════════
206
+ # 3. WORD SEGMENTATION
207
+ # ══════════════════════════════════════════════════════════════
208
def _get_word_boxes(dilated, min_w, min_h):
    """Return bounding rectangles of the external contours in ``dilated``.

    Rectangles narrower than ``min_w`` or shorter than ``min_h`` are dropped;
    the rest are returned as ``(x, y, w, h)`` tuples ordered by ``x``.
    """
    contours, _hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    kept = []
    for contour in contours:
        rect = cv2.boundingRect(contour)
        if rect[2] >= min_w and rect[3] >= min_h:
            kept.append(rect)
    kept.sort(key=lambda rect: rect[0])
    return kept
214
+
215
+
216
def segment_words(bin_arr, lines, target_words):
    """Split each detected line into exactly ``target_words`` horizontal spans.

    For every line the inverted crop is dilated one step at a time until the
    number of sufficiently large connected regions equals ``target_words``.
    If the count drops below the target without ever matching it (or the
    iteration limit is reached), the last over-segmented result is reduced by
    repeatedly merging the two neighbouring boxes with the smallest
    horizontal gap. Lines that cannot be brought to the target are omitted.

    Args:
        bin_arr: binarized page as a 2-D uint8 array.
        lines: iterable of ``(x, y, w, h)`` line rectangles.
        target_words: required number of words per line.

    Returns:
        dict mapping 1-based line number to ``{1-based word number:
        (x_start, x_end)}``, with x coordinates relative to the line crop.
    """
    result = {}
    img_h, img_w = bin_arr.shape[:2]
    for line_no, (lx, ly, lw, lh) in enumerate(lines, start=1):
        # Clamp the rectangle to the image.
        ly, lx = max(0, ly), max(0, lx)
        lh, lw = min(lh, img_h - ly), min(lw, img_w - lx)
        if lw <= 0 or lh <= 0:
            continue

        ink = cv2.bitwise_not(bin_arr[ly:ly + lh, lx:lx + lw])
        min_box_w = max(5, int(lw * 0.02))
        min_box_h = max(5, int(lh * 0.25))
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))

        grown = ink.copy()
        candidate = None
        settled = False
        for _ in range(MAX_DILATE_ITERS):
            grown = cv2.dilate(grown, kernel, iterations=1)
            boxes = _get_word_boxes(grown, min_box_w, min_box_h)
            if len(boxes) < target_words:
                # Overshot: keep whatever over-segmented result was seen last.
                break
            candidate = boxes
            if len(boxes) == target_words:
                settled = True
                break

        if not settled and candidate and len(candidate) > target_words:
            candidate = list(candidate)
            while len(candidate) > target_words:
                # Smallest gap wins; ties go to the left-most pair.
                _, idx = min(
                    (candidate[j + 1][0] - (candidate[j][0] + candidate[j][2]), j)
                    for j in range(len(candidate) - 1)
                )
                first, second = candidate[idx], candidate[idx + 1]
                x0 = min(first[0], second[0])
                y0 = min(first[1], second[1])
                x1 = max(first[0] + first[2], second[0] + second[2])
                y1 = max(first[1] + first[3], second[1] + second[3])
                candidate[idx:idx + 2] = [(x0, y0, x1 - x0, y1 - y0)]
            settled = True

        if not settled or not candidate or len(candidate) != target_words:
            continue
        result[line_no] = {
            word_no: (bx, bx + bw)
            for word_no, (bx, _by, bw, _bh) in enumerate(candidate, start=1)
        }
    return result
255
+
256
+
257
+ # ══════════════════════════════════════════════════════════════
258
+ # 4. OCR
259
+ # ══════════════════════════════════════════════════════════════
260
def _load_trocr():
    """Load the TrOCR processor and the word-recognition model.

    Returns:
        ``(processor, model)``; the model is on ``DEVICE`` and in eval mode.
    """
    processor = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
    recogniser = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN)
    recogniser = recogniser.to(DEVICE).eval()
    return processor, recogniser
264
+
265
+
266
+ def _pad_aspect(img, max_ratio=4.0):
267
+ w, h = img.size
268
+ if w <= max_ratio * h:
269
+ return img
270
+ th = int(w / max_ratio)
271
+ pad = th - h
272
+ from PIL import ImageOps
273
+ return ImageOps.expand(img, (0, pad//2, 0, pad - pad//2), fill=(255, 255, 255))
274
+
275
+
276
def ocr_word(img_pil, proc, model):
    """Recognise a single word in a cropped image.

    The crop is converted to RGB if needed, padded via ``_pad_aspect``, run
    through the processor and ``model.generate``, and decoded. When the
    decoded text contains several whitespace-separated tokens, the longest
    one is returned (the first of equal length); when it contains none, the
    decoded text itself is returned.
    """
    rgb = img_pil if img_pil.mode == "RGB" else img_pil.convert("RGB")
    prepared = _pad_aspect(rgb)
    pixel_values = proc(prepared, return_tensors="pt").pixel_values.to(DEVICE)
    with torch.no_grad():
        token_ids = model.generate(pixel_values)
    decoded = proc.batch_decode(token_ids, skip_special_tokens=True)[0]
    tokens = decoded.split()
    if not tokens:
        return decoded
    return max(tokens, key=len)
286
+
287
+
288
+ # ══════════════════════════════════════════════════════════════
289
+ # 5. VISUALISATION
290
+ # ══════════════════════════════════════════════════════════════
291
def draw_line_bboxes(img_arr, bboxes):
    """Return a copy of the image with numbered line rectangles drawn on it.

    Args:
        img_arr: 2-D grayscale or 3-D colour array; it is not modified.
        bboxes: iterable of ``(x, y, w, h)`` rectangles, labelled 1, 2, ...
            in iteration order.
    """
    if len(img_arr.shape) == 2:
        canvas = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB)
    else:
        canvas = img_arr.copy()
    for number, (x, y, w, h) in enumerate(bboxes, start=1):
        cv2.rectangle(canvas, (x, y), (x + w, y + h), (255, 50, 50), 2)
        # Keep the label inside the image when the box touches the top edge.
        label_origin = (x, max(y - 5, 0))
        cv2.putText(canvas, str(number), label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
    return canvas
297
+
298
+
299
+ def draw_word_bboxes(img_arr, word_tuples):
300
+ vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
301
+ colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50), (220, 50, 220)]
302
+ for lt in word_tuples:
303
+ for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
304
+ c = colors[wi % len(colors)]
305
+ cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
306
+ cv2.putText(vis, text, (x1, max(y1-4, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
307
+ return vis
308
+
309
+
310
+ # ══════════════════════════════════════════════════════════════
311
+ # 6. UTILITIES
312
+ # ══════════════════════════════════════════════════════════════
313
+ def clear_vram():
314
+ gc.collect()
315
+ if torch.cuda.is_available():
316
+ torch.cuda.empty_cache()
317
+
318
+
319
+ def execute_code(compiled_code):
320
+ try:
321
+ result = subprocess.run(
322
+ [sys.executable, "-c", compiled_code],
323
+ capture_output=True, text=True, timeout=15,
324
+ cwd=SPACE_DIR,
325
+ )
326
+ output = result.stdout
327
+ if result.stderr:
328
+ output += "\n--- STDERR ---\n" + result.stderr
329
+ if result.returncode != 0:
330
+ output += f"\n[Process exited with code {result.returncode}]"
331
+ return output.strip() if output.strip() else "(no output)"
332
+ except subprocess.TimeoutExpired:
333
+ return "[Execution timed out after 15 seconds]"
334
+ except Exception as e:
335
+ return f"[Execution error: {e}]"
336
+
337
+
338
+ # ══════════════════════════════════════════════════════════════
339
+ # 7. FULL PIPELINE
340
+ # ══════════════════════════════════════════════════════════════
341
+ def run_full_pipeline(input_image, bin_model_choice, dialect_choice, casing_choice, seg_method):
342
+ if input_image is None:
343
+ return None, None, None, "", "", "", "", "No image provided."
344
+
345
+ if isinstance(input_image, np.ndarray):
346
+ pil_img = Image.fromarray(input_image).convert("RGB")
347
+ else:
348
+ pil_img = input_image.convert("RGB")
349
+
350
+ dialect = _DIALECT_MAP.get(dialect_choice, FOUR_WORD)
351
+ casing = _CASING_MAP.get(casing_choice, CAPS_ONLY)
352
+ status = []
353
+
354
+ # Fresh language state for every run
355
+ importlib.reload(topy)
356
+ parser = TzefaParser(dialect=dialect, casing=casing)
357
+ target_words = parser.expected_words_per_line
358
+
359
+ # ── Stage 1: Binarization ──
360
+ try:
361
+ status.append("[1/6] Binarization...")
362
+ bin_models = _load_bin_models()
363
+ bin_pil = binarize(pil_img, bin_models[bin_model_choice])
364
+ bin_arr = np.array(bin_pil)
365
+ del bin_models; clear_vram()
366
+ status.append(" OK")
367
+ except Exception as e:
368
+ return None, None, None, "", "", "", "", f"Binarization failed: {e}"
369
+
370
+ # ── Stage 2: Line Segmentation ──
371
+ try:
372
+ status.append(f"[2/6] Line Segmentation ({seg_method})...")
373
+ if seg_method == "Surya":
374
+ truelines = segment_lines_surya(bin_arr)
375
+ else:
376
+ yolo_model = _load_yolo()
377
+ truelines = segment_lines_yolo(bin_arr, yolo_model)
378
+ del yolo_model; clear_vram()
379
+ status.append(f" OK {len(truelines)} lines")
380
+ line_vis = draw_line_bboxes(bin_arr, truelines)
381
+ except Exception as e:
382
+ return bin_arr, None, None, "", "", "", "", f"Line Seg failed: {e}\n{traceback.format_exc()}"
383
+
384
+ # ── Stage 3: Word Seg + OCR ──
385
+ try:
386
+ status.append("[3/6] Word Segmentation + OCR...")
387
+ words = segment_words(bin_arr, truelines, target_words)
388
+ proc, trocr_model = _load_trocr()
389
+ all_line_tuples, raw_lines = [], []
390
+ for ln in sorted(words.keys()):
391
+ if ln - 1 >= len(truelines):
392
+ continue
393
+ lx, ly, lw, lh = truelines[ln - 1]
394
+ line_tuples = []
395
+ for wn in sorted(words[ln].keys()):
396
+ wx1, wx2 = words[ln][wn]
397
+ ax1 = max(0, int(lx + wx1))
398
+ ax2 = min(bin_arr.shape[1], int(lx + wx2))
399
+ ay1 = max(0, ly - 20)
400
+ ay2 = min(bin_arr.shape[0], ly + lh + 20)
401
+ text = ocr_word(Image.fromarray(bin_arr[ay1:ay2, ax1:ax2]), proc, trocr_model)
402
+ line_tuples.append((text, (ax1, ay1, ax2, ay2)))
403
+ raw_lines.append(" ".join(t[0] for t in line_tuples))
404
+ all_line_tuples.append(line_tuples)
405
+ del proc, trocr_model; clear_vram()
406
+ word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
407
+ raw_text = "\n".join(raw_lines)
408
+ status.append(f" OK {len(raw_lines)} lines recognised")
409
+ except Exception as e:
410
+ return bin_arr, line_vis, None, "", "", "", "", f"OCR failed: {e}\n{traceback.format_exc()}"
411
+
412
+ # ── Stage 4: Error Correction ──
413
+ try:
414
+ status.append("[4/6] Error Correction...")
415
+ parser.init_indent_table(len(truelines))
416
+ corrected_lines, bytecode_list = [], []
417
+ for line_entries in all_line_tuples:
418
+ if not line_entries:
419
+ corrected_lines.append("")
420
+ bytecode_list.append(["MAKE", "INTEGER", "TEMPORARY", "0"])
421
+ continue
422
+ raw_tokens = [t[0] for t in line_entries]
423
+ while len(raw_tokens) < target_words:
424
+ raw_tokens.append("")
425
+ raw_tokens = raw_tokens[:target_words]
426
+ normalised = parser.normalize_source_line(raw_tokens)
427
+ bytecode = parser.parse_line(normalised)
428
+ bytecode_list.append(bytecode)
429
+ corrected_lines.append(" ".join(bytecode)) # post-correction output
430
+ corrected_text = "\n".join(corrected_lines)
431
+ status.append(" OK")
432
+ except Exception as e:
433
+ return bin_arr, line_vis, word_vis, raw_text, "", "", "", \
434
+ f"Error Correction failed: {e}\n{traceback.format_exc()}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
demo.png CHANGED

Git LFS Details

  • SHA256: 96bb166ee0d15a28d3815b9548335479a48be821dad26a6f9dbfa2c4903d2d87
  • Pointer size: 132 Bytes
  • Size of remote file: 8.64 MB

Git LFS Details

  • SHA256: b36d53a25224741dffa9e1cc009c6282995c9fbf900efaf2bdfaf682918f1298
  • Pointer size: 132 Bytes
  • Size of remote file: 8.57 MB
language/ErrorCorrection.py CHANGED
@@ -1,373 +1,483 @@
 
 
 
 
 
 
 
 
 
 
1
  from language import Number2Name
 
 
 
 
 
2
  from fast_edit_distance import edit_distance
3
 
4
 
5
- def giveinstructions():
6
- ### returns instructions for each function in the language for topy
7
- return listfunctions, listezfunc
8
-
9
-
10
- listofindents = []
11
-
12
-
13
- def updatesizelistofindnets(size):
14
- global listofindents
15
- listofindents = [0] * (size + 1)
16
-
17
-
18
- def tosimple(func):
19
- simpler = ["a", "b", "c", "d"]
20
- simpler[0] = func[0]
21
- if func[1].startswith("NEW"):
22
- simpler[1] = 0
23
- else:
24
- simpler[1] = 1
25
- i = 1
26
- j = 2
27
- if func[i].endswith("INT"):
28
- simpler[j] = 0
29
- elif func[i].endswith("STR"):
30
- simpler[j] = 1
31
- elif func[i].endswith("LIST"):
32
- simpler[j] = 2
33
- elif func[i].endswith("BOOL"):
34
- simpler[j] = 3
35
- elif func[i].endswith("COND"):
36
- simpler[j] = 4
37
- elif func[i].endswith("STATE"):
38
- simpler[j] = 5
39
- elif func[i].endswith("TYPE"):
40
- simpler[j] = 6
41
- elif func[i].endswith("FUNC"):
42
- simpler[j] = 7
43
- elif func[i].endswith("TRUTH"):
44
- simpler[j] = 8
45
- elif func[i].endswith("COMPARE"):
46
- simpler[j] = 9
47
- elif func[i].endswith("NUMNAME"):
48
- simpler[j] = 10
49
- elif func[i].endswith("TEXT"):
50
- simpler[j] = 11
51
- i = 2
52
- j = 3
53
- if func[i].endswith("INT"):
54
- simpler[j] = 0
55
- elif func[i].endswith("STR"):
56
- simpler[j] = 1
57
- elif func[i].endswith("LIST"):
58
- simpler[j] = 2
59
- elif func[i].endswith("BOOL"):
60
- simpler[j] = 3
61
- elif func[i].endswith("COND"):
62
- simpler[j] = 4
63
- elif func[i].endswith("STATE"):
64
- simpler[j] = 5
65
- elif func[i].endswith("TYPE"):
66
- simpler[j] = 6
67
- elif func[i].endswith("FUNC"):
68
- simpler[j] = 7
69
- elif func[i].endswith("TRUTH"):
70
- simpler[j] = 8
71
- elif func[i].endswith("COMPARE"):
72
- simpler[j] = 9
73
- elif func[i].endswith("NUMNAME"):
74
- simpler[j] = 10
75
- elif func[i].endswith("TEXT"):
76
- simpler[j] = 11
77
- simpler.append(0)
78
- return simpler
79
-
80
-
81
-
82
- # CHANGED: Replaced NUM/INT with NUMNAME for immediate value reading (Index 10)
83
- listfunctions = [
84
- ["MAKEINTEGER", "NEWINT", "NUMNAME"],
85
- ["MAKEBOOLEAN", "NEWBOOL", "TRUTH"],
86
- ["MAKESTR", "NEWSTR", "TEXT"],
87
- ["NEWLIST", "NEWLIST", "NUMNAME"],
88
- ["BASICCONDITION", "NEWCOND", "COMPARE"],
89
- ["LEFTSIDE", "COND", "INT"],
90
- ["RIGHTSIDE", "COND", "INT"],
91
- ["CHANGECOMPARE", "COND", "COMPARE"],
92
- ["WHILE", "COND", "NUMNAME"],
93
- ["COMPARE", "COND", "NUMNAME"],
94
- ["ELSECOMPARE", "COND", "NUMNAME"],
95
- ["ITERATE", "LIST", "NUMNAME"],
96
- ["WHILETRUE", "BOOL", "NUMNAME"],
97
- ["IFTRUE", "BOOL", "NUMNAME"],
98
- ["ELSEIF", "BOOL", "NUMNAME"],
99
- ["INTEGERFUNCTION", "NEWFUNC", "TYPE"],
100
- ["STRINGFUNCTION", "NEWFUNC", "TYPE"],
101
- ["LISTFUNCTION", "NEWFUNC", "TYPE"],
102
- ["RETURN", "VALUE", "STATE"],
103
- ["PRINTSTRING", "STR", "STATE"],
104
- ["PRINTINTEGER", "INT", "STATE"],
105
- ["SETINDEX", "LIST", "INT"],
106
- ["TYPETOINT", "STR", "INT"],
107
- ["GETSTRING", "LIST", "STR"],
108
- ["GETINTEGER", "LIST", "INT"],
109
- ["WRITEINTEGER", "LIST", "INT"],
110
- ["WRITESTRING", "LIST", "STR"],
111
- ["WRITEBOOL", "LIST", "BOOL"],
112
- ["WRITELIST", "LIST", "LIST"],
113
- ["GETLIST", "LIST", "LIST"],
114
- ["GETBOOL", "LIST", "BOOL"],
115
- ["GETTYPE", "LIST", "STR"],
116
- ["LENGTH", "LIST", "INT"],
117
- ["ADDVALUES", "INT", "INT"],
118
- ["MULTIPLY", "INT", "INT"],
119
- ["MATHPOW", "INT", "INT"],
120
- ["DIVIDE", "INT", "INT"],
121
- ["SIMPLEDIVIDE", "INT", "INT"],
122
- ["SUBTRACT", "INT", "INT"],
123
- ["MODULO", "INT", "INT"],
124
- ["COMBINE", "STR", "STR"],
125
- ["BLANKSPACES", "STR", "NUMNAME"],
126
- ["ADDSIZE", "LIST", "INT"],
127
- ["ASSSIGNINT", "INT", "INT"],
128
- ["STRINGASSIGN", "STR", "STR"],
129
- ["COPYLIST", "LIST", "LIST"],
130
- ]
131
- listsimplefunc = [tosimple(i) for i in listfunctions]
132
- listofindents = []
133
- listezfunc = [i[0] for i in listfunctions]
134
-
135
- # Variable Names only
136
- listintegers = ["TEMPORARY", "LOCALINT", "LOOPINTEGER"]
137
-
138
- # Immediate Number Names (Index 10)
139
- listnumnames = []
140
- word_to_num = {}
141
- for i in range(101):
142
- name = Number2Name.get_name(i)
143
- listnumnames.append(name)
144
- word_to_num[name] = str(i)
145
-
146
- liststrings = ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING", "INTEGER", "STRING", "LIST", "BOOLEAN"]
147
- listlists = ["GLOBALLIST", "LOOPLIST"]
148
- listconds = ["THETRUTH"]
149
- listbools = ["LOOPBOOL"]
150
- liststate = ["STAY", "BREAK"]
151
- listype = ["INTEGER", "STRING", "LIST", "BOOLEAN"]
152
- lookuptype = {"INTEGER": "INT", "STRING": "STR", "LIST": "LIST", "BOOLEAN": "BOOL"}
153
- listtruth = ["TRUE", "FALSE"]
154
- listcompare = ["EQUALS", "BIGEQUALS", "BIGGER"]
155
- listtext = [] # Placeholder for raw text arguments (Index 11)
156
-
157
- listall = [
158
- listintegers,
159
- liststrings,
160
- listlists,
161
- listbools,
162
- listconds,
163
- liststate,
164
- listype,
165
- listezfunc,
166
- listtruth,
167
- listcompare,
168
- listnumnames, # Index 10: Immediates
169
- listtext, # Index 11: Text
170
  ]
171
- thetype = []
172
- insidefunction = False
173
- counter = 0
174
-
175
-
176
- def getsimples():
177
- return listsimplefunc
178
-
179
-
180
- def sendlines(i):
181
- global listofindents
182
- listofindents = [0] * max(i + 1, 1000)
183
-
184
-
185
- def giveindents():
186
- return listofindents
187
-
188
- def ocr_edit_distance(word1, word2):
189
- """
190
- Custom Levenshtein distance tailored for OCR.
191
- Heavily penalizes distant letter swaps (H vs Q), but forgives common OCR shapes.
192
- """
193
- word1, word2 = word1.upper(), word2.upper()
194
-
195
- # Common OCR confusions get a low penalty (0.5).
196
- # Add more to this dictionary as you find specific model confusions!
197
- low_cost_subs = {
198
- ('O', '0'): 0.5, ('0', 'O'): 0.5,
199
- ('I', '1'): 0.5, ('1', 'I'): 0.5,
200
- ('I', 'L'): 0.5, ('L', 'I'): 0.5,
201
- ('S', '5'): 0.5, ('5', 'S'): 0.5,
202
- ('Z', '2'): 0.5, ('2', 'Z'): 0.5,
203
- ('C', 'O'): 0.5, ('O', 'C'): 0.5,
204
- ('C', 'G'): 0.5, ('G', 'C'): 0.5,
205
- ('B', '8'): 0.5, ('8', 'B'): 0.5,
206
- ('D', 'O'): 0.5, ('O', 'D'): 0.5,
207
- ('E', 'F'): 0.5, ('F', 'E'): 0.5,
208
- ('A', '4'): 0.5, ('4', 'A'): 0.5,
209
- }
210
-
211
- m, n = len(word1), len(word2)
212
- dp = [[0.0] * (n + 1) for _ in range(m + 1)]
213
-
214
- for i in range(m + 1):
215
- dp[i][0] = i * 1.0 # Cost of deletion
216
- for j in range(n + 1):
217
- dp[0][j] = j * 1.0 # Cost of insertion
218
-
219
- for i in range(1, m + 1):
220
- for j in range(1, n + 1):
221
- if word1[i-1] == word2[j-1]:
222
- cost = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  else:
224
- sub_pair = (word1[i-1], word2[j-1])
225
- # Generic substitution is penalized heavily (2.0)
226
- cost = low_cost_subs.get(sub_pair, 2.0)
227
-
228
- dp[i][j] = min(
229
- dp[i-1][j] + 1.0, # deletion
230
- dp[i][j-1] + 1.0, # insertion
231
- dp[i-1][j-1] + cost # substitution
232
- )
233
- return dp[m][n]
234
-
235
- def findword(somelist, word, use_ocr_weights=False):
236
- """
237
- Find the closest match to `word` in `somelist`.
238
-
239
- use_ocr_weights=True → ocr_edit_distance (custom weighted, no cap)
240
- used for function/command name lookups where OCR
241
- can produce arbitrarily garbled prefixes/suffixes.
242
- use_ocr_weights=False → standard edit_distance with a generous cap (32)
243
- used for argument vocab lookups (short words, small lists).
244
- """
245
- min_dist = 999
246
- tobereturned = [word, 0]
247
- lentobereturned = 16
248
- for b in range(len(somelist)):
249
- lenword = len(word)
250
- i = somelist[b]
251
- lenofi = len(i)
252
- if i == word:
253
- return [i, b]
254
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  if use_ocr_weights:
256
- distance = ocr_edit_distance(word, i)
257
  else:
258
- distance = edit_distance(word, i, 32)
259
- if distance < min_dist:
260
- min_dist = distance
261
- tobereturned = [i, b]
262
- lentobereturned = len(tobereturned[0])
263
- elif distance == min_dist:
264
- if abs(lenword - lenofi) < abs(lenword - lentobereturned):
265
- tobereturned = [i, b]
266
- lentobereturned = len(tobereturned[0])
267
-
268
- return tobereturned
269
-
270
-
271
- def handelfirstword(firstword):
272
- func, index = findword(listezfunc, firstword, use_ocr_weights=True)
273
- # Check if Arg2 (Index 2 in definition) is NUMNAME (Index 10 in listall)
274
- # We use listfunctions directly to check the string type
275
- if listfunctions[index][2] == "NUMNAME":
276
- return (func, index, 1)
277
- else:
278
- return (func, index, 0)
279
-
280
-
281
- def toline(line, index, listofindents):
282
- global counter
283
- global thetype
284
- global insidefunction
285
- disthreeline = line.split(" ")
286
- threeline = ["", "", ""]
287
- problem = False
288
- threeline[0] = listezfunc[index]
289
- simpler = listsimplefunc[index]
290
- if threeline[0] == "STRINGFUNCTION":
291
- if insidefunction:
292
- problem = True
293
- else:
294
- insidefunction = True
295
- threeline[1] = disthreeline[1]
296
- threeline[2] = findword(listype, disthreeline[2])[0]
297
- thetype.append(threeline[2])
298
- newsomething = [threeline[1], "STR", lookuptype[threeline[2]]]
299
- listezfunc.append(newsomething[0])
300
- listfunctions.append(newsomething)
301
- simplerer = tosimple(newsomething)
302
- listsimplefunc.append(simplerer)
303
- elif threeline[0] == "INTEGERFUNCTION":
304
- if insidefunction:
305
- problem = True
306
- else:
307
- insidefunction = True
308
- threeline[1] = disthreeline[1]
309
- threeline[2] = findword(listype, disthreeline[2])[0]
310
- thetype.append(threeline[2])
311
- newsomething = [threeline[1], "INT", lookuptype[threeline[2]]]
312
- listezfunc.append(newsomething[0])
313
- listfunctions.append(newsomething)
314
- simplerer = tosimple(newsomething)
315
- listsimplefunc.append(simplerer)
316
- elif threeline[0] == "LISTFUNCTION":
317
- if insidefunction:
318
- problem = True
319
- else:
320
- insidefunction = True
321
- threeline[1] = disthreeline[1]
322
- threeline[2] = findword(listype, disthreeline[2])[0]
323
- thetype.append(threeline[2])
324
- newsomething = [threeline[1], "LIST", lookuptype[threeline[2]]]
325
- listezfunc.append(newsomething[0])
326
- listfunctions.append(newsomething)
327
- simplerer = tosimple(newsomething)
328
- listsimplefunc.append(simplerer)
329
- elif simpler[0] == "RETURN":
330
- if len(thetype) == 0:
331
- problem = True
332
- else:
333
- threeline[0] = "RETURN"
334
- threeline[1] = findword(listall[listype.index(thetype[-1])], disthreeline[1])[0]
335
- threeline[2] = findword(liststate, disthreeline[2])[0]
336
- if threeline[2] == "BREAK":
337
- insidefunction = False
338
- thetype.pop()
339
- listofindents[counter] = -1
340
- else:
341
- # Arg 1
342
- if simpler[1] == 0:
343
- listall[simpler[2]].append(disthreeline[1])
344
- threeline[1] = disthreeline[1]
345
- else:
346
- threeline[1] = findword(listall[simpler[2]], disthreeline[1])[0]
347
- # If Arg 1 is a NUMNAME, replace with actual value
348
- if simpler[2] == 10:
349
- threeline[1] = word_to_num[threeline[1]]
350
-
351
- # Arg 2
352
- if simpler[3] < len(listall):
353
- threeline[2] = findword(listall[simpler[3]], disthreeline[2])[0]
354
- # If Arg 2 is a NUMNAME, replace with actual value
355
- if simpler[3] == 10:
356
- threeline[2] = word_to_num[threeline[2]]
357
- else:
358
- threeline[2] = disthreeline[2]
359
-
360
- # Use the now-numeric value in threeline[2] for indents
361
- # Only actual control flow: WHILE, ITERATE, COMPARE, ELSECOMPARE, WHILETRUE, IFTRUE, ELSEIF
362
- # NOT BASICCONDITION, CHANGECOMPARE (these aren't control flow, their arg2 isn't a line number)
363
- control_flow = {"WHILE", "ITERATE", "COMPARE", "ELSECOMPARE", "WHILETRUE", "IFTRUE", "ELSEIF"}
364
- if threeline[0] in control_flow:
365
- listofindents[counter] = 1
366
- listofindents[int(threeline[2])] = -1
367
- elif threeline[0] == "DEFINE":
368
- listfunctions.append(threeline[0])
369
- listofindents[int(counter)] = 1
370
- counter += 1
371
- if len(threeline[1]) == 0 or len(threeline[2]) == 0:
372
- problem = True
373
- return threeline
 
1
+ """
2
+ ErrorCorrection.py – Tzefa source-text parser and error-correcting compiler front-end.
3
+
4
+ TzefaParser converts raw text lines (e.g. from OCR) into validated 4-word
5
+ bytecode tuples consumed by topy.make_instruction().
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Dict, List, Tuple
10
+
11
  from language import Number2Name
12
+ from language.dialects import (
13
+ THREE_WORD, CAPS_ONLY,
14
+ normalize_line, words_per_line, ALU_VERBS,
15
+ )
16
+ from language import topy
17
  from fast_edit_distance import edit_distance
18
 
19
 
20
+ # ---------------------------------------------------------------------------
21
+ # Instruction definitions now in 4-word form
22
+ # ---------------------------------------------------------------------------
23
+ # Each entry: [VERB, TYPE, ARG1_KIND, ARG2_KIND]
24
+ #
25
+ # ARG_KIND values:
26
+ # "NEWINT" – declares a new integer name
27
+ # "NEWSTR" – declares a new string name
28
+ # "NEWBOOL" – declares a new boolean name
29
+ # "NEWLIST" – declares a new list name
30
+ # "NEWCOND" – declares a new condition name
31
+ # "NEWFUNC" – declares a new function name
32
+ # "INT" – existing integer var
33
+ # "STR" – existing string var
34
+ # "LIST" existing list var
35
+ # "BOOL" – existing boolean var
36
+ # "COND" – existing condition
37
+ # "STATE" – STAY / BREAK
38
+ # "TYPE" – INTEGER / STRING / LIST / BOOLEAN
39
+ # "TRUTH" – TRUE / FALSE
40
+ # "COMPARE" – EQUALS / BIGEQUALS / BIGGER
41
+ # "NUMNAME" – numeric name (ZERO … ONEHUNDRED)
42
+ # "TEXT" – free text (no correction)
43
+ # "VALUE" – context-dependent (return var, resolved at parse time)
44
+
45
+ _BUILTIN_INSTRUCTIONS: List[List[str]] = [
46
+ # Variable declarations
47
+ ["MAKE", "INTEGER", "NEWINT", "NUMNAME"],
48
+ ["MAKE", "BOOLEAN", "NEWBOOL", "TRUTH"],
49
+ ["MAKE", "STRING", "NEWSTR", "TEXT"],
50
+ ["MAKE", "LIST", "NEWLIST", "NUMNAME"],
51
+ ["MAKE", "CONDITION", "NEWCOND", "COMPARE"],
52
+
53
+ # Condition manipulation
54
+ ["SET", "LEFT", "COND", "INT"],
55
+ ["SET", "RIGHT", "COND", "INT"],
56
+ ["CHANGE", "COMPARE", "COND", "COMPARE"],
57
+
58
+ # Control flow
59
+ ["WHILE", "CONDITION", "COND", "NUMNAME"],
60
+ ["IF", "CONDITION", "COND", "NUMNAME"],
61
+ ["ELIF", "CONDITION", "COND", "NUMNAME"],
62
+ ["ITERATE", "LIST", "LIST", "NUMNAME"],
63
+ ["WHILE", "BOOLEAN", "BOOL", "NUMNAME"],
64
+ ["IF", "BOOLEAN", "BOOL", "NUMNAME"],
65
+ ["ELIF", "BOOLEAN", "BOOL", "NUMNAME"],
66
+
67
+ # Function definition
68
+ ["FUNCTION", "INTEGER", "NEWFUNC", "TYPE"],
69
+ ["FUNCTION", "STRING", "NEWFUNC", "TYPE"],
70
+ ["FUNCTION", "LIST", "NEWFUNC", "TYPE"],
71
+
72
+ # Return
73
+ ["RETURN", "VALUE", "VALUE", "STATE"],
74
+
75
+ # Print
76
+ ["PRINT", "STRING", "STR", "STATE"],
77
+ ["PRINT", "INTEGER", "INT", "STATE"],
78
+
79
+ # Assignment / copy
80
+ ["SET", "INTEGER", "INT", "INT"],
81
+ ["SET", "STRING", "STR", "STR"],
82
+ ["SET", "LIST", "LIST", "LIST"],
83
+ ["SET", "INDEX", "LIST", "NUMNAME"],
84
+
85
+ # Type introspection
86
+ ["TYPE", "TOINT", "STR", "INT"],
87
+
88
+ # List read
89
+ ["GET", "STRING", "LIST", "STR"],
90
+ ["GET", "INTEGER", "LIST", "INT"],
91
+ ["GET", "BOOLEAN", "LIST", "BOOL"],
92
+ ["GET", "LIST", "LIST", "LIST"],
93
+ ["GET", "TYPE", "LIST", "STR"],
94
+ ["GET", "LENGTH", "LIST", "INT"],
95
+
96
+ # List write
97
+ ["WRITE", "INTEGER", "LIST", "INT"],
98
+ ["WRITE", "STRING", "LIST", "STR"],
99
+ ["WRITE", "BOOLEAN", "LIST", "BOOL"],
100
+ ["WRITE", "LIST", "LIST", "LIST"],
101
+
102
+ # Arithmetic — layout: [VERB, DEST, SRC1, SRC2]
103
+ # arg1_kind=INT is the dest (existing or new), arg2/3 are sources
104
+ ["ADD", "INT", "INT", "INT"],
105
+ ["MULTIPLY", "INT", "INT", "INT"],
106
+ ["POWER", "INT", "INT", "INT"],
107
+ ["DIVIDE", "INT", "INT", "INT"],
108
+ ["SIMPLEDIVIDE","INT", "INT", "INT"],
109
+ ["SUBTRACT", "INT", "INT", "INT"],
110
+ ["MODULO", "INT", "INT", "INT"],
111
+
112
+ # String ops — COMBINE layout: [COMBINE, DEST, SRC1, SRC2]
113
+ ["COMBINE", "STR", "STR", "STR"],
114
+ ["PAD", "STRING", "STR", "NUMNAME"],
115
+
116
+ # List resize — ADD SIZE layout: [ADD, SIZE, listname, int_amount]
117
+ ["ADD", "SIZE", "LIST", "INT"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ]
119
+
120
+ # Which kinds declare new names (start with "NEW")
121
+ _NEW_KINDS = {"NEWINT", "NEWSTR", "NEWBOOL", "NEWLIST", "NEWCOND", "NEWFUNC"}
122
+
123
+ # Kind → bucket index in the all_names list
124
+ _KIND_TO_BUCKET: Dict[str, int] = {
125
+ "INT": 0, "NEWINT": 0,
126
+ "STR": 1, "NEWSTR": 1,
127
+ "LIST": 2, "NEWLIST": 2,
128
+ "BOOL": 3, "NEWBOOL": 3,
129
+ "COND": 4, "NEWCOND": 4,
130
+ "STATE": 5,
131
+ "TYPE": 6,
132
+ "NEWFUNC": 7,
133
+ "TRUTH": 8,
134
+ "COMPARE": 9,
135
+ "NUMNAME": 10,
136
+ "TEXT": 11,
137
+ "VALUE": -1, # resolved dynamically
138
+ }
139
+
140
+ # The lookup type for function return types
141
+ _FUNC_TYPE_MAP: Dict[str, str] = {
142
+ "INTEGER": "INT", "STRING": "STR", "LIST": "LIST", "BOOLEAN": "BOOL",
143
+ }
144
+
145
+
146
+ class TzefaParser:
147
+ """Parse and error-correct Tzefa source lines into 4-word bytecode."""
148
+
149
+ def __init__(
150
+ self,
151
+ dialect: str = THREE_WORD,
152
+ casing: str = CAPS_ONLY,
153
+ ) -> None:
154
+ self.dialect = dialect
155
+ self.casing = casing
156
+
157
+ # Build instruction table from the static definitions
158
+ self.instructions: List[List[str]] = [row[:] for row in _BUILTIN_INSTRUCTIONS]
159
+
160
+ # Opcode keys: (VERB, TYPE) tuples for lookup
161
+ self.opcode_keys: List[Tuple[str, str]] = [(r[0], r[1]) for r in self.instructions]
162
+
163
+ # Name buckets for fuzzy-matching (index-aligned with _KIND_TO_BUCKET)
164
+ self.all_names: List[List[str]] = [
165
+ # 0: INT names
166
+ ["TEMPORARY", "LOCALINT", "LOOPINTEGER"],
167
+ # 1: STR names
168
+ ["TEMPSTRING", "GLOBALSTR", "LOOPSTRING",
169
+ "INTEGER", "STRING", "LIST", "BOOLEAN"],
170
+ # 2: LIST names
171
+ ["GLOBALLIST", "LOOPLIST"],
172
+ # 3: BOOL names
173
+ ["LOOPBOOL"],
174
+ # 4: COND names
175
+ ["THETRUTH"],
176
+ # 5: STATE
177
+ ["STAY", "BREAK"],
178
+ # 6: TYPE
179
+ ["INTEGER", "STRING", "LIST", "BOOLEAN"],
180
+ # 7: opcode verbs (populated below)
181
+ [],
182
+ # 8: TRUTH
183
+ ["TRUE", "FALSE"],
184
+ # 9: COMPARE
185
+ ["EQUALS", "BIGEQUALS", "BIGGER"],
186
+ # 10: NUMNAME
187
+ [],
188
+ # 11: TEXT (free, no correction)
189
+ [],
190
+ ]
191
+
192
+ # Populate bucket 7 (opcode verbs) from instruction table
193
+ seen_verbs: set = set()
194
+ for row in self.instructions:
195
+ key = (row[0], row[1])
196
+ label = f"{row[0]}_{row[1]}"
197
+ if label not in seen_verbs:
198
+ seen_verbs.add(label)
199
+ self.all_names[7].append(label)
200
+
201
+ # Numeric name immediates
202
+ self.word_to_num: Dict[str, str] = {}
203
+ for i in range(101):
204
+ name = Number2Name.get_name(i)
205
+ self.all_names[10].append(name)
206
+ self.word_to_num[name] = str(i)
207
+
208
+ # Build verb→[valid types] lookup for sequential word matching
209
+ self._verb_to_types: Dict[str, List[str]] = {}
210
+ for row in self.instructions:
211
+ v, t = row[0], row[1]
212
+ if v not in self._verb_to_types:
213
+ self._verb_to_types[v] = []
214
+ if t not in self._verb_to_types[v]:
215
+ self._verb_to_types[v].append(t)
216
+
217
+ # Deduplicated verb list (order preserved, for fuzzy matching)
218
+ # Always include CALL even before functions are registered
219
+ self._verb_list: List[str] = ["CALL"]
220
+ for row in self.instructions:
221
+ if row[0] not in self._verb_list:
222
+ self._verb_list.append(row[0])
223
+
224
+ # Indent tracking
225
+ self.indent_table: List[int] = []
226
+
227
+ # Function definition state
228
+ self.function_type_stack: List[str] = []
229
+ self.inside_function: bool = False
230
+ self.line_counter: int = 0
231
+
232
+ # ------------------------------------------------------------------
233
+ # Public interface
234
+ # ------------------------------------------------------------------
235
+
236
+ @property
237
+ def expected_words_per_line(self) -> int:
238
+ return words_per_line(self.dialect)
239
+
240
+ def normalize_source_line(self, raw_tokens: List[str]) -> List[str]:
241
+ """Normalize raw tokens into a canonical 4-word CAPS tuple."""
242
+ return normalize_line(raw_tokens, self.dialect, self.casing)
243
+
244
+ def init_indent_table(self, line_count: int) -> None:
245
+ """Allocate the indent-change table for *line_count* lines."""
246
+ self.indent_table = [0] * max(line_count + 2, 1002)
247
+
248
+ def get_indent_table(self) -> List[int]:
249
+ return self.indent_table
250
+
251
+ def match_opcode(self, verb: str, type_word: str) -> Tuple[int, List[str]]:
252
+ """Exact lookup of (verb, type_word) → instruction row."""
253
+ key = (verb, type_word)
254
+ for i, k in enumerate(self.opcode_keys):
255
+ if k == key:
256
+ return i, self.instructions[i]
257
+ return 0, self.instructions[0]
258
+
259
+ def parse_line(self, quad: List[str]) -> List[str]:
260
+ """
261
+ Sequential error-correction:
262
+ W1 → fuzzy match against verb list
263
+ W2 → fuzzy match against valid types for that verb
264
+ (ALU: dest var auto-registered; CALL: known function names)
265
+ W3,W4 → resolved by the spec (arg1_kind, arg2_kind)
266
+ """
267
+ while len(quad) < 4:
268
+ quad.append("")
269
+
270
+ # ── W1: verb ─────────────────────────────────────────────────────────
271
+ verb = self.find_word(self._verb_list, quad[0], use_ocr_weights=True)[0]
272
+
273
+ # ── ALU fast path (W2 = dest var, W3/W4 = sources) ──────────────────
274
+ if verb in ALU_VERBS:
275
+ # ADD SIZE is the non-ALU outlier — treat normally
276
+ if verb == "ADD":
277
+ size_types = self._verb_to_types.get("ADD", [])
278
+ w2 = self.find_word(size_types, quad[1], use_ocr_weights=True)[0]
279
+ if w2 == "SIZE":
280
+ # fall through to standard path
281
+ type_word = "SIZE"
282
+ verb = "ADD"
283
+ _, spec = self.match_opcode(verb, type_word)
284
+ result = [verb, type_word,
285
+ self._resolve_arg(spec[2], quad[2]),
286
+ self._resolve_arg(spec[3], quad[3])]
287
+ self.line_counter += 1
288
+ return result
289
+ if verb == "COMBINE":
290
+ dest = self._resolve_arg("STR", quad[1])
291
+ src1 = self._resolve_arg("STR", quad[2])
292
+ src2 = self._resolve_arg("STR", quad[3])
293
+ else:
294
+ dest = self._resolve_arg("INT", quad[1])
295
+ src1 = self._resolve_arg("INT", quad[2])
296
+ src2 = self._resolve_arg("INT", quad[3])
297
+ self.line_counter += 1
298
+ return [verb, dest, src1, src2]
299
+
300
+ # ── CALL (W2 = function name, W3 = input var, W4 = output var) ───────
301
+ if verb == "CALL":
302
+ known_funcs = [k[1] for k in self.opcode_keys if k[0] == "CALL"]
303
+ func_name = self.find_word(known_funcs, quad[1], use_ocr_weights=True)[0] if known_funcs else quad[1]
304
+ func_spec = next((r for r in self.instructions if r[0] == "CALL" and r[1] == func_name), None)
305
+ arg1 = self._resolve_arg(func_spec[2] if func_spec else "INT", quad[2])
306
+ arg2 = self._resolve_arg("INT", quad[3])
307
+ self.line_counter += 1
308
+ return ["CALL", func_name, arg1, arg2]
309
+
310
+ # ── W2: type keyword for this verb ───────────────────────────────────
311
+ valid_types = self._verb_to_types.get(verb, [])
312
+ type_word = self.find_word(valid_types, quad[1], use_ocr_weights=True)[0] if valid_types else quad[1]
313
+
314
+ # ── Look up full spec ─────────────────────────────────────────────────
315
+ _, spec = self.match_opcode(verb, type_word)
316
+ arg1_kind, arg2_kind = spec[2], spec[3]
317
+
318
+ result = [verb, type_word, "", ""]
319
+
320
+ # ── FUNCTION ─────────────────────────────────────────────────────────
321
+ if verb == "FUNCTION":
322
+ if not self.inside_function:
323
+ self.inside_function = True
324
+ func_name = quad[2] # new name, register as-is
325
+ param_type = self.find_word(self.all_names[6], quad[3], use_ocr_weights=True)[0]
326
+ result[2] = func_name
327
+ result[3] = param_type
328
+ self.function_type_stack.append(type_word)
329
+ topy.register_user_function(
330
+ func_name,
331
+ _FUNC_TYPE_MAP.get(type_word, "INT"),
332
+ _FUNC_TYPE_MAP.get(param_type, "INT"),
333
+ )
334
+ self.opcode_keys.append(("CALL", func_name))
335
+ self.instructions.append(["CALL", func_name, "INT", "INT"])
336
+ if "CALL" not in self._verb_to_types:
337
+ self._verb_to_types["CALL"] = []
338
+ if func_name not in self._verb_to_types["CALL"]:
339
+ self._verb_to_types["CALL"].append(func_name)
340
+ label = f"CALL_{func_name}"
341
+ if label not in self.all_names[7]:
342
+ self.all_names[7].append(label)
343
+
344
+ # ── RETURN ────────────────────────────────────────────────────────────
345
+ elif verb == "RETURN":
346
+ if self.function_type_stack:
347
+ ret_kind = _FUNC_TYPE_MAP.get(self.function_type_stack[-1], "INT")
348
+ bucket = _KIND_TO_BUCKET.get(ret_kind, 0)
349
+ result[2] = self.find_word(self.all_names[bucket], quad[2], use_ocr_weights=True)[0]
350
  else:
351
+ result[2] = quad[2]
352
+ result[3] = self.find_word(self.all_names[5], quad[3], use_ocr_weights=True)[0]
353
+ if result[3] == "BREAK" and self.function_type_stack:
354
+ self.inside_function = False
355
+ self.function_type_stack.pop()
356
+ self.indent_table[self.line_counter] = -1
357
+
358
+ # ── Everything else ───────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  else:
360
+ result[2] = self._resolve_arg(arg1_kind, quad[2])
361
+ result[3] = self._resolve_arg(arg2_kind, quad[3])
362
+
363
+ # Control-flow indent tracking
364
+ if verb in {"WHILE", "IF", "ELIF", "ITERATE"}:
365
+ self.indent_table[self.line_counter] = 1
366
+ try:
367
+ self.indent_table[int(result[3])] = -1
368
+ except (ValueError, IndexError):
369
+ pass
370
+
371
+ self.line_counter += 1
372
+ return result
373
+
374
+ # ------------------------------------------------------------------
375
+ # Argument resolution
376
+ # ------------------------------------------------------------------
377
+
378
+
379
+ def _resolve_arg(self, kind: str, raw: str) -> str:
380
+ """Resolve a single argument against its kind's name bucket via fuzzy-match."""
381
+ if not kind or kind == "VALUE":
382
+ return raw
383
+
384
+ bucket_idx = _KIND_TO_BUCKET.get(kind, -1)
385
+ if bucket_idx < 0 or bucket_idx >= len(self.all_names):
386
+ return raw
387
+
388
+ # New-name kinds: register as-is, no correction
389
+ if kind in _NEW_KINDS:
390
+ if raw and raw not in self.all_names[bucket_idx]:
391
+ self.all_names[bucket_idx].append(raw)
392
+ return raw
393
+
394
+ # NUMNAME: digit strings pass through, words get fuzzy-matched then converted
395
+ if kind == "NUMNAME" and raw.isdigit():
396
+ return raw
397
+
398
+ # Fuzzy-match against the bucket — always, no exceptions
399
+ matched, _ = self.find_word(self.all_names[bucket_idx], raw, use_ocr_weights=True)
400
+
401
+ if kind == "NUMNAME":
402
+ matched = self.word_to_num.get(matched, matched)
403
+
404
+ return matched
405
+
406
+ # ------------------------------------------------------------------
407
+ # Edit distance helpers
408
+ # ------------------------------------------------------------------
409
+
410
+ @staticmethod
411
+ def ocr_edit_distance(word1: str, word2: str) -> float:
412
+ """Levenshtein distance with reduced cost for common OCR confusions."""
413
+ word1, word2 = word1.upper(), word2.upper()
414
+
415
+ _LOW_COST: Dict[Tuple[str, str], float] = {
416
+ ('O', '0'): 0.5, ('0', 'O'): 0.5,
417
+ ('I', '1'): 0.5, ('1', 'I'): 0.5,
418
+ ('I', 'L'): 0.5, ('L', 'I'): 0.5,
419
+ ('S', '5'): 0.5, ('5', 'S'): 0.5,
420
+ ('Z', '2'): 0.5, ('2', 'Z'): 0.5,
421
+ ('C', 'O'): 0.5, ('O', 'C'): 0.5,
422
+ ('C', 'G'): 0.5, ('G', 'C'): 0.5,
423
+ ('B', '8'): 0.5, ('8', 'B'): 0.5,
424
+ ('D', 'O'): 0.5, ('O', 'D'): 0.5,
425
+ ('E', 'F'): 0.5, ('F', 'E'): 0.5,
426
+ ('A', '4'): 0.5, ('4', 'A'): 0.5,
427
+ }
428
+
429
+ m, n = len(word1), len(word2)
430
+ dp = [[0.0] * (n + 1) for _ in range(m + 1)]
431
+ for i in range(m + 1):
432
+ dp[i][0] = float(i)
433
+ for j in range(n + 1):
434
+ dp[0][j] = float(j)
435
+ for i in range(1, m + 1):
436
+ for j in range(1, n + 1):
437
+ if word1[i - 1] == word2[j - 1]:
438
+ cost = 0.0
439
+ else:
440
+ cost = _LOW_COST.get((word1[i - 1], word2[j - 1]), 2.0)
441
+ dp[i][j] = min(
442
+ dp[i - 1][j] + 1.0,
443
+ dp[i][j - 1] + 1.0,
444
+ dp[i - 1][j - 1] + cost,
445
+ )
446
+ return dp[m][n]
447
+
448
+ @staticmethod
449
+ def find_word(
450
+ name_list: List[str],
451
+ word: str,
452
+ use_ocr_weights: bool = False,
453
+ ) -> Tuple[str, int]:
454
+ """Return the closest match to *word* in *name_list* and its index."""
455
+ if not name_list:
456
+ return word, 0
457
+
458
+ min_dist = 999.0
459
+ best: List[Any] = [word, 0]
460
+ best_len = 16
461
+ word_len = len(word)
462
+
463
+ for idx, item in enumerate(name_list):
464
+ if item == word:
465
+ return item, idx
466
+
467
  if use_ocr_weights:
468
+ dist = TzefaParser.ocr_edit_distance(word, item)
469
  else:
470
+ dist = float(edit_distance(word, item, 32))
471
+
472
+ item_len = len(item)
473
+ if dist < min_dist:
474
+ min_dist = dist
475
+ best = [item, idx]
476
+ best_len = item_len
477
+ elif dist == min_dist:
478
+ if abs(word_len - item_len) < abs(word_len - best_len):
479
+ best = [item, idx]
480
+ best_len = item_len
481
+
482
+ return tuple(best)
483
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
language/Number2Name.py CHANGED
@@ -1,15 +1,24 @@
1
- def get_name(number):
2
- namearray = ['ZERO', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE', 'TEN', 'ELEVEN',
3
- 'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN', 'SEVENTEEN', 'EIGHTEEN', 'NINETEEN', 'TWENTY',
4
- 'TWENTYONE', 'TWENTYTWO', 'TWENTYTHREE', 'TWENTYFOUR', 'TWENTYFIVE', 'TWENTYSIX', 'TWENTYSEVEN',
5
- 'TWENTYEIGHT', 'TWENTYNINE', 'THIRTY', 'THIRTYONE', 'THIRTYTWO', 'THIRTYTHREE', 'THIRTYFOUR',
6
- 'THIRTYFIVE', 'THIRTYSIX', 'THIRTYSEVEN', 'THIRTYEIGHT', 'THIRTYNINE', 'FORTY', 'FORTYONE', 'FORTYTWO',
7
- 'FORTYTHREE', 'FORTYFOUR', 'FORTYFIVE', 'FORTYSIX', 'FORTYSEVEN', 'FORTYEIGHT', 'FORTYNINE', 'FIFTY',
8
- 'FIFTYONE', 'FIFTYTWO', 'FIFTYTHREE', 'FIFTYFOUR', 'FIFTYFIVE', 'FIFTYSIX', 'FIFTYSEVEN', 'FIFTYEIGHT',
9
- 'FIFTYNINE', 'SIXTY', 'SIXTYONE', 'SIXTYTWO', 'SIXTYTHREE', 'SIXTYFOUR', 'SIXTYFIVE', 'SIXTYSIX',
10
- 'SIXTYSEVEN', 'SIXTYEIGHT', 'SIXTYNINE', 'SEVENTY', 'SEVENTYONE', 'SEVENTYTWO', 'SEVENTYTHREE',
11
- 'SEVENTYFOUR', 'SEVENTYFIVE', 'SEVENTYSIX', 'SEVENTYSEVEN', 'SEVENTYEIGHT', 'SEVENTYNINE', 'EIGHTY',
12
- 'EIGHTYONE', 'EIGHTYTWO', 'EIGHTYTHREE', 'EIGHTYFOUR', 'EIGHTYFIVE', 'EIGHTYSIX', 'EIGHTYSEVEN',
13
- 'EIGHTYEIGHT', 'EIGHTYNINE', 'NINETY', 'NINETYONE', 'NINETYTWO', 'NINETYTHREE', 'NINETYFOUR',
14
- 'NINETYFIVE', 'NINETYSIX', 'NINETYSEVEN', 'NINETYEIGHT', 'NINETYNINE', 'ONEHUNDRED']
15
- return namearray[number]
 
 
 
 
 
 
 
 
 
 
1
+ # python
2
+ from typing import Tuple
3
+
4
# English upper-case names for 0..100; the tuple index is the number itself.
NUMBER_NAMES: Tuple[str, ...] = (
    'ZERO', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE', 'TEN', 'ELEVEN',
    'TWELVE', 'THIRTEEN', 'FOURTEEN', 'FIFTEEN', 'SIXTEEN', 'SEVENTEEN', 'EIGHTEEN', 'NINETEEN', 'TWENTY',
    'TWENTYONE', 'TWENTYTWO', 'TWENTYTHREE', 'TWENTYFOUR', 'TWENTYFIVE', 'TWENTYSIX', 'TWENTYSEVEN',
    'TWENTYEIGHT', 'TWENTYNINE', 'THIRTY', 'THIRTYONE', 'THIRTYTWO', 'THIRTYTHREE', 'THIRTYFOUR',
    'THIRTYFIVE', 'THIRTYSIX', 'THIRTYSEVEN', 'THIRTYEIGHT', 'THIRTYNINE', 'FORTY', 'FORTYONE', 'FORTYTWO',
    'FORTYTHREE', 'FORTYFOUR', 'FORTYFIVE', 'FORTYSIX', 'FORTYSEVEN', 'FORTYEIGHT', 'FORTYNINE', 'FIFTY',
    'FIFTYONE', 'FIFTYTWO', 'FIFTYTHREE', 'FIFTYFOUR', 'FIFTYFIVE', 'FIFTYSIX', 'FIFTYSEVEN', 'FIFTYEIGHT',
    'FIFTYNINE', 'SIXTY', 'SIXTYONE', 'SIXTYTWO', 'SIXTYTHREE', 'SIXTYFOUR', 'SIXTYFIVE', 'SIXTYSIX',
    'SIXTYSEVEN', 'SIXTYEIGHT', 'SIXTYNINE', 'SEVENTY', 'SEVENTYONE', 'SEVENTYTWO', 'SEVENTYTHREE',
    'SEVENTYFOUR', 'SEVENTYFIVE', 'SEVENTYSIX', 'SEVENTYSEVEN', 'SEVENTYEIGHT', 'SEVENTYNINE', 'EIGHTY',
    'EIGHTYONE', 'EIGHTYTWO', 'EIGHTYTHREE', 'EIGHTYFOUR', 'EIGHTYFIVE', 'EIGHTYSIX', 'EIGHTYSEVEN',
    'EIGHTYEIGHT', 'EIGHTYNINE', 'NINETY', 'NINETYONE', 'NINETYTWO', 'NINETYTHREE', 'NINETYFOUR',
    'NINETYFIVE', 'NINETYSIX', 'NINETYSEVEN', 'NINETYEIGHT', 'NINETYNINE', 'ONEHUNDRED',
)


def get_name(number: int) -> str:
    """Convert an integer (0-100) to its English uppercase name.

    Args:
        number: Value to convert; must satisfy ``0 <= number <= 100``.

    Returns:
        The matching entry of ``NUMBER_NAMES``, e.g. ``'TWENTYONE'`` for 21.

    Raises:
        ValueError: If *number* lies outside the table. The explicit check
            also stops negative values from silently indexing from the end.
    """
    if not 0 <= number < len(NUMBER_NAMES):
        raise ValueError(f'number out of range: {number}')
    return NUMBER_NAMES[number]
language/__pycache__/topy.cpython-313.pyc ADDED
Binary file (22.1 kB). View file
 
language/createdpython.py CHANGED
@@ -1,760 +1,985 @@
1
- ### finally found the bug
2
- ### here it is:
3
- ### lets say function f calls a new instance of itself
4
- ### the new instance creates cond b and a new instance of itself
5
- ### the new instance closes and returns to the instance with cond b that also closes itself
6
- ### now the original instance still owns cond b since no function was called to clean the stack or dict between the exiting calls
7
- dicte = {"EQUALS": 0, "BIGEQUALS": 1, "BIGGER": 2}
8
- listfunctions = [(lambda x, y: x == y), (lambda x, y: x >= y), (lambda x, y: x > y)]
9
  import sys
10
- import language.Number2Name as Number2Name
11
- linecount = 0
12
- currentline = 0
13
- linelimit = 1000
14
- functionlimit = 25
15
- functioncount = 0
16
- printed = ""
17
- alltheconds = {}
18
- reserveconds = {}
19
- reserveloc = {}
20
- reserveglob = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
21
-
22
-
23
- def line(linenum):
24
- global currentline
25
- currentline = linenum
26
- # if("LISTOFTWO" in allthevars["LIST"]):
27
- # print(linenum,getvar("LIST","LISTOFTWO").values)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  return True
29
 
30
 
 
 
 
 
31
  class Node:
32
- def __init__(self, value):
 
 
33
  self.value = value
34
- self.next = None
35
 
36
- def setnext(self, nextvalue):
37
- self.next = Node(value=nextvalue)
 
38
 
39
- def setnextnode(self, next):
40
- self.next = next
 
41
 
42
- def getnext(self):
 
43
  return self.next
44
 
45
- def getvalue(self):
 
46
  return self.value
47
 
48
 
49
  class Stack:
 
 
50
  def __init__(self):
51
- self.top = None
52
- self.list = []
53
 
54
- def isempty(self):
 
55
  return len(self.list) == 0
56
 
57
- def push(self, value):
 
58
  self.list.append(value)
59
 
60
- def pop(self):
 
61
  return self.list.pop()
62
 
63
 
64
- functioncalls = Stack()
65
 
66
 
67
- def addcond(name, compare):
68
- global alltheconds
69
- if (name in alltheconds):
70
- errore.varexistserror(name)
71
- else:
72
- alltheconds[name] = COND(compare)
73
 
 
 
74
 
75
- def addlocalcond(name, compare):
76
- global alltheconds, allthelocalconds
77
- if (name in alltheconds):
78
- errore.varexistserror(name)
79
- else:
80
- alltheconds[name] = COND(compare)
81
- allthelocalconds[name] = alltheconds[name]
82
- return allthelocalconds
83
-
84
-
85
- # import dis
86
- # print(dis.dis(addlocalcond))
87
- def movetonewconds(localconds):
88
- global stackoflocalconds
89
- global alltheconds
90
- global reserveconds
91
- for i in localconds:
92
- del alltheconds[i]
93
- stackoflocalconds.push(localconds)
94
-
95
-
96
- def backtooldconds():
97
- global stackoflocalconds
98
- popped = stackoflocalconds.pop()
99
- global alltheconds
100
- global reserveconds
101
- for i in reserveconds:
102
- if i in alltheconds:
103
- del alltheconds[i]
104
- reserveconds = {}
105
- for i in popped:
106
- alltheconds[i] = popped[i]
107
- reserveconds[i] = popped[i]
108
- return popped
109
-
110
-
111
- def getcond(name):
112
- global alltheconds
113
- if (name in alltheconds):
114
- return alltheconds[name]
115
- else:
116
- errore.doesntexisterror(name)
117
-
118
-
119
- def printvars():
120
- global allthevars, printed
121
- print("END OF PROGRAM")
122
- print()
123
- for i in allthevars:
124
- print("All the vars used from type " + i)
125
- for j in allthevars[i]:
126
- if (allthevars[i][j].iswritable()):
127
- print(j + " : " + allthevars[i][j].tostring())
128
- else:
129
- pass
130
- print("")
131
- print("All that was printed during the program")
132
- print(printed)
133
 
 
 
 
 
 
 
 
134
 
135
- def addvar(type, name, value):
136
- global allthevars
137
- if name in allthevars[type]:
138
- errore.varexistserror(name)
139
- if (type == "LIST"):
140
- allthevars[type][name] = LIST(name, value, True, True, type)
141
- else:
142
- allthevars[type][name] = VALUE(name, value, True, True, type)
143
 
 
 
 
 
 
 
144
 
145
- def getvar(type, name):
146
- global allthevars
147
- if (name in allthevars[type]):
148
- return allthevars[type][name]
149
- else:
150
- errore.doesntexisterror(name)
 
151
 
 
 
 
 
 
 
 
152
 
153
- def addlocalvar(type, name, value):
154
- global dictlocalvars, allthevars
155
- allthelocalrvars = dictlocalvars
156
- if name in allthevars[type]:
157
- errore.varexistserror(name)
158
- if (type == "LIST"):
159
- allthevars[type][name] = LIST(name, value, True, True, type)
160
- allthelocalrvars[type][name] = allthevars[type][name]
161
 
162
- else:
163
- allthevars[type][name] = VALUE(name, value, True, True, type)
164
- allthelocalrvars[type][name] = allthevars[type][name]
 
 
 
 
165
 
 
 
 
 
 
 
 
166
 
167
- def switchtonewcall(dict1: dict, dict2: dict, stack: Stack):
168
- for i in dict2:
169
- for j in dict2[i]:
170
- del dict1[i][j]
171
- stack.push(dict2)
172
- return {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
173
 
 
 
 
 
 
 
 
174
 
175
- def returntoolastcall(dict1: dict):
176
- stack = localsvars
177
- lastcall = stack.pop()
178
- global reserveloc
179
- for i in reserveloc:
180
- for j in reserveloc[i]:
181
- if j in dict1[i]:
182
- del dict1[i][j]
183
- reserveloc = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
184
- for i in lastcall:
185
- for j in lastcall[i]:
186
- dict1[i][j] = lastcall[i][j]
187
- reserveloc[i][j] = lastcall[i][j]
188
- return lastcall
189
 
190
 
191
- def Print(var, newline):
192
- global printed
193
- toprint = var.tostring() + newline * '\n' + ' ' * (1 - newline)
194
- print(toprint, end='')
195
- printed = printed + toprint
196
 
 
 
197
 
198
- class VALUE:
199
- def __init__(self, name, value, readable, writable, TYPE):
200
  self.name = name
201
  self.value = value
202
  self.readable = readable
203
  self.writable = writable
204
- self.type = TYPE
205
 
206
- def write(self, value):
207
- if (self.writable == True):
 
208
  self.value = value
209
  else:
210
- errore.writeerror(self.name, value)
211
 
212
- def forcewrite(self, value):
 
213
  self.value = value
214
 
215
- def read(self):
216
- if (self.readable == True):
 
217
  return self.value
218
  else:
219
- errore.readerror(self.name)
220
 
221
- def forceread(self):
 
222
  return self.value
223
 
224
- def changeread(self, readstatus):
225
- self.readable = readstatus
 
226
 
227
- def changewrite(self, writestatus):
228
- self.writable = writestatus
 
229
 
230
- def getname(self):
 
231
  return self.name
232
 
233
- def iswritable(self):
 
234
  return self.writable
235
 
236
- def isreadable(self):
 
237
  return self.readable
238
 
239
- def tostring(self):
 
240
  return str(self.value)
241
 
242
- def givetype(self):
 
243
  return self.type
244
 
245
- def override(self, value):
 
246
  self.value = value
247
 
248
- def makecopy(self):
249
- return VALUE(self.name, self.value, self.readable, True, self.type)
 
250
 
251
- def copyvar(self, vari):
252
- if (self.type != vari.type):
253
- errore.typeerror(self.name, self.type, vari.type)
 
254
  else:
255
- if (vari.isreadable() == False):
256
- errore.readerror(vari.getname())
257
  else:
258
- self.value = vari.value
259
 
260
 
261
- def add(Vali1, vali2):
262
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() + getvar("INT", vali2).read())
 
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
- def dec(Vali1, vali2):
266
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() - getvar("INT", vali2).read())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- def mult(Vali1, vali2):
269
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() * getvar("INT", vali2).read())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- def div(Vali1, vali2):
273
- if (getvar("INT", vali2).read() == 0):
274
- errore.DIVZEROERROR(vali2)
275
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() // getvar("INT", vali2).read())
 
 
 
276
 
 
 
 
277
 
278
- def betterdiv(Vali1, vali2):
279
- if (getvar("INT", vali2).read() == 0):
280
- errore.DIVZEROERROR(vali2)
281
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() / getvar("INT", vali2).read())
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- def pow(Vali1, vali2):
285
- getvar("INT", "TEMPORARY").forcewrite(getvar("INT", Vali1).read() ** getvar("INT", vali2).read())
 
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- def mod(Vali1, vali2):
289
- if (getvar("INT", vali2).read() == 0):
290
- errore.DIVZEROERROR(vali2)
291
- getvar("INT", "TEMPORARY").forcewrite(int(getvar("INT", Vali1).read() % getvar("INT", vali2).read()))
292
 
 
 
 
293
 
294
- def comb(Vali1, vali2):
295
- getvar("STR", "TEMPSTRING").forcewrite(getvar("STR", Vali1).read() + getvar("STR", vali2).read())
 
296
 
 
 
 
297
 
298
- def addsize(Vali1, vali2):
299
- getvar("LIST", Vali1).addsize(getvar("INT", vali2).read())
 
300
 
 
 
 
301
 
302
- def assignlist(Vali1, vali2):
303
- getvar("LIST", Vali1).copyvar(getvar("LIST", vali2))
 
304
 
 
 
 
305
 
306
- def assignstr(Vali1, vali2):
307
- getvar("STR", Vali1).copyvar(getvar("STR", vali2))
 
308
 
 
 
 
 
 
 
309
 
310
- def assignint(Vali1, vali2):
311
- getvar("INT", Vali1).copyvar(getvar("INT", vali2))
 
 
 
312
 
 
 
 
313
 
314
- def blankspaces(Vali1, Vali2):
315
- getvar("STR", Vali1).write(getvar("STR", Vali1).read() + ' ' * Vali2)
 
 
 
 
 
 
 
 
 
 
316
 
317
 
318
- def typetoint(vali1, vali2):
319
- lookuptable = {"INT": 0, "STR": 1, "BOOLEAN": 2, "LIST": 3}
320
- if (getvar('STR', vali1).read() in lookuptable):
321
- getvar('INT', vali2).write(lookuptable[getvar('STR', vali1).read()])
322
- else:
323
- errore.typetointerror(getvar('STR', vali1).read())
324
 
 
 
325
 
326
- class COND:
327
- def __init__(self, compare):
328
- self.index = dicte[compare]
329
- self.left = VALUE("0", 0, False, False, "INT")
330
- self.right = VALUE("0", 0, False, False, "INT")
331
 
332
- def changecompare(self, compare):
333
- self.index = dicte[compare]
 
334
 
335
- def changeleft(self, left):
 
336
  self.left = left
337
 
338
- def changeright(self, right):
 
339
  self.right = right
340
 
341
- def giveresult(self):
342
- return listfunctions[self.index](self.left.read(), self.right.read())
 
343
 
344
- def givetype(self):
 
345
  return self.type
346
 
347
 
348
- class EERROR(Exception):
349
- global sizeoflistinuse, wantedindex
 
350
 
351
- def __init__(self):
352
- pass
 
 
 
 
 
353
 
354
- def nameerror(self, type, name):
355
- global currentline
356
- globalline = currentline
357
- print("Error Line: " + str(globalline))
358
- print("Var of name " + name + " doesn't exist as type " + type)
359
- print(" ")
360
- printvars()
361
- sys.exit(0)
362
 
363
- def makeeindexrror(self, sizeoflistinuse, wantedindex, name, name2):
364
- global currentline
365
- globalline = currentline
366
- print("Error Line: " + str(globalline))
367
- print("Tried to change index of list " + name + " with size of " + str(sizeoflistinuse) + " to value " + str(
368
- wantedindex) + " placed in " + str(name2))
369
- print(" ")
370
- printvars()
371
- sys.exit(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
- def DIVZEROERROR(self, name):
374
- global currentline
375
- globalline = currentline
376
- print("Error Line: " + str(globalline))
377
- print("Cant divide by zero and var " + name + " has value of zero")
378
- print(" ")
379
- printvars()
380
- sys.exit(0)
381
 
382
- def doesntexisterror(self, name):
383
- global currentline
384
- globalline = currentline
385
- print("Error Line: " + str(globalline))
386
- print("No object with name " + str(name) + " exists")
387
- printvars()
388
- sys.exit(0)
389
 
390
- def writeerror(self, name, value):
391
- global currentline
392
- globalline = currentline
393
- print("Error Line: " + str(globalline))
394
- print("Tried to write value of " + str(value) + " to unwritable variable " + name)
395
- print(" ")
396
- printvars()
397
- sys.exit(0)
398
 
399
- def typetointerror(self, value):
400
- global currentline
401
- globalline = currentline
402
- print("Error Line: " + str(globalline))
403
- print("No type such as " + str(value))
404
- print(" ")
405
- printvars()
406
- sys.exit(0)
407
 
408
- def readerror(self, name):
409
- global currentline
410
- globalline = currentline
411
- print("Error Line: " + str(globalline))
412
- print("Tried to read from unreadable variable " + name)
413
- print(" ")
414
- printvars()
415
- sys.exit(0)
 
 
 
 
 
416
 
417
- def linelimiterror(self):
418
- global currentline
419
- globalline = currentline
420
- print("Error Line: " + str(currentline))
421
- print("Program ran for too long")
422
- print(" ")
423
- printvars()
424
- sys.exit(0)
425
 
426
- def overflowerror(self, functioncalls):
427
- global currentline
428
- globalline = currentline
429
- print("Error Line: " + str(globalline))
430
- print("Executing too many function calls ")
431
- print("List of function calls")
432
- for i in functioncalls:
433
- pass
434
- print(" ")
435
- printvars()
436
- sys.exit(0)
437
 
438
- def cantchangeindexerror(self, name, value):
439
- global currentline
440
- globalline = currentline
441
- print("Error Line: " + str(globalline))
442
- print("Tried to change indexes of list " + name + " and add size " + str(value) + " but list is unwritable")
443
-
444
- def varexistserror(self, name):
445
- global currentline
446
- globalline = currentline
447
- print("Error Line: " + str(globalline))
448
- print("Tried to create object with name " + name + " but var already exists")
449
- print(" ")
450
- printvars()
451
- sys.exit(0)
452
 
453
- def typeerror(self, name, type1, type2):
454
- global currentline
455
- globalline = currentline
456
- print("Error Line: " + str(globalline))
457
- print("Mismatch of types " + type1 + " and " + type2 + " in list " + name)
458
- print(" ")
459
- printvars()
460
- sys.exit(0)
461
 
 
 
 
 
 
 
 
462
 
463
- class LIST:
464
- def __init__(self, name, size, readable, writable, TYPE):
465
- self.size = size
466
- self.index = 0
467
- self.values = [VALUE(name=(str(name) + " " + str(i)), value=0, writable=True, readable=True, TYPE="INT") for i
468
- in range(size)]
469
- self.types = ["INT" for i in range(size)]
470
- self.readable = readable
471
- self.writable = writable
472
- self.name = name
473
- self.type = TYPE
474
 
475
- def addsize(self, added):
476
- if (self.writable):
477
- self.size = self.size + added
478
- self.values = [
479
- self.values[i] if listfunctions[2](self.size, i) else VALUE(name=(str(i) + " " + str(i)), value=0,
480
- writable=True, readable=True, TYPE="INT")
481
- for i in range(self.size + added)]
482
- else:
483
- errore.cantchangeindexerror(self.name, added)
 
 
484
 
485
- def forceaddsize(self, added):
486
- self.size = self.size + added
487
- self.values = [
488
- self.values[i] if listfunctions[2](self.size, i) else VALUE(name=(str(i) + " " + str(i)), value=0,
489
- writable=True, readable=True, TYPE="INT") for i
490
- in range(self.size + added)]
491
-
492
- def changeindex(self, newindex):
493
- if (self.readable):
494
- if (newindex >= self.size):
495
- errore.makeeindexrror(newindex, self.size, self.name)
496
- else:
497
- self.index = newindex
498
- else:
499
- errore.readerror(self.name)
500
 
501
- def forcechangeindex(self, newindex):
502
- if (newindex >= self.size):
503
- errore.makeeindexrror(newindex, self.size, self.name)
504
- else:
505
- self.index = newindex
506
-
507
- def placevalue(self, name, type):
508
- if (self.writable):
509
- thevar = getvar(type, name)
510
- if (thevar.isreadable()):
511
- self.types[self.index] = type
512
- self.values[self.index] = thevar.makecopy()
513
- else:
514
- errore.readerror(name)
515
- else:
516
- errore.writeerror(self.name, name)
517
 
518
- def forceplacevalue(self, name, type):
519
- thevar = getvar(type, name)
520
- if (thevar.isreadable()):
521
- self.types[self.index] = type
522
- self.values[self.index] = thevar.makecopy()
523
- else:
524
- errore.readerror()
525
 
526
- def returnvalue(self):
527
- if (self.readable):
528
- return self.values[self.index]
529
- else:
530
- errore.readerror(self.name)
 
 
 
 
 
 
 
 
 
 
531
 
532
- def read(self):
533
- if (self.readable):
534
- return self.values[self.index]
535
- else:
536
- errore.readerror(self.name)
537
 
538
- def forcereturnvalue(self):
539
- return self.values[self.index]
 
540
 
541
- def copybyvalue(self, newvalue: VALUE):
542
- if (self.types[self.index] == newvalue.givetype()):
543
- newvalue.write(self.values[self.index].read())
544
- else:
545
- errore.typeerror(name=self.name, type1=self.types[self.index], type2=newvalue.givetype())
 
546
 
547
- def returntype(self):
548
- if (self.readable):
549
- return self.types[self.index]
550
- else:
551
- errore.readerror(self.name)
552
 
553
- def forcereturntype(self):
554
- return self.types[self.index]
 
555
 
556
- def tostring(self):
557
- strei = ""
558
- for i in self.values:
559
- strei = strei + str(i.tostring()) + " "
560
- return "[ " + strei + " ]"
561
-
562
- def tostringoftypes(self):
563
- if (self.readable):
564
- stre = ""
565
- for i in self.types:
566
- stre = stre + i[0]
567
- return stre
568
- else:
569
- errore.readerror(self.name)
570
 
571
- def forcetostringoftypes(self):
572
- stre = ""
573
- for i in self.types:
574
- stre = stre + i[0]
575
- return stre
576
 
577
- def changeread(self, readstatus):
578
- self.readable = readstatus
 
579
 
580
- def changewrite(self, writestatus):
581
- self.writable = writestatus
582
 
583
- def getname(self):
584
- return self.name
 
585
 
586
- def iswritable(self):
587
- return self.writable
588
 
589
- def isreadable(self):
590
- return self.readable
 
 
 
591
 
592
- def getvalues(self):
593
- return self.values
594
 
595
- def gettypes(self):
596
- return self.types
 
 
 
597
 
598
- def getsize(self):
599
- return self.size
600
 
601
- def makecopy(self):
602
- thelist = LIST(self.name, self.size, self.readable, True, self.type)
603
- thelist.types = self.types.copy()
604
- thelist.values = [val.makecopy() for val in self.values]
605
- return thelist
606
 
607
- def override(self, values, types, size):
608
- self.values = values
609
- self.types = types
610
- self.size = size
611
 
612
- def givetype(self):
613
- return self.type
 
 
 
614
 
615
- def copyvar(self, listi):
616
- if (self.type != listi.type):
617
- errore.typeerror(self.name, self.type, listi.type)
618
- else:
619
- if (listi.isreadable() == False):
620
- errore.readerror(listi.getname())
621
- else:
622
- self.type = 'LIST'
623
- self.types = listi.types.copy()
624
- self.size = listi.size
625
- self.values = [var.makecopy() for var in listi.values]
626
-
627
-
628
- def join(listi: LIST, linee):
629
- if (listi.isreadable() == False):
630
- errore.readerror(listi.name)
631
- for i in range(listi.size):
632
- listi.index = i
633
- value = listi.values[i]
634
- typi = listi.types[i]
635
- goodloopvars[typi].copyvar(value)
636
- getvar('STR', 'TEMPSTRING').forcewrite(typi)
637
- line(linee)
638
- endline()
639
- yield value
640
-
641
-
642
- def returntooldlocals(dictofglobals, dictoflocals):
643
- global reserveglob
644
- for i in reserveglob:
645
- for j in reserveglob[i]:
646
- del dictofglobals[i][j]
647
- reserveglob = {"INT": {}, "STR": {}, "LIST": {}}
648
- for i in dictoflocals:
649
- for j in dictoflocals[i]:
650
- dictofglobals[i][j] = dictoflocals[i][j]
651
- reserveglob[i][j] = dictoflocals[i][j]
652
-
653
-
654
- def endline():
655
- global errore, linecount
656
- linecount += 1
657
- if (linecount == linelimit):
658
- errore.linelimiterror()
659
- else:
660
- return True
661
 
662
 
663
- def updateline():
664
- global errore, linecount
665
- linecount += 1
666
- if (linecount == linelimit):
667
- errore.linelimiterror()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  else:
669
  return True
670
 
671
 
672
- def updatelinewithcall(type, namevarinput, function, typeoutput, nameoutput, lini):
673
- line(lini)
674
- global allthevars, functioncount, localsstack, programlocals, nameofprogramlocals, dictlocalvars, functionlimit, allthelocalconds, nameofprogramlocals
675
- localsstack.push(programlocals)
 
 
 
 
 
676
 
677
- varinput = getvar(type, namevarinput)
678
- for i in nameofprogramlocals:
679
- if (i == 'STR'):
680
- allthevars[i][nameofprogramlocals[i]] = VALUE(nameofprogramlocals[i], '', False, False, 'STR')
681
- elif (i == 'INT'):
682
- allthevars[i][nameofprogramlocals[i]] = VALUE(nameofprogramlocals[i], 0, False, False, 'INT')
 
 
 
 
683
  else:
684
- allthevars[i][nameofprogramlocals[i]] = LIST(nameofprogramlocals[i], 8, False, False, 'LIST')
685
- programlocals = {"INT": {"LOCALINT": allthevars["INT"]["LOCALINT"]},
686
- "STR": {"LOCALSTR": allthevars["STR"]["LOCALSTR"]},
687
- "LIST": {"LOCALLIST": allthevars["LIST"]["LOCALLIST"]}}
688
- allthevars[type][nameofprogramlocals[type]].copyvar(varinput)
689
- vartosend = allthevars[type][nameofprogramlocals[type]]
690
- functioncount += 1
691
- if (functioncount == functionlimit):
692
- errore.overflowerror(functioncalls)
 
 
 
 
 
693
  else:
694
- dictlocalvars = switchtonewcall(allthevars, dictlocalvars, localsvars)
695
- vartosend.changeread(True)
696
- vartosend.changewrite(True)
697
- movetonewconds(allthelocalconds)
698
- allthelocalconds = {}
699
- output = getvar(typeoutput, nameoutput)
700
- outi = function()
701
- output.copyvar(outi)
702
- endline()
703
-
704
-
705
- def updatelineexitingcall(type, namevaroutput):
706
- global allthevars, localsstack, functioncount, allthelocalconds, dictlocalvars, localsvars
707
- thevar = getvar(type, namevaroutput)
708
- functioncount = functioncount - 1
709
- popped = localsstack.pop()
710
- ### because i can't remember
711
- ### get old "global"
712
- returntooldlocals(allthevars, popped)
713
- ### get old locals
714
- dictlocalvars = returntoolastcall(allthevars)
715
- ### get old conds
716
- allthelocalconds = backtooldconds()
717
- endline()
718
- return thevar
719
-
720
- localsvars = Stack() #### the stack for locals created in function
721
- localsvars.push({"INT": {}, "STR": {}, "LIST": {}})
722
- localsstack = Stack() ###### the stack of the program locals
723
- LOOPINTEGER = VALUE(name="LOOPINTEGER", value=0, readable=True, writable=False, TYPE="INT")
724
- LOOPSTRING = VALUE(name="LOOPSTRING", value="", readable=True, writable=False, TYPE="STR")
725
- LOOPBOOL = VALUE(name="LOOPBOOL", value=True, readable=True, writable=False, TYPE="BOOL")
726
- LOOPLIST = LIST(name="LOOPLIST", size=8, readable=True, writable=False, TYPE="LIST")
727
- TEMPORARY = VALUE(name="TEMPORARY", value=0, readable=True, writable=True, TYPE="INT")
728
- LOCALINT = VALUE(name="LOCALINT", value=0, readable=0, writable=0, TYPE="INT")
729
- loopvars = {"INT": {"LOOPINTEGER": LOOPINTEGER}, "STR": {"LOOPSTRING": LOOPSTRING}, "LIST": {"LOOPLIST": LOOPLIST},
730
- "BOOLEAN": {"LOOPBOOL": LOOPBOOL}}
731
- goodloopvars = {"INT": LOOPINTEGER, "STR": LOOPSTRING, "LIST": LOOPLIST, "BOOLEAN": LOOPBOOL}
732
- TEMPSTRING = VALUE(name="TEMPSTRING", value="", readable=True, writable=False, TYPE="STR")
733
- LOCALSTR = VALUE(name="LOCALSTR", value="", readable=0, writable=0, TYPE="STR")
734
- INTEGER = VALUE(name="INTEGER", value="INT", readable=True, writable=False, TYPE="STR")
735
- STRING = VALUE(name="STRING", value="STR", readable=True, writable=False, TYPE="STR")
736
- LISTI = VALUE(name="LIST", value="LIST", readable=True, writable=False, TYPE="STR")
737
- BOOLEAN = VALUE(name="BOOLEAN", value="BOOLEAN", readable=True, writable=False, TYPE="STR")
738
- LOCALLIST = LIST(name="LOCALLIST", size=8, readable=0, writable=0, TYPE="LIST")
739
- THETRUTH = COND('EQUALS')
740
- THETRUTH.changeleft(TEMPORARY)
741
- THETRUTH.changeright(TEMPORARY)
742
- allthevars = {"INT": {"LOOPINTEGER": LOOPINTEGER, "TEMPORARY": TEMPORARY, "LOCALINT": LOCALINT},
743
- "STR": {"LOOPSTRING": LOOPSTRING, "TEMPSTRING": TEMPORARY, "LOCALSTR": LOCALSTR, "INTEGER": INTEGER,
744
- "STRING": STRING, "LIST": LISTI, "BOOLEAN": BOOLEAN},
745
- "LIST": {"LOOPLIST": LOOPLIST, "LOCALLIST": LOCALLIST}, "BOOLEAN": {"LOOPBOOL": LOOPBOOL}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  # Number names (ZERO..ONEHUNDRED) are compile-time constants only.
747
  # They are resolved to plain integer literals by the compiler (toline/word_to_num)
748
- # and must NOT live in allthevars["INT"] — that would prevent users from naming
749
  # their own variables ONE, ZERO, etc.
750
# Template: one empty bucket per variable type.
examplelocalvars = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
# Build fresh inner dicts. dict.copy() is shallow, so the previous
# `examplelocalvars.copy()` shared the template's buckets and any local
# variable added later would have leaked into the template. The redundant
# second assignment of the same value is dropped.
dictlocalvars = {var_type: {} for var_type in examplelocalvars}

# Program-local slots, taken from the global registry (no BOOLEAN slot exists).
programlocals = {
    "INT": {"LOCALINT": allthevars["INT"]["LOCALINT"]},
    "STR": {"LOCALSTR": allthevars["STR"]["LOCALSTR"]},
    "LIST": {"LOCALLIST": allthevars["LIST"]["LOCALLIST"]},
}
nameofprogramlocals = {"INT": "LOCALINT", "STR": "LOCALSTR", "LIST": "LOCALLIST"}

stackoflocalconds = Stack()
# Seed the program-locals stack with the initial frame.
localsstack.push(programlocals)
allthelocalconds = {}
errore = EERROR()
alltheconds['THETRUTH'] = THETRUTH
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import sys
2
+ from typing import Dict, List, Any, Callable, Optional
3
+
4
+ # --- Comparison operator tables ---
5
# Maps each comparison keyword to the position of its function in COMPARE_OPS.
COMPARE_OP_INDEX: Dict[str, int] = {
    "EQUALS": 0,
    "BIGEQUALS": 1,
    "BIGGER": 2,
}
# Comparison functions, indexed by the values of COMPARE_OP_INDEX.
COMPARE_OPS: List[Callable[[Any, Any], bool]] = [
    lambda left, right: left == right,
    lambda left, right: left >= right,
    lambda left, right: left > right,
]

# --- VM execution counters and limits ---
line_count: int = 0        # lines executed so far
current_line: int = 0      # line currently executing (used in error reports)
line_limit: int = 1000     # execution aborts when line_count reaches this
function_limit: int = 25
function_count: int = 0
printed_output: str = ""   # everything the program has printed so far


# --- Forward declarations for types ---
# Empty placeholders so the annotations below can be evaluated; classes with
# the same names are defined further down the module.
class Value:
    pass


class VmList:
    pass


class Condition:
    pass


# --- Condition registries ---
cond_registry: Dict[str, Condition] = {}
saved_conds: Dict[str, Condition] = {}

# --- Local variable save slots ---
saved_locals: Dict[str, Any] = {}
saved_globals: Dict[str, Dict[str, Any]] = {
    type_name: {} for type_name in ("INT", "STR", "LIST", "BOOLEAN")
}
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Line tracking
36
+ # ---------------------------------------------------------------------------
37
+
38
def set_current_line(line_num: int) -> bool:
    """Remember which source line is executing so error reports can cite it.

    Args:
        line_num: Line number to store in the module-level ``current_line``.

    Returns:
        Always ``True``.
    """
    global current_line
    current_line = line_num
    return True
43
 
44
 
45
+ # ---------------------------------------------------------------------------
46
+ # Node / Stack
47
+ # ---------------------------------------------------------------------------
48
+
49
  class Node:
50
+ """Singly-linked list node used internally by Stack."""
51
+
52
+ def __init__(self, value: Any):
53
  self.value = value
54
+ self.next: Optional['Node'] = None
55
 
56
+ def set_next(self, next_value: Any) -> None:
57
+ """Create and link a new node with the given value."""
58
+ self.next = Node(value=next_value)
59
 
60
+ def set_next_node(self, next_node: 'Node') -> None:
61
+ """Directly link to an existing node."""
62
+ self.next = next_node
63
 
64
+ def get_next(self) -> Optional['Node']:
65
+ """Return the next node."""
66
  return self.next
67
 
68
+ def get_value(self) -> Any:
69
+ """Return the value stored in this node."""
70
  return self.value
71
 
72
 
73
  class Stack:
74
+ """Simple LIFO stack backed by a Python list."""
75
+
76
  def __init__(self):
77
+ self.top: Optional[Node] = None
78
+ self.list: List[Any] = []
79
 
80
+ def is_empty(self) -> bool:
81
+ """Return True if the stack holds no elements."""
82
  return len(self.list) == 0
83
 
84
+ def push(self, value: Any) -> None:
85
+ """Push a value onto the top of the stack."""
86
  self.list.append(value)
87
 
88
+ def pop(self) -> Any:
89
+ """Pop and return the top value of the stack."""
90
  return self.list.pop()
91
 
92
 
93
+ function_call_stack = Stack()
94
 
95
 
96
+ # ---------------------------------------------------------------------------
97
+ # Error Handler
98
+ # ---------------------------------------------------------------------------
 
 
 
99
 
100
class ErrorHandler(Exception):
    """Centralised VM error reporter: prints diagnostics and terminates execution.

    Every reporter prints the current line, a message describing the problem
    and the variable dump produced by ``print_vars()``, then calls
    ``sys.exit(0)``. None of the reporters return to the caller.
    """

    @staticmethod
    def _abort(message: str, spacer: bool = True) -> None:
        """Print the standard error report and stop the program.

        Args:
            message: Human-readable description of the failure.
            spacer: When True, print a single-space line between the message
                and the variable dump.

        Raises:
            SystemExit: Always, via ``sys.exit(0)``.
        """
        print(f"Error Line: {current_line}")
        print(message)
        if spacer:
            print(" ")
        print_vars()
        # NOTE: the exit status is 0 even though this is an error path; kept
        # as-is because callers may depend on it.
        sys.exit(0)

    def name_error(self, var_type: str, name: str) -> None:
        """Report that a variable of the given type and name does not exist."""
        ErrorHandler._abort(f"Var of name {name} doesn't exist as type {var_type}")

    def index_error(self, list_name: str, bad_index: int, list_size: int) -> None:
        """Report an out-of-bounds list index access."""
        ErrorHandler._abort(
            f"Tried to change index of list {list_name} with size of {list_size} "
            f"to out-of-bounds index {bad_index}"
        )

    def div_zero_error(self, var_name: str) -> None:
        """Report a division-by-zero attempt."""
        ErrorHandler._abort(f"Cant divide by zero and var {var_name} has value of zero")

    def doesnt_exist_error(self, name: str) -> None:
        """Report that no object with the given name exists."""
        # This reporter has never printed the spacer line; keep its output as it was.
        ErrorHandler._abort(f"No object with name {name} exists", spacer=False)

    def write_error(self, name: str, value: Any) -> None:
        """Report an attempt to write to an unwritable variable."""
        ErrorHandler._abort(f"Tried to write value of {value} to unwritable variable {name}")

    def type_to_int_error(self, value: str) -> None:
        """Report that a string does not correspond to a known type name."""
        ErrorHandler._abort(f"No type such as {value}")

    def read_error(self, name: str) -> None:
        """Report an attempt to read from an unreadable variable."""
        ErrorHandler._abort(f"Tried to read from unreadable variable {name}")

    def line_limit_error(self) -> None:
        """Report that the program exceeded the maximum allowed line count."""
        ErrorHandler._abort("Program ran for too long")

    def overflow_error(self, call_stack: Stack) -> None:
        """Report a function call stack overflow (``call_stack`` is not inspected)."""
        ErrorHandler._abort("Executing too many function calls")

    def cant_change_index_error(self, name: str, added: int) -> None:
        """Report an attempt to resize an unwritable list.

        Fixed: this reporter used to print its message and return, letting the
        program continue after the failure. It now dumps the variables and
        terminates like every other reporter.
        """
        ErrorHandler._abort(
            f"Tried to change indexes of list {name} and add size {added} but list is unwritable"
        )

    def var_exists_error(self, name: str) -> None:
        """Report an attempt to create a variable whose name is already taken."""
        ErrorHandler._abort(f"Tried to create object with name {name} but var already exists")

    def type_error(self, name: str, type1: str, type2: str) -> None:
        """Report a type mismatch during a variable assignment."""
        ErrorHandler._abort(f"Mismatch of types {type1} and {type2} in variable {name}")
 
 
 
 
 
 
 
194
 
195
 
196
+ # ---------------------------------------------------------------------------
197
+ # Value
198
+ # ---------------------------------------------------------------------------
 
 
199
 
200
class Value:
    """Named VM variable carrying a value, a type tag and read/write permission flags."""

    def __init__(self, name: str, value: Any, readable: bool, writable: bool, type_name: str):
        self.name = name
        self.value = value
        self.readable = readable
        self.writable = writable
        self.type = type_name

    def write(self, value: Any) -> None:
        """Store ``value`` when the variable is writable; otherwise report a write error."""
        if not self.writable:
            error_handler.write_error(self.name, value)
            return
        self.value = value

    def force_write(self, value: Any) -> None:
        """Store ``value`` without consulting the writable flag."""
        self.value = value

    def read(self) -> Any:
        """Return the value when the variable is readable; otherwise report a read error."""
        if not self.readable:
            error_handler.read_error(self.name)
            return None
        return self.value

    def force_read(self) -> Any:
        """Return the value without consulting the readable flag."""
        return self.value

    def set_readable(self, readable: bool) -> None:
        """Replace the readable flag."""
        self.readable = readable

    def set_writable(self, writable: bool) -> None:
        """Replace the writable flag."""
        self.writable = writable

    def get_name(self) -> str:
        """Return the name given at construction."""
        return self.name

    def is_writable(self) -> bool:
        """Return the writable flag."""
        return self.writable

    def is_readable(self) -> bool:
        """Return the readable flag."""
        return self.readable

    def to_string(self) -> str:
        """Return ``str()`` of the stored value."""
        return str(self.value)

    def give_type(self) -> str:
        """Return the type tag of this variable."""
        return self.type

    def override(self, value: Any) -> None:
        """Replace the stored value with no permission or type check."""
        self.value = value

    def make_copy(self) -> 'Value':
        """Return a new Value with the same name, value, readable flag and type.

        The copy is always writable, whatever this variable's own flag is.
        """
        return Value(
            name=self.name,
            value=self.value,
            readable=self.readable,
            writable=True,
            type_name=self.type,
        )

    def copy_var(self, source: 'Value') -> None:
        """Take over ``source``'s value after a type check and a readability check.

        A type mismatch is reported first; an unreadable source is reported
        next. This variable's own writable flag is not consulted.
        """
        if self.type != source.type:
            error_handler.type_error(self.name, self.type, source.type)
            return
        if not source.is_readable():
            error_handler.read_error(source.get_name())
            return
        self.value = source.value
277
 
278
 
279
+ # ---------------------------------------------------------------------------
280
+ # VmList
281
+ # ---------------------------------------------------------------------------
282
 
283
class VmList:
    """A typed VM list whose elements are Value objects.

    The list keeps a cursor (``index``) that selects the element used by the
    read/place operations, and a parallel ``types`` list holding the type tag
    of each element. New slots are INT values initialised to 0.
    """

    @staticmethod
    def _blank_slot(list_name: str, position: int) -> Value:
        """Build the default element (INT 0, readable and writable) for one slot."""
        return Value(
            name=(str(list_name) + " " + str(position)),
            value=0,
            writable=True,
            readable=True,
            type_name="INT",
        )

    def __init__(self, name: str, size: int, readable: bool, writable: bool, type_name: str):
        self.size = size
        self.index = 0
        self.values: List[Value] = [VmList._blank_slot(name, i) for i in range(size)]
        self.types: List[str] = ["INT"] * size
        self.readable = readable
        self.writable = writable
        self.name = name
        self.type = type_name

    def add_size(self, added: int) -> None:
        """Grow the list by ``added`` elements if writable, else report an error."""
        if self.writable:
            self.force_add_size(added)
        else:
            error_handler.cant_change_index_error(self.name, added)

    def force_add_size(self, added: int) -> None:
        """Grow the list by ``added`` elements without checking the writable flag."""
        old_size = self.size
        self.size = old_size + added
        # Existing elements are kept as-is; every new position gets a blank INT slot.
        self.values = [
            self.values[i] if i < old_size else VmList._blank_slot(self.name, i)
            for i in range(self.size)
        ]
        self.types = self.types + ["INT"] * added

    def change_index(self, new_index: int) -> None:
        """Move the cursor to ``new_index`` if the list is readable and the index is valid."""
        if self.readable:
            self.force_change_index(new_index)
        else:
            error_handler.read_error(self.name)

    def force_change_index(self, new_index: int) -> None:
        """Move the cursor without a readability check; bounds are still enforced.

        Fixed: negative indexes are now rejected as well. Previously only the
        upper bound was checked, so a negative cursor silently addressed
        elements from the end of the underlying Python list.
        """
        if not 0 <= new_index < self.size:
            error_handler.index_error(self.name, new_index, self.size)
        else:
            self.index = new_index

    def place_value(self, name: str, var_type: str) -> None:
        """Copy variable ``name`` of ``var_type`` into the cursor slot if writable."""
        if self.writable:
            self.force_place_value(name, var_type)
        else:
            error_handler.write_error(self.name, name)

    def force_place_value(self, name: str, var_type: str) -> None:
        """Copy a variable into the cursor slot without checking the list's writable flag.

        The source variable must still be readable.
        """
        source = get_var(var_type, name)
        if source.is_readable():
            self.types[self.index] = var_type
            self.values[self.index] = source.make_copy()
        else:
            error_handler.read_error(name)

    def read_value(self) -> Value:
        """Return the Value at the cursor if the list is readable."""
        if self.readable:
            return self.values[self.index]
        error_handler.read_error(self.name)
        return self.values[0]  # Should be unreachable due to sys.exit

    def read(self) -> Value:
        """Alias for :meth:`read_value`."""
        return self.read_value()

    def force_read_value(self) -> Value:
        """Return the Value at the cursor without a readability check."""
        return self.values[self.index]

    def copy_element_to(self, dest_value: Value) -> None:
        """Write the cursor element's value into ``dest_value`` if the types match."""
        element_type = self.types[self.index]
        if element_type == dest_value.give_type():
            dest_value.write(self.values[self.index].read())
        else:
            error_handler.type_error(
                name=self.name,
                type1=element_type,
                type2=dest_value.give_type(),
            )

    def read_type(self) -> str:
        """Return the type tag of the cursor element if the list is readable."""
        if self.readable:
            return self.force_read_type()
        error_handler.read_error(self.name)
        return ""

    def force_read_type(self) -> str:
        """Return the type tag of the cursor element without a readability check."""
        return self.types[self.index]

    def to_string(self) -> str:
        """Return all element values, space-separated, inside ``[ ... ]``."""
        parts = "".join(str(val.to_string()) + " " for val in self.values)
        return "[ " + parts + " ]"

    def to_type_string(self) -> str:
        """Return the first letters of all element type tags if the list is readable."""
        if self.readable:
            return self.force_to_type_string()
        error_handler.read_error(self.name)
        return ""

    def force_to_type_string(self) -> str:
        """Return the first letters of all element type tags without a readability check."""
        return "".join(t[0] for t in self.types)

    def set_readable(self, readable: bool) -> None:
        """Set the readable permission flag."""
        self.readable = readable

    def set_writable(self, writable: bool) -> None:
        """Set the writable permission flag."""
        self.writable = writable

    def get_name(self) -> str:
        """Return the list's name."""
        return self.name

    def is_writable(self) -> bool:
        """Return True if the list is writable."""
        return self.writable

    def is_readable(self) -> bool:
        """Return True if the list is readable."""
        return self.readable

    def get_values(self) -> List[Value]:
        """Return the underlying list of Value elements (not a copy)."""
        return self.values

    def get_types(self) -> List[str]:
        """Return the underlying list of element type tags (not a copy)."""
        return self.types

    def get_size(self) -> int:
        """Return the current size of the list."""
        return self.size

    def make_copy(self) -> 'VmList':
        """Return a writable copy with copied elements and type tags.

        The copy's cursor starts at 0; the source cursor is not carried over.
        """
        copy = VmList(self.name, self.size, self.readable, True, self.type)
        copy.types = self.types.copy()
        copy.values = [val.make_copy() for val in self.values]
        return copy

    def override(self, values: List[Value], types: List[str], size: int) -> None:
        """Replace the contents directly, with no permission or consistency check."""
        self.values = values
        self.types = types
        self.size = size

    def give_type(self) -> str:
        """Return the type tag of the list object itself."""
        return self.type

    def copy_var(self, source_list: 'VmList') -> None:
        """Replace this list's contents with copies of ``source_list``'s elements.

        A type mismatch is reported first; an unreadable source is reported
        next. This list's own writable flag is not consulted.
        """
        if self.type != source_list.type:
            error_handler.type_error(self.name, self.type, source_list.type)
        elif not source_list.is_readable():
            error_handler.read_error(source_list.get_name())
        else:
            self.type = 'LIST'
            self.types = source_list.types.copy()
            self.size = source_list.size
            self.values = [var.make_copy() for var in source_list.values]
484
 
485
 
486
+ # ---------------------------------------------------------------------------
487
+ # Condition
488
+ # ---------------------------------------------------------------------------
 
 
 
489
 
490
class Condition:
    """Comparison between two Value operands, selected by an operator keyword."""

    def __init__(self, compare: str):
        # Unknown keywords raise KeyError from the lookup table.
        self.compare_index = COMPARE_OP_INDEX[compare]
        # Both operands start as unreadable, unwritable INT placeholders.
        self.left = Value("0", 0, False, False, "INT")
        self.right = Value("0", 0, False, False, "INT")
        self.type = "COND"

    def set_compare(self, compare: str) -> None:
        """Switch to the operator named by ``compare``."""
        self.compare_index = COMPARE_OP_INDEX[compare]

    def set_left(self, left: Value) -> None:
        """Use ``left`` as the left-hand operand."""
        self.left = left

    def set_right(self, right: Value) -> None:
        """Use ``right`` as the right-hand operand."""
        self.right = right

    def evaluate(self) -> bool:
        """Apply the selected operator to both operands, read via ``read()``."""
        operator_fn = COMPARE_OPS[self.compare_index]
        return operator_fn(self.left.read(), self.right.read())

    def give_type(self) -> str:
        """Return this object's type tag (``"COND"``)."""
        return self.type
518
 
519
 
520
+ # ---------------------------------------------------------------------------
521
+ # Condition helpers
522
+ # ---------------------------------------------------------------------------
523
 
524
def add_cond(name, compare):
    """Register a new condition under ``name`` using the ``compare`` operator.

    A name that is already registered is reported through
    ``error_handler.var_exists_error`` and nothing is added.
    """
    if name in cond_registry:
        error_handler.var_exists_error(name)
        return
    cond_registry[name] = Condition(compare)
531
 
 
 
 
 
 
 
 
 
532
 
533
def add_local_cond(name, compare):
    """Register a new condition and record it among the current local conditions.

    Returns:
        The ``current_local_conds`` mapping after the insertion, or ``None``
        when ``name`` was already taken and the error was reported.
    """
    if name in cond_registry:
        error_handler.var_exists_error(name)
        return None
    new_cond = Condition(compare)
    cond_registry[name] = new_cond
    current_local_conds[name] = new_cond
    return current_local_conds
542
+
543
+
544
def push_local_conds(local_conds):
    """Unregister every condition in ``local_conds`` and push the mapping on the stack.

    Raises:
        KeyError: If one of the names is not present in ``cond_registry``.
    """
    for cond_name in local_conds:
        cond_registry.pop(cond_name)
    local_conds_stack.push(local_conds)
550
+
551
+
552
def pop_local_conds():
    """Pop the latest saved local conditions and re-register them.

    Conditions restored by the previous call (tracked in ``saved_conds``) are
    removed from the registry first; the popped ones then take their place
    and become the new ``saved_conds``.

    Returns:
        The mapping popped from ``local_conds_stack``.
    """
    global saved_conds
    restored = local_conds_stack.pop()
    for stale_name in saved_conds:
        cond_registry.pop(stale_name, None)
    saved_conds = {}
    for cond_name, cond in restored.items():
        cond_registry[cond_name] = cond
        saved_conds[cond_name] = cond
    return restored
564
 
 
 
 
 
 
 
 
 
565
 
566
def get_cond(name):
    """Fetch the condition registered under ``name``.

    An unknown name is reported through ``error_handler.doesnt_exist_error``.
    """
    if name not in cond_registry:
        error_handler.doesnt_exist_error(name)
        return None
    return cond_registry[name]
573
 
 
 
 
 
 
 
 
 
574
 
575
+ # ---------------------------------------------------------------------------
576
+ # Debug / print helpers
577
+ # ---------------------------------------------------------------------------
 
 
 
 
 
578
 
579
def print_vars():
    """Dump every writable variable, grouped by type, followed by the output buffer."""
    print("END OF PROGRAM")
    print()
    for var_type, variables in var_registry.items():
        print(f"All the vars used from type {var_type}")
        for var_name, variable in variables.items():
            # Variables that are not writable are left out of the dump.
            if variable.is_writable():
                print(f"{var_name} : {variable.to_string()}")
        print("")
    print("All that was printed during the program")
    print(printed_output)
592
 
 
 
 
 
 
 
 
 
593
 
594
+ # ---------------------------------------------------------------------------
595
+ # Variable helpers
596
+ # ---------------------------------------------------------------------------
 
 
 
 
 
 
 
 
597
 
598
def add_var(var_type, name, value):
    """Create a readable, writable variable in ``var_registry[var_type]``.

    For ``"LIST"`` the ``value`` argument is passed to ``VmList`` as the list
    size; for every other type it is the initial value of a ``Value``. An
    existing name is reported through ``error_handler.var_exists_error``.
    """
    bucket = var_registry[var_type]
    if name in bucket:
        error_handler.var_exists_error(name)
    # Both constructors take (name, value-or-size, readable, writable, type).
    factory = VmList if var_type == "LIST" else Value
    bucket[name] = factory(name, value, True, True, var_type)
 
 
 
 
 
607
 
 
 
 
 
 
 
 
 
608
 
609
def get_var(var_type, name):
    """Fetch the variable called ``name`` from the ``var_type`` bucket.

    An unknown name is reported through ``error_handler.doesnt_exist_error``.

    Raises:
        KeyError: If ``var_type`` is not a bucket of ``var_registry``.
    """
    bucket = var_registry[var_type]
    if name not in bucket:
        error_handler.doesnt_exist_error(name)
        return None
    return bucket[name]
616
 
 
 
 
 
 
 
 
 
 
 
 
617
 
618
def add_local_var(var_type, name, value):
    """Create a variable and record it among the current call's local variables.

    The same object is stored in ``var_registry`` and ``current_local_vars``.
    For ``"LIST"`` the ``value`` argument is the list size. An existing name
    is reported through ``error_handler.var_exists_error``.
    """
    if name in var_registry[var_type]:
        error_handler.var_exists_error(name)
    factory = VmList if var_type == "LIST" else Value
    new_var = factory(name, value, True, True, var_type)
    var_registry[var_type][name] = new_var
    current_local_vars[var_type][name] = new_var
629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
def push_local_vars(var_registry_ref: dict, local_vars: dict, stack: Stack):
    """Remove ``local_vars`` from the registry and push them on ``stack``.

    Args:
        var_registry_ref: Registry mapping type name -> {variable name -> variable}.
        local_vars: Same shape; the variables to take out of the registry.
        stack: Stack that receives ``local_vars``.

    Returns:
        A new mapping with an empty bucket for each of INT, STR, LIST, BOOLEAN.

    Raises:
        KeyError: If a listed variable is missing from the registry.
    """
    for var_type, names in local_vars.items():
        for var_name in names:
            del var_registry_ref[var_type][var_name]
    stack.push(local_vars)
    return {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
 
 
 
 
 
 
 
 
 
638
 
 
 
 
 
 
 
 
639
 
640
def pop_local_vars(var_registry_ref: dict):
    """Pop the latest saved local variables and put them back in the registry.

    Variables restored by the previous call (tracked in ``saved_locals``) are
    removed from the registry first, if still present; the popped frame then
    takes their place and becomes the new ``saved_locals``.

    Returns:
        The frame popped from ``local_vars_stack``.
    """
    global saved_locals
    frame = local_vars_stack.pop()
    for var_type, names in saved_locals.items():
        for var_name in names:
            var_registry_ref[var_type].pop(var_name, None)
    saved_locals = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
    for var_type, entries in frame.items():
        for var_name, variable in entries.items():
            var_registry_ref[var_type][var_name] = variable
            saved_locals[var_type][var_name] = variable
    return frame
655
 
 
 
 
 
 
656
 
657
+ # ---------------------------------------------------------------------------
658
+ # VM print
659
+ # ---------------------------------------------------------------------------
660
 
661
def vm_print(var, newline):
    """Print ``var`` and append the same text to the ``printed_output`` buffer.

    Args:
        var: Object exposing ``to_string()``.
        newline: 1 (or True) to end with a line break, 0 (or False) to end
            with a single space.
    """
    global printed_output
    terminator = '\n' * newline + ' ' * (1 - newline)
    text = var.to_string() + terminator
    print(text, end='')
    printed_output += text
667
 
 
 
 
 
 
668
 
669
+ # ---------------------------------------------------------------------------
670
+ # Arithmetic / string operations
671
+ # ---------------------------------------------------------------------------
672
 
673
def vm_add(var1, var2):
    """Store ``var1 + var2`` (both INT variable names) in the INT variable TEMPORARY."""
    result_slot = get_var("INT", "TEMPORARY")
    augend = get_var("INT", var1).read()
    addend = get_var("INT", var2).read()
    result_slot.force_write(augend + addend)
 
 
 
 
 
 
 
 
 
 
 
676
 
 
 
 
 
 
677
 
678
def vm_sub(var1, var2):
    """Store ``var1 - var2`` (both INT variable names) in the INT variable TEMPORARY."""
    result_slot = get_var("INT", "TEMPORARY")
    minuend = get_var("INT", var1).read()
    subtrahend = get_var("INT", var2).read()
    result_slot.force_write(minuend - subtrahend)
681
 
 
 
682
 
683
def vm_mul(var1, var2):
    """Store ``var1 * var2`` (both INT variable names) in the INT variable TEMPORARY."""
    result_slot = get_var("INT", "TEMPORARY")
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    result_slot.force_write(left * right)
686
 
 
 
687
 
688
def vm_div(var1, var2):
    """Store the floor division ``var1 // var2`` in the INT variable TEMPORARY.

    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    result_slot = get_var("INT", "TEMPORARY")
    result_slot.force_write(get_var("INT", var1).read() // divisor)
693
 
 
 
694
 
695
def vm_float_div(var1, var2):
    """Store the true division ``var1 / var2`` in the INT variable TEMPORARY.

    The stored result is whatever ``/`` yields, i.e. a float for int operands.
    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    result_slot = get_var("INT", "TEMPORARY")
    result_slot.force_write(get_var("INT", var1).read() / divisor)
700
 
 
 
701
 
702
def vm_pow(var1, var2):
    """Store ``var1 ** var2`` (both INT variable names) in the INT variable TEMPORARY."""
    result_slot = get_var("INT", "TEMPORARY")
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    result_slot.force_write(base ** exponent)
 
 
705
 
 
 
 
 
706
 
707
def vm_mod(var1, var2):
    """Store ``int(var1 % var2)`` in the INT variable TEMPORARY.

    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    result_slot = get_var("INT", "TEMPORARY")
    result_slot.force_write(int(get_var("INT", var1).read() % divisor))
712
 
713
+
714
def vm_concat(var1, var2):
    """Store ``var1 + var2`` (both STR variable names) in the STR variable TEMPSTRING."""
    result_slot = get_var("STR", "TEMPSTRING")
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    result_slot.force_write(head + tail)
717
+
718
+
719
+ # ---------------------------------------------------------------------------
720
+ # Explicit-destination ALU operations (4-word dialect)
721
+ # ---------------------------------------------------------------------------
722
+
723
def vm_add_to(dest: str, var1: str, var2: str) -> None:
    """Store ``var1 + var2`` in the INT variable *dest*.

    The result is stored with ``force_write``, so *dest*'s writable flag is
    not consulted.
    """
    target = get_var("INT", dest)
    augend = get_var("INT", var1).read()
    addend = get_var("INT", var2).read()
    target.force_write(augend + addend)
726
+
727
+
728
def vm_sub_to(dest: str, var1: str, var2: str) -> None:
    """Store ``var1 - var2`` in the INT variable *dest*.

    The result is stored with ``force_write``, so *dest*'s writable flag is
    not consulted.
    """
    target = get_var("INT", dest)
    minuend = get_var("INT", var1).read()
    subtrahend = get_var("INT", var2).read()
    target.force_write(minuend - subtrahend)
731
+
732
+
733
def vm_mul_to(dest: str, var1: str, var2: str) -> None:
    """Store ``var1 * var2`` in the INT variable *dest*.

    The result is stored with ``force_write``, so *dest*'s writable flag is
    not consulted.
    """
    target = get_var("INT", dest)
    left = get_var("INT", var1).read()
    right = get_var("INT", var2).read()
    target.force_write(left * right)
736
+
737
+
738
def vm_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the floor division ``var1 // var2`` in the INT variable *dest*.

    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    target.force_write(get_var("INT", var1).read() // divisor)
743
+
744
+
745
def vm_float_div_to(dest: str, var1: str, var2: str) -> None:
    """Store the true division ``var1 / var2`` in the INT variable *dest*.

    The stored result is whatever ``/`` yields, i.e. a float for int operands.
    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    target.force_write(get_var("INT", var1).read() / divisor)
750
+
751
+
752
def vm_pow_to(dest: str, var1: str, var2: str) -> None:
    """Store ``var1 ** var2`` in the INT variable *dest*.

    The result is stored with ``force_write``, so *dest*'s writable flag is
    not consulted.
    """
    target = get_var("INT", dest)
    base = get_var("INT", var1).read()
    exponent = get_var("INT", var2).read()
    target.force_write(base ** exponent)
 
 
 
 
755
 
756
 
757
def vm_mod_to(dest: str, var1: str, var2: str) -> None:
    """Store ``int(var1 % var2)`` in the INT variable *dest*.

    A zero divisor is reported through ``error_handler.div_zero_error``.
    """
    divisor = get_var("INT", var2).read()
    if divisor == 0:
        error_handler.div_zero_error(var2)
    target = get_var("INT", dest)
    target.force_write(int(get_var("INT", var1).read() % divisor))
762
+
763
+
764
def vm_concat_to(dest: str, var1: str, var2: str) -> None:
    """Store ``var1 + var2`` (both STR variable names) in the STR variable *dest*.

    The result is stored with ``force_write``, so *dest*'s writable flag is
    not consulted.
    """
    target = get_var("STR", dest)
    head = get_var("STR", var1).read()
    tail = get_var("STR", var2).read()
    target.force_write(head + tail)
767
+
768
+
769
def vm_list_grow(list_name, size_var):
    """Grow LIST variable ``list_name`` by the value read from INT variable ``size_var``."""
    target_list = get_var("LIST", list_name)
    extra_slots = get_var("INT", size_var).read()
    target_list.add_size(extra_slots)
772
+
773
+
774
def vm_assign_list(dest, src):
    """Copy LIST variable ``src`` into LIST variable ``dest`` via ``copy_var``."""
    target = get_var("LIST", dest)
    source = get_var("LIST", src)
    target.copy_var(source)
777
+
778
+
779
def vm_assign_str(dest, src):
    """Copy STR variable ``src`` into STR variable ``dest`` via ``copy_var``."""
    target = get_var("STR", dest)
    source = get_var("STR", src)
    target.copy_var(source)
782
+
783
+
784
def vm_assign_int(dest, src):
    """Copy INT variable ``src`` into INT variable ``dest`` via ``copy_var``."""
    target = get_var("INT", dest)
    source = get_var("INT", src)
    target.copy_var(source)
787
+
788
+
789
def vm_pad_str(var_name, num_spaces):
    """Append ``num_spaces`` space characters to STR variable ``var_name``.

    Uses the permission-checked ``read()`` and ``write()``.
    """
    target = get_var("STR", var_name)
    padded = get_var("STR", var_name).read() + ' ' * num_spaces
    target.write(padded)
792
+
793
+
794
def vm_type_to_int(type_str_var, dest_int_var):
    """Translate a type name held in a STR variable into its integer code.

    Codes: INT -> 0, STR -> 1, BOOLEAN -> 2, LIST -> 3. The code is written
    to INT variable ``dest_int_var``; an unknown type name is reported through
    ``error_handler.type_to_int_error``.
    """
    type_codes = {"INT": 0, "STR": 1, "BOOLEAN": 2, "LIST": 3}
    type_name = get_var('STR', type_str_var).read()
    if type_name not in type_codes:
        error_handler.type_to_int_error(type_name)
        return
    get_var('INT', dest_int_var).write(type_codes[type_name])
802
+
803
+
804
+ # ---------------------------------------------------------------------------
805
+ # Program-locals save/restore
806
+ # ---------------------------------------------------------------------------
807
+
808
def restore_program_locals(global_vars: dict, saved_program_locals: dict):
    """Install ``saved_program_locals`` into ``global_vars``, replacing the previous set.

    The variables installed by the previous call (tracked in the module-level
    ``saved_globals``) are removed first, then the new ones are added and
    recorded as the new ``saved_globals``.

    Args:
        global_vars: Registry mapping type name -> {variable name -> variable}.
        saved_program_locals: Same shape; the program-local variables to install.
    """
    global saved_globals
    for var_type, names in saved_globals.items():
        for name in names:
            # Tolerate names that have already been removed from the registry,
            # as pop_local_vars does.
            global_vars[var_type].pop(name, None)
    # Keep the same four buckets the module-level initialiser declares; the
    # previous reset dropped "BOOLEAN", so restoring a BOOLEAN entry raised
    # KeyError.
    saved_globals = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
    for var_type, entries in saved_program_locals.items():
        tracked = saved_globals.setdefault(var_type, {})
        for name, variable in entries.items():
            global_vars[var_type][name] = variable
            tracked[name] = variable
819
+
820
+
821
+ # ---------------------------------------------------------------------------
822
+ # Line tick
823
+ # ---------------------------------------------------------------------------
824
+
825
def tick_line():
    """Count one executed line and report when the line budget is used up.

    Increments the module-level ``line_count``.  Once the counter has
    reached (or passed) ``line_limit`` the error handler's
    ``line_limit_error`` is invoked.

    Returns:
        ``True`` while execution is still within the limit, so the call
        can be chained inside boolean expressions; ``None`` if the limit
        handler returns instead of raising.
    """
    global line_count
    line_count += 1
    # ">=" rather than "==": if the counter ever overshoots the limit
    # (handler returned, or state carried over from an aborted run) the
    # guard must keep firing instead of being silently disabled.
    if line_count >= line_limit:
        error_handler.line_limit_error()
        return None
    return True
833
 
834
 
835
+ # ---------------------------------------------------------------------------
836
+ # Function call enter / exit
837
+ # ---------------------------------------------------------------------------
838
+
839
def enter_function_call(input_type, input_var_name, function, output_type, output_var_name, call_line):
    """Push a new call frame, run *function*, and copy its result to the output variable.

    Args:
        input_type: Type key ("INT", "STR" or "LIST") of the argument variable.
        input_var_name: Name of the variable passed to the callee.
        function: Zero-argument callable implementing the callee; its
            return value is handed to the output variable's ``copy_var``.
        output_type: Type key of the variable receiving the result.
        output_var_name: Name of the variable receiving the result.
        call_line: Source line of the call, forwarded to ``set_current_line``.
    """
    set_current_line(call_line)
    global var_registry, function_count, program_locals_stack, current_program_locals
    global program_local_names, current_local_vars, function_limit, current_local_conds

    # Save the caller's program-locals and fetch the argument *before*
    # the LOCAL* slots are overwritten below.
    program_locals_stack.push(current_program_locals)
    var_input = get_var(input_type, input_var_name)

    # Reset program-local slots to unreadable/unwritable defaults
    for slot_type in program_local_names:
        slot_name = program_local_names[slot_type]
        if slot_type == 'STR':
            var_registry[slot_type][slot_name] = Value(slot_name, '', False, False, 'STR')
        elif slot_type == 'INT':
            var_registry[slot_type][slot_name] = Value(slot_name, 0, False, False, 'INT')
        else:
            var_registry[slot_type][slot_name] = VmList(slot_name, 8, False, False, 'LIST')

    current_program_locals = {
        "INT": {"LOCALINT": var_registry["INT"]["LOCALINT"]},
        "STR": {"LOCALSTR": var_registry["STR"]["LOCALSTR"]},
        "LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
    }

    # The argument travels in the LOCAL* slot matching its type.
    var_registry[input_type][program_local_names[input_type]].copy_var(var_input)
    var_to_send = var_registry[input_type][program_local_names[input_type]]

    function_count += 1
    # ">=" rather than "==": a counter that has overshot the limit (e.g.
    # state left behind by an aborted run) must still trip the guard.
    if function_count >= function_limit:
        error_handler.overflow_error(function_call_stack)
    else:
        current_local_vars = push_local_vars(var_registry, current_local_vars, local_vars_stack)
        # Only the slot that carries the argument becomes usable in the callee.
        var_to_send.set_readable(True)
        var_to_send.set_writable(True)
        push_local_conds(current_local_conds)
        current_local_conds = {}
        output = get_var(output_type, output_var_name)
        result = function()
        output.copy_var(result)
        tick_line()
880
+
881
+
882
def exit_function_call(return_type, return_var_name):
    """Leave the current function call and hand back its return variable.

    Args:
        return_type: Type key of the variable being returned.
        return_var_name: Name of the variable being returned.

    Returns:
        The variable object found under (*return_type*, *return_var_name*)
        before the callee's frame was torn down.
    """
    global var_registry, program_locals_stack, function_count, current_local_conds
    global current_local_vars, local_vars_stack

    # Must be resolved while the callee's variables are still registered.
    result_var = get_var(return_type, return_var_name)
    function_count -= 1

    # Unwind in the same order the frame was built: program-locals,
    # call-level locals, then conditions.
    restore_program_locals(var_registry, program_locals_stack.pop())
    current_local_vars = pop_local_vars(var_registry)
    current_local_conds = pop_local_conds()

    tick_line()
    return result_var
899
+
900
+
901
+ # ---------------------------------------------------------------------------
902
+ # VM initialisation
903
+ # ---------------------------------------------------------------------------
904
+
905
# Stack of per-call local-variable dicts; seeded with one empty frame.
# NOTE(review): this seed frame has no "BOOLEAN" key although
# empty_local_vars below does — confirm whether that is intentional.
local_vars_stack = Stack()
local_vars_stack.push({"INT": {}, "STR": {}, "LIST": {}})
# Stack of program-local variable dicts (LOCALINT / LOCALSTR / LOCALLIST).
program_locals_stack = Stack()

# Built-in variables.  LOOP* and the type-name constants are readable but
# not writable; LOCAL* start out neither readable nor writable.
LOOP_INTEGER = Value(name="LOOPINTEGER", value=0, readable=True, writable=False, type_name="INT")
LOOP_STRING = Value(name="LOOPSTRING", value="", readable=True, writable=False, type_name="STR")
# NOTE(review): type_name is "BOOL" here while every registry key in this
# section uses "BOOLEAN" — verify which spelling Value expects.
LOOP_BOOL = Value(name="LOOPBOOL", value=True, readable=True, writable=False, type_name="BOOL")
LOOP_LIST = VmList(name="LOOPLIST", size=8, readable=True, writable=False, type_name="LIST")
TEMPORARY = Value(name="TEMPORARY", value=0, readable=True, writable=True, type_name="INT")
LOCAL_INT = Value(name="LOCALINT", value=0, readable=False, writable=False, type_name="INT")
TEMP_STRING = Value(name="TEMPSTRING", value="", readable=True, writable=False, type_name="STR")
LOCAL_STR = Value(name="LOCALSTR", value="", readable=False, writable=False, type_name="STR")
LOCAL_LIST = VmList(name="LOCALLIST", size=8, readable=False, writable=False, type_name="LIST")

# Read-only STR constants holding the internal type names.
TYPE_INT_VAL = Value(name="INTEGER", value="INT", readable=True, writable=False, type_name="STR")
TYPE_STR_VAL = Value(name="STRING", value="STR", readable=True, writable=False, type_name="STR")
TYPE_LIST_VAL = Value(name="LIST", value="LIST", readable=True, writable=False, type_name="STR")
TYPE_BOOLEAN_VAL = Value(name="BOOLEAN", value="BOOLEAN", readable=True, writable=False, type_name="STR")

# Loop variables, addressable by type and name ...
loop_var_registry = {
    "INT": {"LOOPINTEGER": LOOP_INTEGER},
    "STR": {"LOOPSTRING": LOOP_STRING},
    "LIST": {"LOOPLIST": LOOP_LIST},
    "BOOLEAN": {"LOOPBOOL": LOOP_BOOL},
}
# ... or by type alone.
loop_var_by_type = {
    "INT": LOOP_INTEGER,
    "STR": LOOP_STRING,
    "LIST": LOOP_LIST,
    "BOOLEAN": LOOP_BOOL,
}

# Built-in condition comparing TEMPORARY with itself using 'EQUALS'.
THE_TRUTH = Condition('EQUALS')
THE_TRUTH.set_left(TEMPORARY)
THE_TRUTH.set_right(TEMPORARY)

# Master registry: {type key: {variable name: variable object}}.
var_registry = {
    "INT": {
        "LOOPINTEGER": LOOP_INTEGER,
        "TEMPORARY": TEMPORARY,
        "LOCALINT": LOCAL_INT,
    },
    "STR": {
        "LOOPSTRING": LOOP_STRING,
        "TEMPSTRING": TEMP_STRING,
        "LOCALSTR": LOCAL_STR,
        "INTEGER": TYPE_INT_VAL,
        "STRING": TYPE_STR_VAL,
        "LIST": TYPE_LIST_VAL,
        "BOOLEAN": TYPE_BOOLEAN_VAL,
    },
    "LIST": {
        "LOOPLIST": LOOP_LIST,
        "LOCALLIST": LOCAL_LIST,
    },
    "BOOLEAN": {
        "LOOPBOOL": LOOP_BOOL,
    },
}

# Number names (ZERO..ONEHUNDRED) are compile-time constants only.
# They are resolved to plain integer literals by the compiler (toline/word_to_num)
# and must NOT live in var_registry["INT"] — that would prevent users from naming
# their own variables ONE, ZERO, etc.

empty_local_vars = {"INT": {}, "STR": {}, "LIST": {}, "BOOLEAN": {}}
# dict.copy() is shallow: it would leave current_local_vars sharing the
# per-type dicts with the template, so the first local variable stored
# would also appear in empty_local_vars.  Give each type a fresh dict.
current_local_vars = {var_type: {} for var_type in empty_local_vars}
current_program_locals = {
    "INT": {"LOCALINT": var_registry["INT"]["LOCALINT"]},
    "STR": {"LOCALSTR": var_registry["STR"]["LOCALSTR"]},
    "LIST": {"LOCALLIST": var_registry["LIST"]["LOCALLIST"]},
}
# Name of the program-local slot for each type that has one.
program_local_names = {"INT": "LOCALINT", "STR": "LOCALSTR", "LIST": "LOCALLIST"}

local_conds_stack = Stack()
current_local_conds = {}

program_locals_stack.push(current_program_locals)

# Created last; the VM functions look it up at call time.
error_handler = ErrorHandler()
cond_registry['THETRUTH'] = THE_TRUTH
language/dialects.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ dialects.py – Dialect normalisation for Tzefa source lines.
3
+
4
+ The canonical internal bytecode is a **4-word tuple**::
5
+
6
+ [VERB, TYPE, ARG1, ARG2]
7
+
8
+ Two source dialects produce these tuples:
9
+
10
+ THREE_WORD – ``OPCODE ARG1 ARG2`` (classic, expanded to 4-word internally)
11
+ FOUR_WORD – ``VERB TYPE ARG1 ARG2`` (verbose, already native)
12
+
13
+ Two casing modes:
14
+
15
+ CAPS_ONLY – every token is UPPERCASE
16
+ MIXED_CASE – commands Titlecase, user vars lowercase; all uppercased internally
17
+ """
18
+ from __future__ import annotations
19
+
20
+ from typing import Dict, List, Tuple
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Public constants
25
+ # ---------------------------------------------------------------------------
26
+
27
# Dialect identifiers.
THREE_WORD: str = "three_word"
FOUR_WORD: str = "four_word"

# Casing-mode identifiers.
CAPS_ONLY: str = "caps_only"
MIXED_CASE: str = "mixed_case"


# ---------------------------------------------------------------------------
# 3-word → 4-word expansion table
# ---------------------------------------------------------------------------
# Each classic 3-word opcode is rewritten as a (VERB, TYPE) pair.
# "ASSSIGNINT" (three S) is the opcode's established spelling.

THREE_TO_FOUR: Dict[str, Tuple[str, str]] = {
    # --- variable declarations ---
    "MAKEINTEGER": ("MAKE", "INTEGER"),
    "MAKESTR": ("MAKE", "STRING"),
    "MAKEBOOLEAN": ("MAKE", "BOOLEAN"),
    "NEWLIST": ("NEW", "LIST"),
    "BASICCONDITION": ("NEW", "CONDITION"),
    # --- assignment / copy ---
    "ASSSIGNINT": ("SET", "INTEGER"),
    "STRINGASSIGN": ("SET", "STRING"),
    "COPYLIST": ("SET", "LIST"),
    "SETINDEX": ("SET", "INDEX"),
    "LEFTSIDE": ("SET", "LEFT"),
    "RIGHTSIDE": ("SET", "RIGHT"),
    # --- condition ---
    "CHANGECOMPARE": ("CHANGE", "COMPARE"),
    # --- control flow ---
    "WHILE": ("WHILE", "CONDITION"),
    "WHILETRUE": ("WHILE", "BOOLEAN"),
    "COMPARE": ("IF", "CONDITION"),
    "IFTRUE": ("IF", "BOOLEAN"),
    "ELSECOMPARE": ("ELIF", "CONDITION"),
    "ELSEIF": ("ELIF", "BOOLEAN"),
    "ITERATE": ("ITERATE", "LIST"),
    # --- print ---
    "PRINTSTRING": ("PRINT", "STRING"),
    "PRINTINTEGER": ("PRINT", "INTEGER"),
    # --- list read ---
    "GETINTEGER": ("GET", "INTEGER"),
    "GETSTRING": ("GET", "STRING"),
    "GETBOOL": ("GET", "BOOLEAN"),
    "GETLIST": ("GET", "LIST"),
    "GETTYPE": ("GET", "TYPE"),
    "LENGTH": ("GET", "LENGTH"),
    # --- list write ---
    "WRITEINTEGER": ("WRITE", "INTEGER"),
    "WRITESTRING": ("WRITE", "STRING"),
    "WRITEBOOL": ("WRITE", "BOOLEAN"),
    "WRITELIST": ("WRITE", "LIST"),
    # --- list resize ---
    "ADDSIZE": ("ADD", "SIZE"),
    # --- string utilities ---
    "BLANKSPACES": ("PAD", "STRING"),
    # --- type introspection ---
    "TYPETOINT": ("TYPE", "TOINT"),
    # --- functions ---
    "INTEGERFUNCTION": ("FUNCTION", "INTEGER"),
    "STRINGFUNCTION": ("FUNCTION", "STRING"),
    "LISTFUNCTION": ("FUNCTION", "LIST"),
    "RETURN": ("RETURN", "VALUE"),
}

# 3-word ALU opcodes carry no destination; the result goes to TEMPORARY
# (TEMPSTRING for COMBINE).  They expand as: OPCODE A B → [VERB, DEST, A, B]
_THREE_WORD_ALU: Dict[str, Tuple[str, str]] = {
    "ADDVALUES": ("ADD", "TEMPORARY"),
    "SUBTRACT": ("SUBTRACT", "TEMPORARY"),
    "MULTIPLY": ("MULTIPLY", "TEMPORARY"),
    "DIVIDE": ("DIVIDE", "TEMPORARY"),
    "SIMPLEDIVIDE": ("SIMPLEDIVIDE", "TEMPORARY"),
    "MODULO": ("MODULO", "TEMPORARY"),
    "MATHPOW": ("POWER", "TEMPORARY"),
    "COMBINE": ("COMBINE", "TEMPSTRING"),
}

# Reverse lookup: (VERB, TYPE) → classic 3-word opcode (non-ALU ops only).
FOUR_TO_THREE: Dict[Tuple[str, str], str] = {
    pair: opcode for opcode, pair in THREE_TO_FOUR.items()
}

# Verbs whose 4-word form is laid out as [VERB, DEST, SRC1, SRC2].
ALU_VERBS = frozenset(verb for verb, _dest in _THREE_WORD_ALU.values())
119
+
120
+
121
def words_per_line(dialect: str) -> int:
    """Return how many tokens a source line has in *dialect*.

    The four-word dialect yields 4; any other value yields 3.
    """
    if dialect == FOUR_WORD:
        return 4
    return 3
124
+
125
+
126
+ # ---------------------------------------------------------------------------
127
+ # Normalisation — always produces a 4-word CAPS tuple
128
+ # ---------------------------------------------------------------------------
129
+
130
def normalize_line(tokens: List[str], dialect: str, casing: str) -> List[str]:
    """
    Turn a raw token list into the canonical 4-word UPPERCASE form.

    Most instructions come out as [VERB, TYPE, ARG1, ARG2]; ALU
    operations come out as [VERB, DEST, SRC1, SRC2].

    Four-word dialect: tokens are uppercased, then padded with "" or
    truncated to exactly four entries, e.g.
        ADD RESULT A B      →  [ADD, RESULT, A, B]

    Three-word dialect: tokens are uppercased and padded/truncated to
    three, then the opcode is expanded:
        ADDVALUES A B       →  [ADD, TEMPORARY, A, B]   (implicit dest)
        MAKEINTEGER X FIVE  →  [MAKE, INTEGER, X, FIVE]
        MYFUNC IN OUT       →  [CALL, MYFUNC, IN, OUT]  (unknown opcode)

    Parameters
    ----------
    tokens : list[str]
        Raw words of one source line.
    dialect : str
        FOUR_WORD selects the pass-through path; anything else is
        handled as the three-word dialect.
    casing : str
        Not consulted; every token is uppercased regardless.

    Returns
    -------
    list[str]
        Exactly 4 UPPERCASE tokens.
    """
    four_word = dialect == FOUR_WORD
    width = 4 if four_word else 3

    # Uppercase everything, then force the list to exactly `width` entries.
    words = [token.upper() for token in tokens][:width]
    words.extend([""] * (width - len(words)))

    if four_word:
        return words

    opcode, arg1, arg2 = words

    # ALU opcodes take precedence; otherwise try the verb+type table.
    expansion = _THREE_WORD_ALU.get(opcode)
    if expansion is None:
        expansion = THREE_TO_FOUR.get(opcode)

    if expansion is None:
        # Unknown opcode — user-defined function call: FUNCNAME INPUT OUTPUT
        return ["CALL", opcode, arg1, arg2]
    return [expansion[0], expansion[1], arg1, arg2]
175
+
language/topy.py CHANGED
@@ -1,454 +1,384 @@
1
- def makeparenthasis(listofvals):
2
- stri = "("
3
- for i in range(len(listofvals) - 1):
4
- stri = stri + " " + str(listofvals[i]) + " " + ","
5
- stri = stri + " " + str(listofvals[-1]) + " )"
6
- return stri
7
 
 
8
 
9
- def strreadvalue(type, name):
10
- return "getvar" + makeparenthasis([tostri(type), tostri(name)]) + ".read()"
11
 
 
 
 
 
12
 
13
- lineupdate = "endline() ;"
14
- infunction = False
15
- dictoffunct = {i[0]: i for i in [[0]]}
16
- dictofinstructions = {i: "thetext" for i in dictoffunct}
17
- listfunctionswithtypes = {i[0]: i for i in [[0]]}
18
- listfunctionswithtypes["GREATESTDIV"] = ["GREATESTDIV", "LIST", "LIST"]
19
- for i in listfunctionswithtypes:
20
- for j in range(len(listfunctionswithtypes[i])):
21
- if (listfunctionswithtypes[i][j] == "BOOL"):
22
- listfunctionswithtypes[i][j] = "BOOLEAN"
23
 
24
- listofindentchanges = [0 for i in range(1, 1000 + 1)]
25
 
 
 
 
26
 
27
- def getinstructions(listfunctions, listezfunctions):
28
- global dictoffunct, listfunctionswithtypes
29
- dictoffunct = {i[0]: i for i in listezfunctions}
30
- listfunctionswithtypes = {i[0]: i for i in listfunctions}
31
 
 
32
 
33
- def tostri(value):
34
- return "'" + str(value) + "'"
35
 
36
 
37
- def MAKEINTEGER(name, value, linenum):
38
- global infunction
39
- inparan = makeparenthasis(['"INT"', tostri(name), value])
40
- if (infunction):
41
- declarestr = "addlocalvar" + inparan
42
- else:
43
- declarestr = "addvar" + inparan
44
- stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
45
- return stri
46
-
47
-
48
- def MAKESTR(name, value, linenum):
49
- global infunction
50
- inparan = makeparenthasis(['"STR"', tostri(name), "'" + str(value) + "'"])
51
- if (infunction):
52
- declarestr = "addlocalvar" + inparan
53
- else:
54
- declarestr = "addvar" + inparan
55
- stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
56
- return stri
57
-
58
-
59
- def MAKEBOOLEAN(name, value, linenum):
60
- global infunction
61
- if value == "TRUE":
62
- value = "True"
63
- elif value == "FALSE":
64
- value = "False"
65
- inparan = makeparenthasis(['"BOOLEAN"', tostri(name), value])
66
- if (infunction):
67
- declarestr = "addlocalvar" + inparan
68
- else:
69
- declarestr = "addvar" + inparan
70
- stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
71
- return stri
72
-
73
-
74
-
75
- def NEWLIST(name, value, linenum):
76
- global infunction
77
- # value is already a plain integer string (e.g. '6') resolved at compile time
78
- inparan = makeparenthasis(['"LIST"', tostri(name), str(int(value))])
79
- if (infunction):
80
- declarestr = "addlocalvar" + inparan
81
- else:
82
- declarestr = "addvar" + inparan
83
- stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
84
- return stri
85
-
86
-
87
- def BASICCONDITION(name, compare, linenum):
88
- global infunction
89
- if (infunction == False):
90
- declarestr = "addcond" + makeparenthasis([tostri(name), tostri(compare)])
91
- else:
92
- declarestr = "addlocalcond" + makeparenthasis([tostri(name), tostri(compare)])
93
 
94
- stri = "line(" + str(linenum) + ")" + "; " + declarestr + "; " + lineupdate
95
- return stri
 
96
 
97
 
98
- def LEFTSIDE(name, othername, linenum):
99
- thegetvar = "getvar" + makeparenthasis(['"INT"', tostri(othername)])
100
- stri = "line(" + str(linenum) + ")" + "; " + \
101
- "getcond" + makeparenthasis([tostri(name)]) + ".changeleft(" + thegetvar + ")" + "; " + lineupdate
102
 
103
- return (stri)
104
 
 
 
 
105
 
106
- def RIGHTSIDE(name, othername, linenum):
107
- thegetvar = "getvar" + makeparenthasis(['"INT"', tostri(othername)])
108
- stri = "line(" + str(linenum) + ")" + "; " + \
109
- "getcond" + makeparenthasis([tostri(name)]) + ".changeright(" + thegetvar + ")" + "; " + lineupdate
110
- return (stri)
111
 
 
 
 
112
 
113
- def CHANGECOMPARE(name, valuecompare, linenum):
114
- stri = "line(" + str(linenum) + ")" + "; " + \
115
- "getcond" + makeparenthasis([tostri(name)]) + ".changecompare(" + tostri(
116
- valuecompare) + ")" + "; " + lineupdate
117
- return (stri)
118
 
 
 
 
119
 
120
- def WHILE(compare, endline, linenum):
121
- global listofindentchanges
122
- lineofwhile = "while" + makeparenthasis(["line(" + str(linenum) + ") and " + (
123
- "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
124
- listofindentchanges[linenum + 1] = 1
125
- listofindentchanges[int(endline) + 1] = -1
126
- return (lineofwhile)
127
 
 
 
 
128
 
129
- def ITERATE(listi, endline, linenum):
130
- global listofindentchanges
131
- lineofwhile = "for i in join" + makeparenthasis(["getvar('LIST'," + tostri(listi) + ")", str(linenum)]) + ":"
132
- listofindentchanges[linenum + 1] = 1
133
- listofindentchanges[int(endline) + 1] = -1
134
- return lineofwhile
135
 
136
 
137
- def COMPARE(compare, endline, linenum):
138
- global listofindentchanges
139
- lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
140
- "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
141
- listofindentchanges[linenum + 1] = 1
142
- listofindentchanges[int(endline) + 1] = -1
143
- return (lineofwhile)
144
 
145
 
146
- def ELSECOMPARE(compare, endline, linenum):
147
- global listofindentchanges
148
- lineofwhile = "elif" + makeparenthasis(["line(" + str(linenum) + ") and " + (
149
- "getcond" + makeparenthasis([tostri(compare)])) + ".giveresult() and endline()"]) + ":"
150
- listofindentchanges[linenum + 1] = 1
151
- listofindentchanges[int(endline) + 1] = -1
152
- return (lineofwhile)
153
 
 
154
 
155
- def WHILETRUE(bool, endline, linenum):
156
- global listofindentchanges
157
- lineofwhile = "while" + makeparenthasis(["line(" + str(linenum) + ") and " + (
158
- "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
159
- listofindentchanges[linenum + 1] = 1
160
- listofindentchanges[int(endline) + 1] = -1
161
- return (lineofwhile)
 
 
 
 
 
 
 
 
162
 
163
 
164
- def IFTRUE(bool, endline, linenum):
165
- global listofindentchanges
166
- lineofwhile = "if" + makeparenthasis(["line(" + str(linenum) + ") and " + (
167
- "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
168
- listofindentchanges[linenum + 1] = 1
169
- listofindentchanges[int(endline) + 1] = -1
170
- return (lineofwhile)
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- def ELSEIF(bool, endline, linenum):
174
- global listofindentchanges
175
- lineofwhile = "elif" + makeparenthasis(["line(" + str(linenum) + ") and " + (
176
- "getvar('BOOLEAN'," + tostri(bool) + ").read() " + "and endline()")]) + ":"
177
- listofindentchanges[linenum + 1] = 1
178
- listofindentchanges[int(endline) + 1] = -1
179
- return (lineofwhile)
180
 
 
181
 
182
- def INTEGERFUNCTION(name, type, linenum):
183
- global thetype, infunction
184
- infunction = True
185
- thetype = "INT"
186
- listofindentchanges[linenum + 1] = 1
187
- return "def " + name + "" + '():'
188
 
189
 
190
- def STRINGFUNCTION(name, type, linenum):
191
- global thetype, infunction
192
- infunction = True
193
- thetype = "STR"
194
- listofindentchanges[linenum + 1] = 1
195
- return "def " + name + "" + '():'
196
 
 
 
 
 
 
 
 
 
197
 
198
- def LISTFUNCTION(name, type, linenum):
199
- global thetype, infunction
200
- infunction = True
201
- thetype = "LIST"
202
- listofindentchanges[linenum + 1] = 1
203
- return "def " + name + "" + '():'
204
 
205
-
206
- def RETURN(name, stay, linenum):
207
- if (stay == "BREAK"):
208
- listofindentchanges[linenum + 1] = -1
209
- global infunction
210
- infunction = False
211
- return ("line(" + str(linenum) + "); " + "return(updatelineexitingcall" + makeparenthasis(
212
- [tostri(thetype), tostri(name)]) + ")")
213
-
214
-
215
- def PRINTSTRING(name, state, linenum):
216
- if (state == "BREAK"):
217
- state = "True"
218
  else:
219
- state = "False"
220
- return "line(" + str(linenum) + "); " + "Print(" + "getvar('STR'," + tostri(name) + ")," + state + "); " + "endline()"
221
 
222
 
223
- def PRINTINTEGER(name, state, linenum):
224
- if (state == "BREAK"):
225
- state = "True"
 
 
226
  else:
227
- state = "False"
228
- return "line(" + str(linenum) + "); " + "Print(" + "getvar('INT'," + tostri(
229
- name) + ")," + state + "); " + "endline()"
230
 
231
 
232
- def SETINDEX(name, index, linenum):
233
- # index is already a plain integer string resolved at compile time
234
- return ("line(" + str(linenum) + "); getvar('LIST'," + tostri(name) + ").changeindex(" + str(int(index)) + "); endline()")
 
235
 
236
 
237
- def GETSTRING(listname, name, linenum):
238
- name = tostri(name)
239
- listname = tostri(listname)
240
- return ("line(" + str(
241
- linenum) + ");getvar('STR'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
242
 
 
 
 
 
243
 
244
- def GETINTEGER(listname, name, linenum):
245
- name = tostri(name)
246
- listname = tostri(listname)
247
- return ("line(" + str(
248
- linenum) + ");getvar('INT'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
249
 
 
250
 
251
- def GETLIST(listname, name, linenum):
252
- name = tostri(name)
253
- listname = tostri(listname)
254
- return ("line(" + str(
255
- linenum) + ");getvar('LIST'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
256
 
257
 
258
- def GETBOOL(listname, name, linenum):
259
- name = tostri(name)
260
- listname = tostri(listname)
261
- return ("line(" + str(
262
- linenum) + ");getvar('BOOLEAN'," + name + ").copyvar(getvar('LIST'," + listname + ").read()); endline()")
 
 
263
 
264
 
265
- def WRITESTRING(listname, name, linenum):
266
- name = tostri(name)
267
- listname = tostri(listname)
268
- return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"STR"'"); endline()")
269
 
 
270
 
271
- def WRITEINTEGER(listname, name, linenum):
272
- name = tostri(name)
273
- listname = tostri(listname)
274
- return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"INT"'"); endline()")
275
 
 
 
 
276
 
277
- def WRITEBOOL(listname, name, linenum):
278
- name = tostri(name)
279
- listname = tostri(listname)
280
- return ("line(" + str(
281
- linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"BOOLEAN"'"); endline()")
282
 
 
283
 
284
- def WRITELIST(listname, name, linenum):
285
- name = tostri(name)
286
- listname = tostri(listname)
287
- return ("line(" + str(linenum) + ");getvar('LIST'," + listname + ") .placevalue(" + name + ',"LIST"'"); endline()")
 
 
288
 
289
 
290
- def GETTYPE(listname, strname, linenum):
291
- strname = tostri(strname)
292
- listname = tostri(listname)
293
- return ("line(" + str(
294
- linenum) + ");getvar('STR'," + strname + ").write(getvar('LIST'," + listname + ").returntype()); endline()")
295
 
 
 
296
 
297
- def LENGTH(listname, intname, linenum):
298
- intname = tostri(intname)
299
- listname = tostri(listname)
300
- return ("line(" + str(
301
- linenum) + ");getvar('INT'," + intname + ").write(getvar('LIST'," + listname + ").getsize()); endline()")
302
 
 
 
303
 
304
- def ADDVALUES(vali, vali2, linenum):
305
- return ("line(" + str(linenum) + "); " + "add" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
306
 
 
 
307
 
308
- def MULTIPLY(vali, vali2, linenum):
309
- return ("line(" + str(linenum) + "); " + "mult" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
310
 
 
 
311
 
312
- def MATHPOW(vali, vali2, linenum):
313
- return ("line(" + str(linenum) + "); " + "pow" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
314
 
 
 
315
 
316
- def DIVIDE(vali, vali2, linenum):
317
- return ("line(" + str(linenum) + "); " + "betterdiv" + makeparenthasis(
318
- [tostri(vali), tostri(vali2)]) + "; endline()")
319
 
 
 
320
 
321
- def SIMPLEDIVIDE(vali, vali2, linenum):
322
- return ("line(" + str(linenum) + "); " + "div" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
323
 
 
 
324
 
325
- def SUBTRACT(vali, vali2, linenum):
326
- return ("line(" + str(linenum) + "); " + "dec" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
327
 
 
328
 
329
- def MODULO(vali, vali2, linenum):
330
- return ("line(" + str(linenum) + "); " + "mod" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
331
 
332
 
333
- def COMBINE(vali, vali2, linenum):
334
- return ("line(" + str(linenum) + "); " + "comb" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
335
 
 
 
336
 
337
- def ADDSIZE(vali, vali2, linenum):
338
- return ("line(" + str(linenum) + "); " + "addsize" + makeparenthasis([tostri(vali), tostri(vali2)]) + "; endline()")
339
 
 
340
 
341
- def ASSSIGNINT(vali, vali2, linenum):
342
- return ("line(" + str(linenum) + "); " + "assignint" + makeparenthasis(
343
- [tostri(vali), tostri(vali2)]) + "; endline()")
 
 
 
 
344
 
345
 
346
- def STRINGASSIGN(vali, vali2, linenum):
347
- return ("line(" + str(linenum) + "); " + "assignstr" + makeparenthasis(
348
- [tostri(vali), tostri(vali2)]) + "; endline()")
349
 
 
 
 
 
 
 
350
 
351
- def COPYLIST(vali, vali2, linenum):
352
- return ("line(" + str(linenum) + "); " + "assignlist" + makeparenthasis(
353
- [tostri(vali), tostri(vali2)]) + "; endline()")
354
 
 
355
 
356
- def BLANKSPACES(vali, vali2, linenum):
357
- return ("line(" + str(linenum) + "); " + "blankspaces" + makeparenthasis([tostri(vali), vali2]) + "; endline()")
 
 
 
 
 
 
 
 
 
358
 
359
 
360
- def TYPETOINT(vali, vali2, linenum):
361
- return ("line(" + str(linenum) + "); " + "typetoint" + makeparenthasis(
362
- [tostri(vali), tostri(vali2)]) + "; endline()")
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
- dictofinstructions["MAKEINTEGER"] = MAKEINTEGER
366
- dictofinstructions["MAKESTR"] = MAKESTR
367
- dictofinstructions["MAKEBOOLEAN"] = MAKEBOOLEAN
368
- dictofinstructions["NEWLIST"] = NEWLIST
369
- dictofinstructions["BASICCONDITION"] = BASICCONDITION
370
- dictofinstructions["LEFTSIDE"] = LEFTSIDE
371
- dictofinstructions["RIGHTSIDE"] = RIGHTSIDE
372
- dictofinstructions["CHANGECOMPARE"] = CHANGECOMPARE
373
- dictofinstructions["WHILE"] = WHILE
374
- dictofinstructions["ITERATE"] = ITERATE
375
- dictofinstructions["COMPARE"] = COMPARE
376
- dictofinstructions["ELSECOMPARE"] = ELSECOMPARE
377
- dictofinstructions["WHILETRUE"] = WHILETRUE
378
- dictofinstructions["IFTRUE"] = IFTRUE
379
- dictofinstructions["ELSEIF"] = ELSEIF
380
- dictofinstructions["SETINDEX"] = SETINDEX
381
- dictofinstructions["INTEGERFUNCTION"] = INTEGERFUNCTION
382
- dictofinstructions["STRINGFUNCTION"] = STRINGFUNCTION
383
- dictofinstructions["LISTFUNCTION"] = LISTFUNCTION
384
- dictofinstructions["PRINTSTRING"] = PRINTSTRING
385
- dictofinstructions["PRINTINTEGER"] = PRINTINTEGER
386
- dictofinstructions["GETSTRING"] = GETSTRING
387
- dictofinstructions["GETINTEGER"] = GETINTEGER
388
- dictofinstructions["GETLIST"] = GETLIST
389
- dictofinstructions["GETBOOL"] = GETBOOL
390
- dictofinstructions["WRITESTRING"] = WRITESTRING
391
- dictofinstructions["WRITEINTEGER"] = WRITEINTEGER
392
- dictofinstructions["WRITEBOOL"] = WRITEBOOL
393
- dictofinstructions["WRITELIST"] = WRITELIST
394
- dictofinstructions["GETTYPE"] = GETTYPE
395
- dictofinstructions["LENGTH"] = LENGTH
396
- dictofinstructions["ASSSIGNINT"] = ASSSIGNINT
397
- dictofinstructions["ADDSIZE"] = ADDSIZE
398
- dictofinstructions["STRINGASSIGN"] = STRINGASSIGN
399
- dictofinstructions["COPYLIST"] = COPYLIST
400
- dictofinstructions["ADDVALUES"] = ADDVALUES
401
- dictofinstructions["MULTIPLY"] = MULTIPLY
402
- dictofinstructions["MATHPOW"] = MATHPOW
403
- dictofinstructions["DIVIDE"] = DIVIDE
404
- dictofinstructions["SIMPLEDIVIDE"] = SIMPLEDIVIDE
405
- dictofinstructions["SUBTRACT"] = SUBTRACT
406
- dictofinstructions["MODULO"] = MODULO
407
- dictofinstructions["COMBINE"] = COMBINE
408
- dictofinstructions["BLANKSPACES"] = BLANKSPACES
409
- dictofinstructions["RETURN"] = RETURN
410
- dictofinstructions["TYPETOINT"] = TYPETOINT
411
 
 
 
 
412
 
413
-
414
- def makepredict(listi, i):
415
- if listi[0] in dictofinstructions:
416
- return dictofinstructions[listi[0]](listi[1], listi[2], i)
417
- else:
418
- listfun = listfunctionswithtypes[listi[0]]
419
- return ("updatelinewithcall" + makeparenthasis(
420
- [tostri(listfun[1]), tostri(listi[1]), listi[0], tostri(listfun[2]), tostri(listi[2]), i]))
421
 
422
 
423
- def makepyfile(listi):
 
424
  from pathlib import Path
425
 
426
- outfile = Path(__file__).parent / "test.py"
427
- with outfile.open("w+", encoding="utf-8") as f:
 
 
 
 
428
  f.write("from Tzefa_Language.createdpython import *\n")
429
- counterindent = 0
430
- indent = " "
431
- for i in range(1, len(listi) + 1):
432
- counterindent += listofindentchanges[i]
433
- f.write(indent * counterindent + makepredict(listi[i - 1], i) + '\n')
434
- f.write("printvars()")
435
-
436
-
437
-
438
-
439
# Self-test: when run as a script, compile a fixed sample program (it defines
# and calls a GREATESTDIV list function) into test.py via makepyfile().
if __name__ == '__main__':
    listi = [["MAKEINTEGER", "THEINT", '2769'], ["MAKEINTEGER", "THEINTI", '1065'], ["MAKEINTEGER", "THROWONE", '1065'],
             ["MAKEINTEGER", "THROWTWO", '1065'], ["NEWLIST", "LISTOFTWO", '2'], ["SETINDEX", "LISTOFTWO", '0'],
             ["WRITEINTEGER", "LISTOFTWO", 'THEINT'], ["SETINDEX", "LISTOFTWO", '1'],
             ["WRITEINTEGER", "LISTOFTWO", 'THEINTI'], ["MAKEINTEGER", "ZERO", '0'], ["ADDVALUES", "THEINT", 'THEINTI'],
             ["PRINTINTEGER", "TEMPORARY", 'BREAK'], ["LISTFUNCTION", "GREATESTDIV", 'LIST'],
             ["SETINDEX", "LISTOFTWO", '0'], ["GETINTEGER", "LISTOFTWO", 'THROWONE'], ["SETINDEX", "LISTOFTWO", '1'],
             ["GETINTEGER", "LISTOFTWO", 'THROWTWO'], ["BASICCONDITION", "EUCLIDCOMPARE", 'EQUALS'],
             ["LEFTSIDE", "EUCLIDCOMPARE", 'THROWTWO'], ["RIGHTSIDE", "EUCLIDCOMPARE", 'ZERO'],
             ["COMPARE", "EUCLIDCOMPARE", '23'], ["WRITEINTEGER", "LISTOFTWO", 'THROWTWO'],
             ["RETURN", "LISTOFTWO", "STAY"], ["RIGHTSIDE", "EUCLIDCOMPARE", 'THROWTWO']
        , ["SETINDEX", "LISTOFTWO", '0'], ["WRITEINTEGER", "LISTOFTWO", 'THROWTWO'], ["MODULO", "THROWONE", 'THROWTWO'],
             ["SETINDEX", "LISTOFTWO", '1'], ["WRITEINTEGER", "LISTOFTWO", 'TEMPORARY'],
             ["GREATESTDIV", "LISTOFTWO", 'LISTOFTWO'], ["RETURN", "LISTOFTWO", 'BREAK'],
             ["GREATESTDIV", "LISTOFTWO", 'LISTOFTWO']]
    makepyfile(listi)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ topy.py Tzefa IR → Python code generator.
 
 
 
 
3
 
4
+ The bytecode is a 4-element tuple::
5
 
6
+ [VERB, TYPE, ARG1, ARG2]
 
7
 
8
+ Each handler receives (type_word, arg1, arg2, line_num) and returns a
9
+ Python source-code string that is later assembled by make_py_file().
10
+ """
11
+ from __future__ import annotations
12
 
13
+ from typing import Any, Callable, Dict, List, Tuple
 
 
 
 
 
 
 
 
 
14
 
 
15
 
16
+ # ---------------------------------------------------------------------------
17
+ # Globals
18
+ # ---------------------------------------------------------------------------
19
 
20
# Suffix appended to every ordinary generated statement (see _stmt).
_TICK: str = "tick_line() ;"
# True between a FUNCTION instruction and its closing RETURN ... BREAK;
# _make uses it to choose the add_local_* declarators.
_in_function: bool = False
# VM type tag of the function being emitted; set by _function, read by _return.
_current_return_type: str = ""

# name -> [name, input_type, output_type], filled by register_user_function().
_user_functions: Dict[str, List[str]] = {}

# Indent delta applied before emitting 1-based source line i; block handlers
# store +1 / -1 here. The table has 1001 slots, so indexes above 1000 raise
# IndexError.
_indent_changes: List[int] = [0] * 1001
 
27
 
28
 
29
+ # ---------------------------------------------------------------------------
30
+ # Tiny code-gen helpers
31
+ # ---------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ def _args(*values: Any) -> str:
34
+ """Parenthesised, comma-separated argument list."""
35
+ return "( " + ", ".join(str(v) for v in values) + " )" if values else "()"
36
 
37
 
38
+ def _q(value: Any) -> str:
39
+ """Single-quote a value for generated code."""
40
+ return f"'{value}'"
 
41
 
 
42
 
43
def _gv(var_type: str, name: str) -> str:
    """Build the source text of a ``get_var(<type>, <name>)`` call."""
    quoted_type = _q(var_type)
    quoted_name = _q(name)
    return "get_var(" + quoted_type + ", " + quoted_name + ")"
46
 
 
 
 
 
 
47
 
48
+ def _lp(n: int) -> str:
49
+ """set_current_line() prefix."""
50
+ return f"set_current_line({n})"
51
 
 
 
 
 
 
52
 
53
def _stmt(line_num: int, *parts: str) -> str:
    """Assemble one generated statement line.

    The result is ``set_current_line(<line_num>)``, the ``"; "``-joined
    *parts*, and the ``_TICK`` suffix, all separated by ``"; "``.
    """
    body = "; ".join(parts)
    return "; ".join((_lp(line_num), body, _TICK))
56
 
 
 
 
 
 
 
 
57
 
58
+ # ---------------------------------------------------------------------------
59
+ # Register user-defined functions (called by ErrorCorrection after parsing)
60
+ # ---------------------------------------------------------------------------
61
 
62
def register_user_function(name: str, input_type: str, output_type: str) -> None:
    """Record a user-defined function in ``_user_functions``.

    The entry is stored under *name* as ``[name, input_type, output_type]``;
    ``_call`` reads the two type tags when emitting a call.
    """
    spec = [name, input_type, output_type]
    _user_functions[name] = spec
 
 
 
65
 
66
 
67
def get_user_functions() -> Dict[str, List[str]]:
    """Return the live registry of user-defined functions (not a copy)."""
    registry = _user_functions
    return registry
 
 
 
 
 
69
 
70
 
71
+ # ---------------------------------------------------------------------------
72
+ # Handlers — each takes (type_word, arg1, arg2, line_num) -> str
73
+ # ---------------------------------------------------------------------------
 
 
 
 
74
 
75
+ # -- MAKE: declare variables -----------------------------------------------
76
 
77
def _make(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit the declaration for ``MAKE <type_word> <arg1> <arg2>``.

    Inside a function body (``_in_function`` set) the ``add_local_*``
    declarators are used, otherwise the ``add_*`` ones. Returns an empty
    string for an unrecognised *type_word*. ``LIST`` sizes go through
    ``int()`` and therefore raise ``ValueError`` when not numeric.
    """
    scope = "local_" if _in_function else ""
    name = _q(arg1)

    if type_word == "CONDITION":
        return _stmt(ln, f"add_{scope}cond" + _args(name, _q(arg2)))

    declare = f"add_{scope}var"
    if type_word == "BOOLEAN":
        # TRUE/FALSE map to Python literals; anything else is emitted verbatim.
        literal = {"TRUE": "True", "FALSE": "False"}.get(arg2, arg2)
        return _stmt(ln, declare + _args(_q('BOOLEAN'), name, literal))
    if type_word == "STRING":
        return _stmt(ln, declare + _args(_q('STR'), name, _q(arg2)))
    if type_word == "INTEGER":
        # The initial value is emitted unquoted.
        return _stmt(ln, declare + _args(_q('INT'), name, arg2))
    if type_word == "LIST":
        return _stmt(ln, declare + _args(_q('LIST'), name, int(arg2)))
    return ""
92
 
93
 
94
+ # -- SET: assignment / index / condition sides -----------------------------
 
 
 
 
 
 
95
 
96
def _set(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit code for ``SET <type_word> <arg1> <arg2>``.

    ``INTEGER``/``STRING``/``LIST`` become ``vm_assign_*`` calls, ``INDEX``
    moves a list's index (``arg2`` is passed through ``int()``), and
    ``LEFT``/``RIGHT`` set one side of a condition from an INT variable.
    Returns an empty string for any other *type_word*.
    """
    target = _q(arg1)

    assigners = {
        "INTEGER": "vm_assign_int",
        "STRING": "vm_assign_str",
        "LIST": "vm_assign_list",
    }
    if type_word in assigners:
        return _stmt(ln, assigners[type_word] + _args(target, _q(arg2)))

    if type_word == "INDEX":
        return _stmt(ln, f"get_var('LIST',{target}).change_index({int(arg2)})")

    if type_word in ("LEFT", "RIGHT"):
        side = type_word.lower()
        return _stmt(ln, f"get_cond({target}).set_{side}({_gv('INT', arg2)})")

    return ""
110
 
 
 
 
 
 
 
 
111
 
112
+ # -- CHANGE ----------------------------------------------------------------
113
 
114
def _change(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit ``get_cond(<arg1>).set_compare(<arg2>)``.

    *type_word* is ignored: changing the comparison is the only CHANGE form
    handled so far.
    """
    condition = _q(arg1)
    comparison = _q(arg2)
    return _stmt(ln, "get_cond(" + condition + ").set_compare(" + comparison + ")")
 
 
 
117
 
118
 
119
+ # -- Control flow ----------------------------------------------------------
 
 
 
 
 
120
 
121
def _while(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit a ``while`` header and record where its body starts and ends.

    *arg2* is the last line of the body. Indentation rises before line
    ``ln + 1`` and drops before line ``int(arg2) + 1``. A ``CONDITION`` guard
    evaluates the named condition; any other *type_word* reads a BOOLEAN
    variable.
    """
    # NOTE(review): these are assignments, not increments, so two blocks that
    # close on the same line dedent only once - confirm this is intended.
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    if type_word == "CONDITION":
        test = f"get_cond({_q(arg1)}).evaluate()"
    else:  # BOOLEAN
        test = f"get_var('BOOLEAN',{_q(arg1)}).read()"
    # NOTE(review): the `and` chain presumes set_current_line()/tick_line()
    # return truthy values - confirm in createdpython.
    return f"while( set_current_line({ln}) and {test} and tick_line() ):"
129
 
 
 
 
 
 
 
130
 
131
def _if(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit an ``if`` header and record where its body starts and ends.

    *arg2* is the last line of the body. Indentation rises before line
    ``ln + 1`` and drops before line ``int(arg2) + 1``. A ``CONDITION`` guard
    evaluates the named condition; any other *type_word* reads a BOOLEAN
    variable.
    """
    # NOTE(review): these are assignments, not increments, so two blocks that
    # close on the same line dedent only once - confirm this is intended.
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    if type_word == "CONDITION":
        test = f"get_cond({_q(arg1)}).evaluate()"
    else:
        test = f"get_var('BOOLEAN',{_q(arg1)}).read()"
    return f"if( set_current_line({ln}) and {test} and tick_line() ):"
139
 
140
 
141
def _elif(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit an ``elif`` header and record where its body starts and ends.

    *arg2* is the last line of the body. Indentation rises before line
    ``ln + 1`` and drops before line ``int(arg2) + 1``. A ``CONDITION`` guard
    evaluates the named condition; any other *type_word* reads a BOOLEAN
    variable.
    """
    # NOTE(review): these are assignments, not increments, so two blocks that
    # close on the same line dedent only once - confirm this is intended.
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    if type_word == "CONDITION":
        test = f"get_cond({_q(arg1)}).evaluate()"
    else:
        test = f"get_var('BOOLEAN',{_q(arg1)}).read()"
    return f"elif( set_current_line({ln}) and {test} and tick_line() ):"
 
149
 
150
 
151
def _iterate(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit a ``for`` header looping over list *arg1* via ``vm_loop_list``.

    *arg2* is the last line of the body. Indentation rises before line
    ``ln + 1`` and drops before line ``int(arg2) + 1``. *type_word* is unused.
    """
    # NOTE(review): these are assignments, not increments, so two blocks that
    # close on the same line dedent only once - confirm this is intended.
    _indent_changes[ln + 1] = 1
    _indent_changes[int(arg2) + 1] = -1

    iterable = _gv('LIST', arg1)
    return "for i in vm_loop_list(" + iterable + f", {ln}):"
155
 
156
 
157
+ # -- PRINT -----------------------------------------------------------------
 
 
 
 
158
 
159
def _print(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit a ``vm_print`` call for variable *arg1*.

    ``STRING`` selects the ``STR`` variable table, anything else ``INT``. The
    second ``vm_print`` argument is ``True`` only when *arg2* is ``BREAK``.
    """
    if type_word == "STRING":
        vm_type = "STR"
    else:
        vm_type = "INT"
    newline = str(arg2 == "BREAK")
    variable = f"get_var({_q(vm_type)},{_q(arg1)})"
    return _stmt(ln, f"vm_print({variable},{newline})")
163
 
 
 
 
 
 
164
 
165
+ # -- GET: read from list ---------------------------------------------------
166
 
167
# Source-language type word -> VM type tag used by get_var().
_GET_TYPE_MAP = {
    "INTEGER": "INT",
    "STRING": "STR",
    "BOOLEAN": "BOOLEAN",
    "LIST": "LIST",
}


def _get(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit code that reads from list *arg1* into variable *arg2*.

    ``TYPE`` writes the list's ``read_type()`` into a STR variable, ``LENGTH``
    writes ``get_size()`` into an INT variable, and any other *type_word*
    copies the list's ``read()`` result into a variable of the mapped type.
    Raises ``KeyError`` when *type_word* is not in ``_GET_TYPE_MAP``.
    """
    source = f"get_var('LIST',{_q(arg1)})"

    if type_word == "TYPE":
        return _stmt(ln, f"get_var('STR',{_q(arg2)}).write({source}.read_type())")
    if type_word == "LENGTH":
        return _stmt(ln, f"get_var('INT',{_q(arg2)}).write({source}.get_size())")

    vm = _GET_TYPE_MAP[type_word]
    return _stmt(ln, f"get_var({_q(vm)},{_q(arg2)}).copy_var({source}.read())")
177
 
178
 
179
+ # -- WRITE: write to list --------------------------------------------------
 
 
 
180
 
181
# Source-language type word -> VM type tag passed to place_value().
_WRITE_TYPE_MAP = {
    "INTEGER": "INT",
    "STRING": "STR",
    "BOOLEAN": "BOOLEAN",
    "LIST": "LIST",
}


def _write(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit ``place_value`` on list *arg1* with *arg2* and the mapped type tag.

    Raises ``KeyError`` when *type_word* is not in ``_WRITE_TYPE_MAP``.
    """
    vm = _WRITE_TYPE_MAP[type_word]
    target = "get_var('LIST'," + _q(arg1) + ")"
    # The type tag is emitted in double quotes, unlike the other arguments.
    call = target + ".place_value(" + _q(arg2) + ',"' + vm + '")'
    return _stmt(ln, call)
187
 
 
 
 
 
 
188
 
189
+ # -- ADD (dual purpose: list resize / arithmetic with explicit dest) --------
190
 
191
def _add(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit code for ``ADD``, which has two forms.

    ``ADD SIZE <list> <amount>`` (dest is the literal word ``SIZE``) emits
    ``vm_list_grow``; every other form is ``ADD <dest> <src1> <src2>`` and
    emits ``vm_add_to``.
    """
    if dest != "SIZE":
        return _stmt(ln, "vm_add_to" + _args(_q(dest), _q(src1), _q(src2)))
    return _stmt(ln, "vm_list_grow" + _args(_q(src1), _q(src2)))
197
 
198
 
199
+ # -- Arithmetic verbs — all take (dest, src1, src2, ln) --------------------
 
 
 
 
200
 
201
def _subtract(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_sub_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_sub_to" + operands)
203
 
 
 
 
 
 
204
 
205
def _multiply(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_mul_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_mul_to" + operands)
207
 
 
 
208
 
209
def _divide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_float_div_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_float_div_to" + operands)
211
 
 
 
212
 
213
def _simpledivide(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_div_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_div_to" + operands)
215
 
 
 
216
 
217
def _modulo(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_mod_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_mod_to" + operands)
219
 
 
 
 
220
 
221
def _power(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_pow_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_pow_to" + operands)
223
 
 
 
224
 
225
def _combine(dest: str, src1: str, src2: str, ln: int) -> str:
    """Emit a ``vm_concat_to(dest, src1, src2)`` statement with quoted names."""
    operands = _args(_q(dest), _q(src1), _q(src2))
    return _stmt(ln, "vm_concat_to" + operands)
227
 
 
 
228
 
229
+ # -- PAD -------------------------------------------------------------------
230
 
231
def _pad(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit ``vm_pad_str(<arg1>, <arg2>)``.

    *arg1* is quoted; *arg2* is emitted verbatim. *type_word* is unused.
    """
    operands = _args(_q(arg1), arg2)
    return _stmt(ln, "vm_pad_str" + operands)
233
 
234
 
235
+ # -- TYPE ------------------------------------------------------------------
 
236
 
237
def _type(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit ``vm_type_to_int(<arg1>, <arg2>)`` with both names quoted.

    *type_word* is unused.
    """
    operands = _args(_q(arg1), _q(arg2))
    return _stmt(ln, "vm_type_to_int" + operands)
239
 
 
 
240
 
241
+ # -- FUNCTION: define ------------------------------------------------------
242
 
243
def _function(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit the ``def <arg1>():`` header and enter function-body mode.

    Sets ``_in_function``, stores the VM tag of the return type (``STRING`` ->
    ``STR``, ``LIST`` -> ``LIST``, anything else -> ``INT``) and raises the
    indentation before line ``ln + 1``. The matching dedent is recorded by
    ``_return`` on ``BREAK``. *arg2* is unused.
    """
    global _in_function, _current_return_type
    _in_function = True

    if type_word == "STRING":
        _current_return_type = "STR"
    elif type_word == "LIST":
        _current_return_type = "LIST"
    else:
        _current_return_type = "INT"

    _indent_changes[ln + 1] = 1
    return "def " + arg1 + "():"
250
 
251
 
252
+ # -- RETURN ----------------------------------------------------------------
 
 
253
 
254
def _return(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit a ``return(exit_function_call(...))`` line for variable *arg1*.

    When *arg2* is ``BREAK`` this is the function's closing return: the
    indentation drops before line ``ln + 1`` and function-body mode ends. Any
    other *arg2* leaves the state untouched. *type_word* is unused, and no
    ``tick_line()`` suffix is added.
    """
    global _in_function
    closes_function = arg2 == "BREAK"
    if closes_function:
        _indent_changes[ln + 1] = -1
        _in_function = False

    result = "exit_function_call(" + _q(_current_return_type) + ", " + _q(arg1) + ")"
    return f"set_current_line({ln}); return({result})"
260
 
 
 
 
261
 
262
+ # -- CALL: user-defined function -------------------------------------------
263
 
264
def _call(type_word: str, arg1: str, arg2: str, ln: int) -> str:
    """Emit an ``enter_function_call`` for a user-defined function.

    *type_word* is the function name, *arg1* the input variable and *arg2*
    the output variable. Input and output type tags come from the function's
    registered spec; an unregistered function falls back to ``INT`` for both,
    which should not happen if ErrorCorrection registered every function.
    """
    func_name = type_word
    spec = _user_functions.get(func_name)
    if spec:
        input_type, output_type = spec[1], spec[2]
    else:
        input_type, output_type = "INT", "INT"

    # The function name itself is emitted unquoted, as a reference.
    pieces = [_q(input_type), _q(arg1), func_name, _q(output_type), _q(arg2), str(ln)]
    return "enter_function_call(" + ", ".join(pieces) + ")"
275
 
276
 
277
+ # ---------------------------------------------------------------------------
278
+ # Dispatch table keyed by VERB
279
+ # ---------------------------------------------------------------------------
280
 
281
# Maps each built-in VERB to its handler. Every handler is called as
# handler(quad[1], quad[2], quad[3], line_num); the arithmetic handlers name
# those parameters (dest, src1, src2). Verbs missing from this table are
# treated by make_instruction() as calls to user-defined functions.
_DISPATCH: Dict[str, Callable[[str, str, str, int], str]] = {
    "MAKE": _make,
    "SET": _set,
    "CHANGE": _change,
    "WHILE": _while,
    "IF": _if,
    "ELIF": _elif,
    "ITERATE": _iterate,
    "PRINT": _print,
    "GET": _get,
    "WRITE": _write,
    "ADD": _add,
    "SUBTRACT": _subtract,
    "MULTIPLY": _multiply,
    "DIVIDE": _divide,
    "SIMPLEDIVIDE": _simpledivide,
    "MODULO": _modulo,
    "POWER": _power,
    "COMBINE": _combine,
    "PAD": _pad,
    "TYPE": _type,
    "FUNCTION": _function,
    "RETURN": _return,
    "CALL": _call,
}
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
+ # ---------------------------------------------------------------------------
309
+ # Code generation
310
+ # ---------------------------------------------------------------------------
311
 
312
def make_instruction(quad: List[str], line_num: int) -> str:
    """Turn one bytecode entry into a line of generated Python.

    ``quad[0]`` is the verb. Known verbs are dispatched through ``_DISPATCH``
    with ``(quad[1], quad[2], quad[3], line_num)``. An unknown verb is taken
    to be a user-defined function name and emitted through ``_call`` with
    ``quad[1]`` and ``quad[2]`` as its input and output variables.
    """
    verb = quad[0]
    if verb not in _DISPATCH:
        return _call(verb, quad[1], quad[2], line_num)
    emit = _DISPATCH[verb]
    return emit(quad[1], quad[2], quad[3], line_num)
320
 
321
 
322
def make_py_file(instruction_list: List[List[str]]) -> None:
    """Compile *instruction_list* to Python and write it to ``test.py``.

    The file is created next to this module. It starts with a small header
    (``sys.path`` setup, the ``createdpython`` star import and a start
    marker), then one generated line per instruction, then ``print_vars()``
    and an end marker.

    Each line is indented by the running sum of ``_indent_changes``. The block
    handlers fill that table for later lines while earlier ones are emitted,
    so instructions must be processed in order. Functions registered through
    ``register_user_function`` are kept; only per-compilation state is reset.
    """
    global _in_function, _current_return_type
    from pathlib import Path

    # Start from a clean slate: without this, indent deltas and function-body
    # state left by a previous compilation in the same process (or by one that
    # raised half-way) would leak into this program's output. The table is
    # cleared in place so other references to it stay valid.
    _indent_changes[:] = [0] * len(_indent_changes)
    _in_function = False
    _current_return_type = ""

    out_path = Path(__file__).parent / "test.py"
    indent_unit = " "

    with out_path.open("w", encoding="utf-8") as f:
        f.write("import sys\nimport os\n")
        f.write("sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n")
        f.write("from Tzefa_Language.createdpython import *\n")
        f.write("print('VM TEST START')\n")

        indent_level = 0
        for i, quad in enumerate(instruction_list, start=1):
            # Apply the delta before generating line i: its handler only
            # records changes for lines after i.
            indent_level += _indent_changes[i]
            f.write(indent_unit * indent_level + make_instruction(quad, i) + "\n")

        f.write("print_vars()\nprint('VM TEST END')\n")
341
+
342
+
343
+ # ---------------------------------------------------------------------------
344
+ # Self-test
345
+ # ---------------------------------------------------------------------------
346
+
347
# Self-test: when run as a script, register GREATESTDIV as a LIST -> LIST
# function and compile a fixed sample program to test.py via make_py_file().
if __name__ == "__main__":
    register_user_function("GREATESTDIV", "LIST", "LIST")
    # Each entry is [VERB, TYPE, ARG1, ARG2]; the 1-based position is the line number.
    _sample = [
        ["MAKE", "INTEGER", "THEINT", "2769"],
        ["MAKE", "INTEGER", "THEINTI", "1065"],
        ["MAKE", "INTEGER", "THROWONE", "1065"],
        ["MAKE", "INTEGER", "THROWTWO", "1065"],
        ["MAKE", "LIST", "LISTOFTWO", "2"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THEINT"],
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THEINTI"],
        ["MAKE", "INTEGER", "ZERO", "0"],
        ["ADD", "TEMPORARY", "THEINT", "THEINTI"],
        ["PRINT", "INTEGER", "TEMPORARY", "BREAK"],
        ["FUNCTION", "LIST", "GREATESTDIV", "LIST"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["GET", "INTEGER", "LISTOFTWO", "THROWONE"],
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["GET", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["MAKE", "CONDITION", "EUCLIDCOMPARE", "EQUALS"],
        ["SET", "LEFT", "EUCLIDCOMPARE", "THROWTWO"],
        ["SET", "RIGHT", "EUCLIDCOMPARE", "ZERO"],
        ["IF", "CONDITION", "EUCLIDCOMPARE", "23"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["RETURN", "VALUE", "LISTOFTWO", "STAY"],
        ["SET", "RIGHT", "EUCLIDCOMPARE", "THROWTWO"],
        ["SET", "INDEX", "LISTOFTWO", "0"],
        ["WRITE", "INTEGER", "LISTOFTWO", "THROWTWO"],
        ["MODULO", "TEMPORARY", "THROWONE", "THROWTWO"],  # DEST=TEMPORARY
        ["SET", "INDEX", "LISTOFTWO", "1"],
        ["WRITE", "INTEGER", "LISTOFTWO", "TEMPORARY"],
        ["CALL", "GREATESTDIV","LISTOFTWO", "LISTOFTWO"],
        ["RETURN", "VALUE", "LISTOFTWO", "BREAK"],
        ["CALL", "GREATESTDIV","LISTOFTWO", "LISTOFTWO"],
    ]
    make_py_file(_sample)
384
+
requirements.txt CHANGED
@@ -1,17 +1,18 @@
1
- torch
2
- torchvision
3
- transformers
4
- segmentation-models-pytorch
5
- albumentations
6
- ultralytics
7
- timm
8
- opencv-python-headless
9
- pillow
10
- numpy
11
- gradio
12
- huggingface_hub
13
- fast_edit_distance
14
- pydantic==2.10.6
15
- tiktoken
16
- protobuf
17
- sentencepiece
 
 
1
+ torch
2
+ torchvision
3
+ transformers
4
+ segmentation-models-pytorch
5
+ ultralytics
6
+ timm
7
+ opencv-python-headless
8
+ pillow
9
+ numpy
10
+ gradio
11
+ huggingface_hub
12
+ fast_edit_distance
13
+ pydantic
14
+ einops
15
+ safetensors
16
+ surya-ocr
17
+ sentencepiece
18
+ protobuf