toxic-royale-env / scripts /bluestacks_elixir_ocr.py
omm7's picture
Upload folder using huggingface_hub
b0620f3 verified
from __future__ import annotations
"""
Local elixir reading using OCR + bar fill (fallback).
Designed for BlueStacks screenshot produced by scripts/bluestacks_screenshot.sh.
Dependencies:
- Pillow (already used elsewhere)
- Optional: tesseract + pytesseract for best results
- brew install tesseract
- python3 -m pip install pytesseract
Usage:
cd toxic_royale_env
python3 scripts/bluestacks_elixir_ocr.py --image outputs/bluestacks/pilot_current.png
Debug:
BS_DEBUG_ELIXIR=1 python3 scripts/bluestacks_elixir_ocr.py --image ...
-> writes outputs/bluestacks/debug_elixir_ocr_roi.png and debug_elixir_bar_roi.png
"""
import argparse
import json
import os
import subprocess
from pathlib import Path
from PIL import Image, ImageOps, ImageFilter
def _load_cfg(root: Path) -> dict:
cfg_path = root / "config" / "bluestacks_gameplay.local.json"
return json.loads(cfg_path.read_text(encoding="utf-8"))
def _bluestacks_bounds() -> tuple[int, int, int, int] | None:
"""
Return (x,y,w,h) of the exact region captured by scripts/bluestacks_screenshot.sh.
This lets us convert absolute screen calibration coords -> screenshot pixel coords.
"""
script = r'''
tell application "System Events"
try
set bluestacksProc to application process "BlueStacks"
set allWins to every window of bluestacksProc
repeat with w in allWins
set t to title of w
set n to name of w
set sr to subrole of w
set s to size of w
set p to position of w
if (t is "BlueStacks Air" or n is "BlueStacks Air") and sr is "AXDialog" then
set x to item 1 of p
set y to item 2 of p
set w_ to item 1 of s
set h to item 2 of s
if h > 200 then
return "" & x & "," & y & "," & w_ & "," & h
end if
end if
end repeat
return ""
on error
return ""
end try
end tell
'''
try:
out = subprocess.check_output(["osascript", "-e", script], text=True).strip()
if not out or out.count(",") < 3:
return None
x_s, y_s, w_s, h_s = out.split(",", 3)
return int(float(x_s)), int(float(y_s)), int(float(w_s)), int(float(h_s))
except Exception:
return None
def _crop_elixir_number_roi(im: Image.Image, *, x_left: int, y: int) -> Image.Image:
"""
Crop ROI around the big elixir number (white).
Important: BlueStacks UI is consistent inside the cropped screenshot, and calibration y can drift.
We therefore default to a RELATIVE crop (percentage of image size) which is robust.
"""
w, h = im.size
# Relative ROI covering the big elixir number + "Max: 10" baseline.
# Tuned against typical BlueStacks Air layout (and avoids the hand card costs).
# Default to a bar-anchored crop (more stable than a pure relative crop).
# The big number sits ABOVE the bar and slightly LEFT of the bar start.
# Keep this fairly right-shifted to avoid the "Next:" label region.
x0 = max(0, x_left - 170)
x1 = max(0, min(w, x_left + 20))
# Place the crop just above the bar, where the big elixir number sits.
y0 = max(0, y - 175)
y1 = max(0, min(h, y - 20))
# If user wants to force calibration-based crop for experiments:
if os.environ.get("BS_OCR_MODE", "").strip().lower() == "relative":
# Relative ROI covering the elixir droplet region; useful if calibration is very wrong.
x0 = int(round(0.36 * w))
x1 = int(round(0.50 * w))
y0 = int(round(0.85 * h))
y1 = int(round(0.95 * h))
x0 = max(0, min(w - 1, x0))
x1 = max(x0 + 1, min(w, x1))
y0 = max(0, min(h - 1, y0))
y1 = max(y0 + 1, min(h, y1))
return im.crop((x0, y0, x1, y1))
def _preprocess_for_ocr(roi: Image.Image) -> Image.Image:
    """
    Turn the number ROI into a black/white image for OCR.

    Pipeline: grayscale -> 3x bicubic upscale -> unsharp mask -> autocontrast
    -> binarize (pixels above 140 become 255, everything else 0).

    Args:
        roi: Cropped region containing the elixir number.

    Returns:
        The binarized image, three times the ROI's width and height.
    """
    gray = ImageOps.grayscale(roi)

    # Give the OCR engine more pixels to work with.
    src_w, src_h = gray.size
    enlarged = gray.resize((src_w * 3, src_h * 3), Image.Resampling.BICUBIC)

    # Crisp up the digit edges, then stretch contrast (white digits on a darker background).
    sharpened = enlarged.filter(ImageFilter.UnsharpMask(radius=2, percent=180, threshold=2))
    stretched = ImageOps.autocontrast(sharpened)

    def _binarize(level):
        # Slightly low cut-off on purpose: the BlueStacks UI can be dim.
        return 255 if level > 140 else 0

    return stretched.point(_binarize)
def _try_tesseract(roi_bin: Image.Image) -> int | None:
try:
import pytesseract # type: ignore
except Exception:
return None
try:
txt = pytesseract.image_to_string(
roi_bin,
config="--psm 7 -c tessedit_char_whitelist=0123456789",
).strip()
except Exception:
return None
# Extract first integer-like token
digits = "".join(ch for ch in txt if ch.isdigit())
if not digits:
return None
try:
v = int(digits)
except Exception:
return None
if 0 <= v <= 10:
return v
return None
def _estimate_from_bar(im: Image.Image, *, x0: int, x1: int, y: int) -> float | None:
"""
Estimate elixir 0..10 from bar fill (left->right length).
Uses HSV scoring + adaptive threshold and searches nearby y positions to
avoid sampling the wrong scanline (Retina + UI glow).
"""
w, h = im.size
x0 = max(0, min(w - 2, x0))
x1 = max(x0 + 1, min(w, x1))
import colorsys
def magenta_score(r: int, g: int, b: int) -> float:
rf, gf, bf = r / 255.0, g / 255.0, b / 255.0
h_, s_, v_ = colorsys.rgb_to_hsv(rf, gf, bf)
# magenta/purple hue band
if 0.78 <= h_ <= 0.95 and s_ >= 0.25 and v_ >= 0.15:
return s_ * v_
return 0.0
best_fill = 0.0
best_roi = None
# scan a few y offsets around calibrated y
for dy in range(-24, 25, 2):
yy = max(0, min(h - 16, y + dy))
roi = im.crop((x0, yy, x1, yy + 12)).convert("RGB")
px = roi.load()
cols = roi.size[0]
band = roi.size[1]
scores: list[float] = []
for cx in range(cols):
ssum = 0.0
for cy in range(band):
r, g, b = px[cx, cy]
ssum += magenta_score(r, g, b)
scores.append(ssum / max(1, band))
s_sorted = sorted(scores)
p10 = s_sorted[int(0.10 * (len(s_sorted) - 1))]
p90 = s_sorted[int(0.90 * (len(s_sorted) - 1))]
if p90 <= 0.02:
continue
thr = (p10 + p90) / 2.0
# find left-to-right fill run, allowing small gaps for tick marks
filled_cols = 0
miss_run = 0
for v in scores:
if v >= thr:
filled_cols += 1
miss_run = 0
else:
miss_run += 1
if miss_run >= 10:
break
fill = filled_cols / max(1, cols)
if fill > best_fill:
best_fill = fill
best_roi = roi
if os.environ.get("BS_DEBUG_ELIXIR", "0") == "1" and best_roi is not None:
out_dir = Path(__file__).resolve().parents[1] / "outputs" / "bluestacks"
out_dir.mkdir(parents=True, exist_ok=True)
best_roi.save(out_dir / "debug_elixir_bar_roi.png")
if best_fill <= 0.0:
return None
return round(10.0 * max(0.0, min(1.0, best_fill)), 1)
def read_elixir(image_path: Path) -> tuple[float | None, dict]:
    """
    Read the elixir value from a BlueStacks screenshot.

    Steps:
      1. Load the calibration (``elixir_bar.start`` / ``elixir_bar.end`` in
         absolute screen coordinates, plus optional ``elixir_roi.number`` and
         ``elixir_roi.bar`` boxes given as ``tl`` / ``br`` corners).
      2. Look up the BlueStacks window bounds and derive the scale between
         window size and image size (Retina screenshots are denser than
         logical screen points), then map the calibration into image pixels.
      3. OCR the number ROI; separately estimate the value from the bar fill.

    Args:
        image_path: Screenshot file to analyse.

    Returns:
        ``(value, debug)``. ``value`` is the OCR result when available,
        otherwise the bar estimate, otherwise None. ``debug`` holds both
        readings, the boxes used and the coordinate conversion details, or a
        single ``"error"`` key when calibration or window bounds are missing.
        With ``BS_DEBUG_ELIXIR=1`` the binarized OCR ROI is also written to
        ``outputs/bluestacks/debug_elixir_ocr_roi.png``.
    """
    root = Path(__file__).resolve().parents[1]
    cfg = _load_cfg(root)

    bar_cfg = cfg.get("elixir_bar") or {}
    s = bar_cfg.get("start")
    e = bar_cfg.get("end")
    if not (s and e):
        return None, {"error": "missing_elixir_bar_calibration"}

    im = Image.open(image_path).convert("RGB")

    # Window origin is needed to turn absolute screen coords into screenshot coords.
    bounds = _bluestacks_bounds()
    if bounds is None:
        return None, {"error": "cannot_find_bluestacks_bounds"}
    ox, oy, win_w, win_h = bounds

    # Scale from logical screen points (window bounds) to image pixels.
    sx = im.size[0] / max(1.0, float(win_w))
    sy = im.size[1] / max(1.0, float(win_h))

    x0_pt, y_pt = int(s[0]) - ox, int(s[1]) - oy
    x1_pt, _y2_pt = int(e[0]) - ox, int(e[1]) - oy
    x0 = int(round(x0_pt * sx))
    y = int(round(y_pt * sy))
    x1 = int(round(x1_pt * sx))

    # Recorded before any left/right swap so the raw calibration stays visible.
    dbg_bounds = {
        "bounds": [ox, oy, win_w, win_h],
        "scale": [round(sx, 3), round(sy, 3)],
        "bar_start_xy_points": [x0_pt, y_pt],
        "bar_end_x_points": x1_pt,
        "bar_start_xy_px": [x0, y],
        "bar_end_x_px": x1,
    }
    if x1 < x0:
        x0, x1 = x1, x0

    # Optional explicit ROI boxes (screen coords); [0, 0] corners mean "not set".
    roi_cfg = cfg.get("elixir_roi") or {}
    num_tl = (roi_cfg.get("number") or {}).get("tl") or [0, 0]
    num_br = (roi_cfg.get("number") or {}).get("br") or [0, 0]
    bar_tl = (roi_cfg.get("bar") or {}).get("tl") or [0, 0]
    bar_br = (roi_cfg.get("bar") or {}).get("br") or [0, 0]

    def _nonzero(pt) -> bool:
        try:
            return int(pt[0]) != 0 or int(pt[1]) != 0
        except Exception:
            return False

    def _screen_box_to_px(tl, br):
        """Map a screen-coordinate box to a normalized pixel box, or None if unset."""
        if not (_nonzero(tl) and _nonzero(br)):
            return None
        xa = int(round((int(tl[0]) - ox) * sx))
        ya = int(round((int(tl[1]) - oy) * sy))
        xb = int(round((int(br[0]) - ox) * sx))
        yb_ = int(round((int(br[1]) - oy) * sy))
        return (min(xa, xb), min(ya, yb_), max(xa, xb), max(ya, yb_))

    num_box = _screen_box_to_px(num_tl, num_br)
    bar_box = _screen_box_to_px(bar_tl, bar_br)

    # 1) OCR of the number (preferred reading).
    if num_box is None:
        roi_num = _crop_elixir_number_roi(im, x_left=x0, y=y)
        roi_num_box = "auto"
    else:
        roi_num = im.crop(num_box)
        roi_num_box = list(num_box)
    roi_bin = _preprocess_for_ocr(roi_num)
    v_int = _try_tesseract(roi_bin)

    # 2) Bar fill (fallback reading).
    if bar_box is None:
        v_bar = _estimate_from_bar(im, x0=x0, x1=x1, y=y)
        bar_box_dbg = "auto"
    else:
        bx0, by0, bx1, by1 = bar_box
        # Scan around the box's vertical centre, keeping its x range.
        center_y = int((by0 + by1) / 2)
        v_bar = _estimate_from_bar(im, x0=bx0, x1=bx1, y=center_y)
        bar_box_dbg = list(bar_box)

    dbg = {
        "ocr_int": v_int,
        "bar_est": v_bar,
        "roi_num_box": roi_num_box,
        "bar_box": bar_box_dbg,
        **dbg_bounds,
    }

    if os.environ.get("BS_DEBUG_ELIXIR", "0") == "1":
        out_dir = root / "outputs" / "bluestacks"
        out_dir.mkdir(parents=True, exist_ok=True)
        roi_bin.save(out_dir / "debug_elixir_ocr_roi.png")
        dbg["debug_dir"] = str(out_dir)

    # OCR wins over the bar estimate when both are available.
    for reading in (v_int, v_bar):
        if reading is not None:
            return float(reading), dbg
    return None, dbg
def main() -> int:
    """
    Command-line entry point.

    Parses the required ``--image`` argument, reads the elixir value from that
    screenshot and prints ``{"elixir": ..., "debug": ...}`` as indented JSON.

    Returns:
        Always 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", type=str, required=True)
    image_path = Path(parser.parse_args().image)

    elixir, debug = read_elixir(image_path)
    payload = {"elixir": elixir, "debug": debug}
    print(json.dumps(payload, indent=2))
    return 0
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())