shaml / backend /scripts /run_local.py
Cursor Agent
Add Diac API backend, Arabic RTL frontend, Docker, and docs
0b083c6 unverified
#!/usr/bin/env python3
"""Run Arabic diacritization locally from the command line."""
from __future__ import annotations
import argparse
import json
import logging
import sys
from pathlib import Path
# Make the project root importable when this file is executed directly as a
# script, so that the absolute ``backend.*`` imports below can be resolved.
# The root is two directories above the folder containing this file.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
from backend.app.config import get_settings
from backend.app.services.model_service import model_service
# Configure root logging once at import time: INFO and above, rendered as
# "<LEVEL>: <message>". The module logger is used for all status and error
# reporting in this script.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Diacritize Arabic text locally using a Hugging Face pretrained model.",
)
parser.add_argument(
"--text",
type=str,
help="Single Arabic sentence without diacritics",
)
parser.add_argument(
"--file",
type=str,
help="Path to a text file with one sentence per line",
)
parser.add_argument(
"--output",
type=str,
help="Output file path (for --file mode). Default: outputs/local_diacritized.txt",
)
parser.add_argument(
"--model",
type=str,
default=None,
help="Hugging Face model ID (overrides MODEL_NAME env var)",
)
parser.add_argument(
"--constants",
type=str,
default=None,
help="Path to constants/ folder (overrides DIAC_CONSTANTS_PATH)",
)
parser.add_argument(
"--json",
action="store_true",
help="Print results as JSON",
)
return parser.parse_args()
def main() -> int:
    """Run the CLI: diacritize ``--text``, or every non-blank line of ``--file``.

    In ``--text`` mode the result is printed to stdout, either as plain text
    or as a JSON object when ``--json`` is given. In ``--file`` mode the
    stripped, non-blank input lines are diacritized as one batch, written one
    per line to the output file, and echoed to stdout (as JSON or as IN/OUT
    pairs). ``--text`` takes precedence when both options are supplied.

    Every failure is reported through the module logger rather than raised.

    Returns:
        Process exit code: 0 on success, 1 on any reported failure.
    """
    args = parse_args()
    settings = get_settings()
    # Command-line values take precedence over the configured settings.
    model_name = args.model or settings.model_name
    constants_path = args.constants or settings.constants_path

    if not args.text and not args.file:
        logger.error("Provide --text or --file")
        return 1

    # Validate the batch input *before* loading the model, so that a bad path
    # or an empty file is reported without first going through the load.
    lines: list[str] = []
    if args.text:
        if args.file:
            # Make the precedence explicit instead of dropping --file silently.
            logger.warning("Both --text and --file were given; ignoring --file")
    else:
        input_path = Path(args.file)
        # is_file() also rejects directories, which exists() would let through.
        if not input_path.is_file():
            logger.error("Input file not found: %s", input_path)
            return 1
        try:
            raw_text = input_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            logger.error("Failed to read input file %s: %s", input_path, exc)
            return 1
        lines = [stripped for stripped in map(str.strip, raw_text.splitlines()) if stripped]
        if not lines:
            logger.error("Input file is empty")
            return 1

    try:
        model_service.load(model_name, constants_path)
    except Exception as exc:
        logger.error("Failed to load model: %s", exc)
        return 1

    if args.text:
        try:
            diacritized = model_service.diacritize(args.text)
        except Exception as exc:
            logger.error("Diacritization failed: %s", exc)
            return 1
        if args.json:
            print(json.dumps({"input": args.text, "diacritized": diacritized}, ensure_ascii=False))
        else:
            print(diacritized)
        return 0

    try:
        outputs = list(model_service.diacritize_batch(lines))
    except Exception as exc:
        logger.error("Batch diacritization failed: %s", exc)
        return 1

    # Check alignment before touching the filesystem; otherwise a mismatch
    # would only surface in zip(strict=True) after the file was written.
    if len(outputs) != len(lines):
        logger.error(
            "Model returned %d outputs for %d input lines", len(outputs), len(lines)
        )
        return 1

    output_path = Path(args.output or "outputs/local_diacritized.txt")
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text("\n".join(outputs) + "\n", encoding="utf-8")
    except OSError as exc:
        logger.error("Failed to write output file %s: %s", output_path, exc)
        return 1

    if args.json:
        results = [{"input": i, "diacritized": o} for i, o in zip(lines, outputs, strict=True)]
        print(json.dumps({"results": results, "output_file": str(output_path)}, ensure_ascii=False, indent=2))
    else:
        logger.info("Wrote %d lines to %s", len(outputs), output_path)
        for inp, out in zip(lines, outputs, strict=True):
            print(f"IN: {inp}")
            print(f"OUT: {out}")
            print()
    return 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())