File size: 2,478 Bytes
fa2dad9
453c542
fa2dad9
 
453c542
 
fa2dad9
 
453c542
 
 
 
 
 
fa2dad9
 
 
 
 
 
 
453c542
fa2dad9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453c542
 
 
 
 
 
 
fa2dad9
 
453c542
fa2dad9
 
453c542
fa2dad9
 
 
 
453c542
 
fa2dad9
453c542
 
fa2dad9
 
453c542
 
fa2dad9
453c542
fa2dad9
453c542
fa2dad9
453c542
 
fa2dad9
 
 
 
 
 
 
 
 
453c542
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""Talk to EDEN in the terminal, similar to how Ollama works.

This downloads the published model from the Hugging Face Hub the first time it
runs and caches it. After that it works offline.

Usage:
    python3 try_eden.py                       # open the chat interface
    python3 try_eden.py "some rough text"     # one-shot: clean the given text
"""

import sys

# Hugging Face Hub repository that the model and tokenizer are loaded from.
MODEL_ID = "Rybib/EDEN"

# Escape sequences are emitted only when stdout is an interactive terminal;
# when output is piped or redirected every style collapses to "".
_TTY = sys.stdout.isatty()
BOLD, DIM, GREEN, CYAN, RESET = (
    ("\033[1m", "\033[2m", "\033[32m", "\033[36m", "\033[0m")
    if _TTY
    else ("", "", "", "", "")
)

# Greeting printed once when the interactive chat starts; the style codes are
# empty strings when stdout is not a terminal, so the text stays plain.
BANNER = f"""{CYAN}{BOLD}
  EDEN  ::  Encoder Decoder Enhancement Network
{RESET}{DIM}  Type or paste rough text and press Enter to clean it up.
  Commands:  /help   show help      /bye  quit      (Ctrl+D also quits)
{RESET}"""

# Usage text printed in response to the /help command in the chat loop.
HELP = f"""{DIM}
  Just type or paste text, then press Enter, and EDEN rewrites it.
  Commands:
    /help      show this help
    /bye       quit (so do /exit, /quit, and Ctrl+D)
{RESET}"""


def load_model() -> tuple:
    """Load the EDEN model and its tokenizer from the Hugging Face Hub.

    The heavy imports happen here rather than at module import time so that a
    missing dependency produces a short install hint instead of a traceback.
    ``torch`` is checked explicitly because ``transformers`` can be imported
    without it and would otherwise only fail later, inside ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` pair; ``.eval()`` has already been called on
        the model.

    Raises:
        SystemExit: With exit code 1 when ``transformers`` or ``torch`` cannot
            be imported.
    """
    try:
        from transformers import AutoModel, AutoTokenizer
        import torch  # noqa: F401  (imported only to verify it is installed)
    except ImportError:
        # Diagnostics go to stderr so one-shot output on stdout stays clean.
        print("Missing dependencies. Run this first:", file=sys.stderr)
        print("    pip3 install torch transformers", file=sys.stderr)
        sys.exit(1)

    print(f"{DIM}Loading {MODEL_ID} (first run downloads about 430 MB) ...{RESET}")
    # NOTE(security): trust_remote_code=True allows the model repository to
    # run its own Python code on this machine. Only keep it enabled for a
    # repository you trust; consider pinning a revision.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True).eval()
    return model, tokenizer


def main() -> None:
    """Run EDEN either once on the command-line text or as a chat loop.

    The model is loaded first. If any non-blank command-line arguments were
    given they are joined with spaces, enhanced, printed, and the function
    returns. Otherwise the banner is shown and lines are read from stdin
    until a quit command, end-of-file, or Ctrl+C.
    """
    model, tokenizer = load_model()

    # One-shot mode: blank arguments are dropped, the rest form one input.
    words = list(filter(str.strip, sys.argv[1:]))
    if words:
        result = model.enhance(tokenizer, " ".join(words))
        print(result)
        return

    quit_commands = ("/bye", "/exit", "/quit", "/q")
    help_commands = ("/help", "/h", "/?")
    prompt = f"{GREEN}{BOLD}>>> {RESET}"
    farewell = f"{DIM}Goodbye.{RESET}"

    # Interactive chat mode.
    print(BANNER)
    while True:
        try:
            line = input(prompt).strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl+D / Ctrl+C leave the cursor on the prompt line, so start
            # the farewell on a fresh line.
            print(f"\n{farewell}")
            return

        if line:
            command = line.lower()
            if command in quit_commands:
                print(farewell)
                return
            if command in help_commands:
                print(HELP)
            else:
                # Anything that is not a command is text to be cleaned up.
                cleaned = model.enhance(tokenizer, line)
                print(f"{CYAN}{cleaned}{RESET}\n")


# Start the CLI only when this file is executed as a script, so importing the
# module does not load the model or open the chat loop.
if __name__ == "__main__":
    main()