GGUF
conversational
File size: 9,912 Bytes
45be297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
"""
THAR.0X — app.py
Model-agnostic cognitive architecture interface.

Supports:
  - Ollama  (http://localhost:11434)
  - LM Studio (http://localhost:1234)
  - Any OpenAI-compatible local server

Usage:
  python app.py                        # interactive CLI chat
  python app.py --backend lmstudio     # use LM Studio instead of Ollama
  python app.py --model qwen2.5:14b    # override model
  python app.py --once "Who are you?"  # single query, print and exit

Requirements:
  pip install openai requests
"""

import argparse
import json
import pathlib
import sys
import textwrap
from typing import Optional

# ---------------------------------------------------------------------------
# Load assets
# ---------------------------------------------------------------------------

# Absolute directory containing this script; asset files (system_prompt.txt,
# config.json) are looked up relative to it rather than the current working dir.
SCRIPT_DIR = pathlib.Path(__file__).parent.resolve()

def load_system_prompt() -> str:
    """Return the stripped contents of system_prompt.txt next to this script.

    If the file does not exist, an error is printed and the process exits
    with status 1.
    """
    prompt_file = SCRIPT_DIR / "system_prompt.txt"
    if prompt_file.exists():
        raw_text = prompt_file.read_text(encoding="utf-8")
        return raw_text.strip()

    # The prompt is mandatory: without it there is nothing to run.
    print(f"[ERROR] system_prompt.txt not found at {prompt_file}")
    print("Make sure system_prompt.txt is in the same directory as app.py.")
    sys.exit(1)


def load_config() -> dict:
    """Load the optional config.json that sits next to this script.

    Returns:
        The parsed JSON object, or an empty dict when config.json is absent.

    Exits with status 1 (after printing an error) when the file exists but
    is not valid JSON or its top-level value is not a JSON object, instead of
    surfacing a raw traceback or failing later on ``.get`` calls.
    """
    path = SCRIPT_DIR / "config.json"
    if not path.exists():
        return {}

    try:
        with open(path, encoding="utf-8") as f:
            config = json.load(f)
    except json.JSONDecodeError as e:
        print(f"[ERROR] config.json at {path} is not valid JSON: {e}")
        sys.exit(1)

    # Callers treat the config as a mapping (config.get(...)).
    if not isinstance(config, dict):
        print(f"[ERROR] config.json at {path} must contain a JSON object at the top level.")
        sys.exit(1)

    return config


# ---------------------------------------------------------------------------
# Backend abstraction
# ---------------------------------------------------------------------------

# Connection presets for each supported backend, keyed by the name accepted
# by the --backend CLI option.
#   base_url      - endpoint root handed to the OpenAI-compatible client
#   api_key       - value passed as the client's api_key (presumably a
#                   placeholder, since these are local servers - confirm)
#   default_model - model name used when no --model override is given
BACKENDS = {
    "ollama": {
        "base_url": "http://localhost:11434/v1",
        "api_key": "ollama",
        "default_model": "THAR.0X",
    },
    "lmstudio": {
        "base_url": "http://localhost:1234/v1",
        "api_key": "lm-studio",
        "default_model": "local-model",
    },
}


def build_client(backend: str):
    """Create the OpenAI-compatible client for the named backend.

    The ``openai`` package is imported lazily so that a missing install
    produces a short hint instead of a traceback. Both a missing package and
    an unrecognised backend name print an error and exit with status 1.
    """
    try:
        from openai import OpenAI
    except ImportError:
        print("[ERROR] openai package not installed.")
        print("Run: pip install openai")
        sys.exit(1)

    if BACKENDS.get(backend) is None:
        print(f"[ERROR] Unknown backend '{backend}'. Choose: {list(BACKENDS.keys())}")
        sys.exit(1)

    settings = BACKENDS[backend]
    endpoint = settings["base_url"]
    key = settings["api_key"]
    return OpenAI(base_url=endpoint, api_key=key)


def check_server(backend: str) -> bool:
    """Ping the backend's server root to confirm it is running.

    Args:
        backend: Key into ``BACKENDS``.

    Returns:
        True when the server answered with a status code below 500,
        False when the request failed for any reason.

    Raises:
        KeyError: if ``backend`` is not a key of ``BACKENDS``.

    Exits with status 1 (after printing a hint) when the ``requests``
    package is not installed, mirroring how a missing ``openai`` package
    is reported in ``build_client``.
    """
    try:
        import requests
    except ImportError:
        print("\n[ERROR] requests package not installed.")
        print("Run: pip install requests")
        sys.exit(1)

    cfg = BACKENDS[backend]

    # Probe the server root rather than the API prefix. Only a trailing
    # "/v1" is removed so a "/v1" elsewhere in the URL is left intact.
    url = cfg["base_url"].rstrip("/")
    if url.endswith("/v1"):
        url = url[: -len("/v1")]

    try:
        r = requests.get(url, timeout=3)
    except Exception:
        # Best-effort probe: any failure (refused, timeout, bad URL) just
        # means "not reachable" to the caller.
        return False
    return r.status_code < 500


# ---------------------------------------------------------------------------
# Chat engine
# ---------------------------------------------------------------------------

class THAR0X:
    """Stateful chat session against an OpenAI-compatible local server.

    The session keeps the running conversation in ``self.history`` (a list of
    ``{"role": ..., "content": ...}`` dicts) and prepends the system prompt to
    every request.
    """

    def __init__(
        self,
        backend: str = "ollama",
        model: Optional[str] = None,
        verbose: bool = False,
    ):
        """Load assets, build the client and resolve inference settings.

        Args:
            backend: Key into ``BACKENDS`` selecting the server preset.
            model: Model name override; falls back to the backend's
                ``default_model`` when None or empty.
            verbose: When True, print the resolved settings on startup.
        """
        self.config = load_config()
        self.system_prompt = load_system_prompt()
        self.backend = backend
        self.client = build_client(backend)
        self.history: list[dict] = []
        self.verbose = verbose

        # Model: explicit argument > backend default.
        # NOTE(review): config.json is not consulted for the model name; only
        # the "inference" section below is read from it.
        inf = self.config.get("inference", {})
        self.model = model or BACKENDS[backend]["default_model"]

        # Inference parameters from config.json, with built-in fallbacks.
        self.temperature = inf.get("temperature", 0.85)
        self.top_p = inf.get("top_p", 0.92)
        self.max_tokens = inf.get("max_tokens", 2048)

        if self.verbose:
            print(f"[THAR.0X] Backend: {backend} | Model: {self.model}")
            print(f"[THAR.0X] Temp: {self.temperature} | Top-p: {self.top_p} | Max tokens: {self.max_tokens}")

    def chat(self, user_message: str) -> str:
        """Send a message and return the assistant reply.

        On success both the user turn and the assistant turn are appended to
        ``self.history``. On failure the history is left untouched, so a
        failed request never leaves an unanswered user turn behind and a
        retry does not duplicate the message.

        Args:
            user_message: Text of the user's turn.

        Returns:
            The assistant's reply text (empty string if the server returned
            no content), or a string starting with ``"[ERROR] API call
            failed:"`` when the request did not succeed. The error string is
            also printed to stderr.
        """
        user_turn = {"role": "user", "content": user_message}
        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.history,
            user_turn,
        ]

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self.temperature,
                top_p=self.top_p,
                max_tokens=self.max_tokens,
            )
        except Exception as e:
            error_msg = f"[ERROR] API call failed: {e}"
            print(error_msg, file=sys.stderr)
            return error_msg

        if not response.choices:
            error_msg = "[ERROR] API call failed: response contained no choices"
            print(error_msg, file=sys.stderr)
            return error_msg

        # content can be None; normalise so callers always receive a str.
        reply = response.choices[0].message.content or ""

        # Commit the exchange only once it has fully succeeded.
        self.history.append(user_turn)
        self.history.append({"role": "assistant", "content": reply})
        return reply

    def reset(self):
        """Clear conversation history."""
        self.history = []
        print("[THAR.0X] Conversation reset.")

    def show_history(self):
        """Print every stored turn, labelled YOU or THAR.0X."""
        if not self.history:
            print("[THAR.0X] No conversation history yet.")
            return
        for turn in self.history:
            role = "YOU" if turn["role"] == "user" else "THAR.0X"
            print(f"\n[{role}] {turn['content']}")


# ---------------------------------------------------------------------------
# CLI interface
# ---------------------------------------------------------------------------

# Startup banner shown by the interactive CLI. The box is assembled from a
# fixed inner width so every row lines up, and uses real box-drawing
# characters / em dashes (the previous literal contained mis-decoded UTF-8).
_BOX_WIDTH = 46
_BOX_ROWS = (
    "             T H A R . 0 X",
    "   Cognitive Architecture — Local Intelligence",
    "   Zero as in origin. X as in unlimited.",
)

BANNER = "\n".join(
    [
        "",  # leading blank line
        "╔" + "═" * _BOX_WIDTH + "╗",
        *("║" + row.ljust(_BOX_WIDTH) + "║" for row in _BOX_ROWS),
        "╚" + "═" * _BOX_WIDTH + "╝",
        "",
        "Commands:",
        "  /reset    — clear conversation history",
        "  /history  — show full conversation",
        "  /model    — show current model and backend",
        "  /quit     — exit",
        "",  # trailing newline
    ]
)


def _wrap_reply(reply, width=90, indent="          "):
    """Word-wrap a reply for the terminal while keeping its line structure.

    Each source line is wrapped on its own, so paragraph breaks, lists and
    code blocks survive (wrapping the whole text at once would merge them
    into a single paragraph). Every output line carries ``indent`` plus the
    source line's own leading whitespace, except that the hanging indent is
    removed from the very first line because it is printed right after the
    "THAR.0X > " prompt.
    """
    wrapped_lines = []
    for line in (reply or "").splitlines():
        body = line.lstrip()
        if not body:
            wrapped_lines.append("")  # keep blank lines as paragraph breaks
            continue
        lead = indent + line[: len(line) - len(body)]
        wrapped_lines.append(
            textwrap.fill(
                body,
                width=width,
                initial_indent=lead,
                subsequent_indent=lead,
                break_long_words=False,
                break_on_hyphens=False,
            )
        )
    text = "\n".join(wrapped_lines)
    return text[len(indent):] if text.startswith(indent) else text


def run_interactive(agent: THAR0X):
    """Run the interactive read-eval-print chat loop until the user quits.

    Slash commands (/reset, /history, /model, /quit) are handled locally;
    anything else is sent to ``agent.chat`` and the reply is printed
    word-wrapped. Ctrl-C / Ctrl-D end the session.
    """
    print(BANNER)
    print(f"Backend: {agent.backend.upper()}  |  Model: {agent.model}\n")

    while True:
        try:
            user_input = input("YOU > ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\n[THAR.0X] Session ended.")
            break

        if not user_input:
            continue

        # Commands
        command = user_input.lower()
        if command in ("/quit", "/exit", "quit", "exit"):
            print("[THAR.0X] Session ended.")
            break
        if command == "/reset":
            agent.reset()
            continue
        if command == "/history":
            agent.show_history()
            continue
        if command == "/model":
            print(f"[THAR.0X] Backend: {agent.backend} | Model: {agent.model}")
            continue

        # Normal message
        print("\nTHAR.0X > ", end="", flush=True)
        reply = agent.chat(user_input)

        # Word-wrap long replies for terminal readability, preserving the
        # reply's own line breaks.
        print(_wrap_reply(reply))
        print()


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def parse_args(argv=None):
    """Parse command-line options for the CLI.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``backend``, ``model``, ``once``,
        ``verbose`` and ``no_check`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="THAR.0X — Model-agnostic cognitive architecture CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent("""
        Examples:
          python app.py
          python app.py --backend lmstudio
          python app.py --model qwen2.5:14b
          python app.py --once "Explain consciousness in one paragraph."
          python app.py --backend lmstudio --model Qwen2.5-14B --verbose
        """),
    )
    parser.add_argument(
        "--backend",
        choices=list(BACKENDS.keys()),
        default="ollama",
        help="Which local LLM server to use (default: ollama)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help="Model name override. For Ollama: 'qwen2.5:14b'. For LM Studio: model filename.",
    )
    parser.add_argument(
        "--once",
        metavar="PROMPT",
        default=None,
        help="Send a single prompt, print the reply, and exit.",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print inference parameters on startup.",
    )
    parser.add_argument(
        "--no-check",
        action="store_true",
        help="Skip server connectivity check on startup.",
    )
    return parser.parse_args(argv)


def main():
    """CLI entry point: parse options, verify the server, then chat.

    Runs a single query when --once is given, otherwise starts the
    interactive loop. Exits with status 1 when the server check fails and
    with status 2 when --once is given a blank prompt.
    """
    args = parse_args()

    # --once was supplied if it is not None; a bare truthiness test would
    # treat --once "" as absent and silently start an interactive session.
    single_shot = args.once is not None
    if single_shot and not args.once.strip():
        print("[ERROR] --once requires a non-empty prompt.", file=sys.stderr)
        sys.exit(2)

    # Server check
    if not args.no_check:
        print(f"[THAR.0X] Checking {args.backend} server...", end=" ", flush=True)
        if check_server(args.backend):
            print("OK")
        else:
            print("FAILED")
            print(f"\n[ERROR] Cannot reach {args.backend} server.")
            if args.backend == "ollama":
                print("Start it with: ollama serve")
                print("If THAR.0X model not created yet: ollama create THAR.0X -f Modelfile")
            elif args.backend == "lmstudio":
                print("Start LM Studio, load a model, and enable the local server.")
            print("\nUse --no-check to skip this check.")
            sys.exit(1)

    # Build agent
    agent = THAR0X(
        backend=args.backend,
        model=args.model,
        verbose=args.verbose,
    )

    # Single-shot mode
    if single_shot:
        reply = agent.chat(args.once)
        print(reply)
        return

    # Interactive mode
    run_interactive(agent)


if __name__ == "__main__":
    main()