boo4blue committed on
Commit c526bb9 · verified · 1 Parent(s): 306461b

Update app.py

Files changed (1)
  1. app.py +353 -72
app.py CHANGED
@@ -1,86 +1,296 @@
- import os, time, json, re
  import gradio as gr
  from llama_cpp import Llama

- # ✅ Working public GGUF model
  MODEL_REPO = "QuantFactory/Phi-3.5-mini-instruct-GGUF"
- MODEL_FILE = "Phi-3.5-mini-instruct.Q4_K_M.gguf"
  SAVE_PATH = "convos.jsonl"
- MAX_RECALL = 5  # how many past turns to recall

  llm = None

  def get_llm():
      global llm
      if llm is not None:
          return llm
      llm = Llama.from_pretrained(
-         repo_id=MODEL_REPO,
          filename=MODEL_FILE,
-         n_ctx=4096,
-         n_threads=4,
-         n_gpu_layers=0,
          verbose=False
      )
      return llm

- def save_turn(system, history, user_msg, assistant_msg):
-     with open(SAVE_PATH, "a", encoding="utf-8") as f:
-         rec = {
-             "ts": time.time(),
-             "system": system,
-             "history": history,
-             "user": user_msg,
-             "assistant": assistant_msg,
-         }
-         f.write(json.dumps(rec, ensure_ascii=False) + "\n")
-
- def load_memory(query):
-     """Simple keyword-based recall from past chats."""
-     if not os.path.exists(SAVE_PATH):
-         return []
-     with open(SAVE_PATH, "r", encoding="utf-8") as f:
-         lines = [json.loads(l) for l in f if l.strip()]
-     query_words = set(re.findall(r"\w+", query.lower()))
-     scored = []
-     for rec in lines:
-         text = (rec.get("user","") + " " + rec.get("assistant","")).lower()
-         score = len(query_words & set(re.findall(r"\w+", text)))
-         if score > 0:
-             scored.append((score, rec))
-     scored.sort(reverse=True, key=lambda x: x[0])
-     return [rec for _, rec in scored[:MAX_RECALL]]
-
- def format_messages(system, history, user_msg):
      msgs = []
      if system.strip():
          msgs.append({"role": "system", "content": system})
-
-     recalls = load_memory(user_msg)
-     if recalls:
-         mem_text = "\n".join(
-             f"User: {r['user']}\nAssistant: {r['assistant']}" for r in recalls
-         )
-         msgs.append({"role": "system", "content": f"Relevant past conversations:\n{mem_text}"})
-
      for h in history:
          msgs.append({"role": h["role"], "content": h["content"]})
-
      msgs.append({"role": "user", "content": user_msg})
      return msgs

- def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
      llm = get_llm()
-     msgs = format_messages(system, history, user_msg)
-
-     stream = llm.create_chat_completion(
          messages=msgs,
          temperature=temperature,
-         top_p=top_p,
-         max_tokens=max_new_tokens,
          stream=True
      )
-
      partial = ""
      for chunk in stream:
          delta = chunk["choices"][0]["delta"]
@@ -88,28 +298,97 @@ def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
              piece = delta["content"]
              partial += piece
              yield partial

-     save_turn(system, history, user_msg, partial)

- with gr.Blocks(title="Free ChatGPT-like (CPU) with Memory") as demo:
-     gr.Markdown("# Free ChatGPT‑style AI (CPU) + Long‑Term Memory")
      with gr.Row():
-         system = gr.Textbox(label="System prompt", value="You are a helpful, concise assistant.")
      with gr.Row():
-         temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
          top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
-         max_new_tokens = gr.Slider(16, 1024, value=512, step=16, label="Max new tokens")
-     chat = gr.Chatbot(height=480, show_copy_button=True, type="messages")
-     user = gr.Textbox(label="Your message", placeholder="Ask anything...")
      send = gr.Button("Send", variant="primary")

-     def respond(message, chat_history, system, temperature, top_p, max_new_tokens):
          if not message or not message.strip():
-             return gr.update(), chat_history
-         history_msgs = chat_history or []
-         stream = chat_fn(message, history_msgs, system, temperature, top_p, max_new_tokens)
          bot_text = ""
-         for partial in stream:
              bot_text = partial
              yield gr.update(value=(history_msgs + [
                  {"role": "user", "content": message},
@@ -117,18 +396,20 @@ with gr.Blocks(title="Free ChatGPT-like (CPU) with Memory") as demo:
              ])), (history_msgs + [
                  {"role": "user", "content": message},
                  {"role": "assistant", "content": bot_text}
-             ])

      send.click(
          respond,
-         [user, chat, system, temperature, top_p, max_new_tokens],
-         [chat, chat],
      )
      user.submit(
          respond,
-         [user, chat, system, temperature, top_p, max_new_tokens],
-         [chat, chat],
      )

- if __name__ == "__main__":
-     demo.launch(server_name="0.0.0.0", server_port=7860)

+ import os, time, json, re, difflib, tempfile, pathlib, shutil, fnmatch
  import gradio as gr
  from llama_cpp import Llama

+ # =========================
+ # Model + basic configuration
+ # =========================
  MODEL_REPO = "QuantFactory/Phi-3.5-mini-instruct-GGUF"
+ MODEL_FILE = "Phi-3.5-mini-instruct.Q4_K_M.gguf"
+
+ APP_TITLE = "Natural-language self-editing AI (CPU)"
  SAVE_PATH = "convos.jsonl"
+ MAX_NEW_TOKENS = 768
+ N_CTX = 4096
+ THREADS = 4
+
+ # =========================
+ # Globals
+ # =========================
  llm = None
+ ROOT_DIR = pathlib.Path(".").resolve()
+ BACKUP_DIR = ROOT_DIR / ".fs_backups"
+ BACKUP_DIR.mkdir(exist_ok=True)
+ DENYLIST = ["/proc", "/sys", "/dev", "/run", "/var/lib/docker", "/var/run"]
+
+ # Pending action schema:
+ #   {"path": "app.py", "new_content": "...", "old_content": "...", "reason": "…"}
+ # Stored in gr.State
+ PENDING_KEY = "pending_action"
+
+ # =========================
+ # Utilities: file system
+ # =========================
+ def _resolve(path: str) -> pathlib.Path:
+     p = (ROOT_DIR / path).resolve()
+     for d in DENYLIST:
+         if str(p).startswith(d):
+             raise PermissionError(f"Path {p} is denied")
+     return p
+
+ def make_backup(path: str) -> str:
+     p = _resolve(path)
+     if not p.exists():
+         return ""
+     ts = int(time.time())
+     rel = str(p.relative_to(ROOT_DIR)).replace("/", "_")  # flatten separators for the backup name
+     bk = BACKUP_DIR / f"{rel}.{ts}.bak"
+     if p.is_file():
+         shutil.copy2(p, bk)
+         return str(bk)
+     else:
+         shutil.make_archive(str(bk), "zip", root_dir=str(p))
+         return str(bk) + ".zip"
+
+ def read_file(path: str) -> str:
+     p = _resolve(path)
+     with open(p, "r", encoding="utf-8") as f:
+         return f.read()
+
+ def write_atomic(path: str, content: str) -> str:
+     p = _resolve(path)
+     p.parent.mkdir(parents=True, exist_ok=True)
+     backup_path = make_backup(path)
+     with tempfile.NamedTemporaryFile("w", delete=False, dir=str(p.parent), encoding="utf-8") as tmp:
+         tmp.write(content)
+         tmp_name = tmp.name
+     os.replace(tmp_name, p)
+     return backup_path

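+ # write_atomic pattern: back up first, write the new content to a temp file
+ # in the destination's own directory, then os.replace() it over the target.
+ # os.replace() is atomic within a single filesystem, so readers never see a
+ # half-written file.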
+ def list_paths(pattern: str = "**/*", cwd: str = ".") -> list:
+     base = _resolve(cwd)
+     results = []
+     for path in base.rglob("*"):
+         rel = str(path.relative_to(ROOT_DIR))
+         if fnmatch.fnmatch(rel, pattern):
+             results.append(rel + ("/" if path.is_dir() else ""))
+     return results[:1000]
+
+ def file_diff_text(old: str, new: str, from_name: str, to_name: str) -> str:
+     diff = difflib.unified_diff(
+         old.splitlines(), new.splitlines(), fromfile=from_name, tofile=to_name
+     )
+     return "\n".join(diff)
+
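+ # Illustrative usage: list_paths("**/*.py") yields repo-relative paths such
+ # as ["app.py", "scripts/logger.py"], capped at 1000 entries; file_diff_text
+ # renders the unified diff shown in the confirmation message.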
+ # =========================
+ # Utilities: chat memory (lightweight)
+ # =========================
+ def save_turn(system, history, user_msg, assistant_msg):
+     try:
+         with open(SAVE_PATH, "a", encoding="utf-8") as f:
+             rec = {
+                 "ts": time.time(),
+                 "system": system,
+                 "history": history,
+                 "user": user_msg,
+                 "assistant": assistant_msg,
+             }
+             f.write(json.dumps(rec, ensure_ascii=False) + "\n")
+     except Exception:
+         pass
+
+ # =========================
+ # Model loader
+ # =========================
  def get_llm():
      global llm
      if llm is not None:
          return llm
      llm = Llama.from_pretrained(
+         repo_id=MODEL_REPO,
          filename=MODEL_FILE,
+         n_ctx=N_CTX,
+         n_threads=THREADS,
+         n_gpu_layers=0,
          verbose=False
      )
      return llm

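+ # Llama.from_pretrained fetches the GGUF file from the Hugging Face Hub on
+ # the first call and caches it locally; the module-level `llm` acts as a
+ # singleton so the model is loaded only once per process.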
+ # =========================
+ # NL agent: intent + planning
+ # =========================
+ CODE_BLOCK_RE = re.compile(r"```(?:[\w.+-]+)?\n(.*?)```", re.DOTALL)
+
+ def extract_fenced(text: str) -> str:
+     m = CODE_BLOCK_RE.search(text)
+     return m.group(1).strip() if m else text
+
+ def detect_intent(user_text: str) -> str:
+     """
+     Returns: "edit", "create", "chat"
+     """
+     t = user_text.lower()
+     edit_verbs = ["edit", "change", "modify", "refactor", "fix", "optimize", "speed up", "rework", "rewrite", "patch"]
+     create_verbs = ["create", "make a new", "add a new", "generate a new", "build a new", "scaffold"]
+     if any(v in t for v in edit_verbs):
+         return "edit"
+     if any(v in t for v in create_verbs):
+         return "create"
+     # Heuristic: mentions of specific files imply edit
+     if re.search(r"\b[\w\-/]+\.py\b", t):
+         return "edit"
+     return "chat"
+
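+ # Illustrative behaviour: detect_intent("fix the bug in app.py") -> "edit";
+ # detect_intent("create scripts/logger.py") -> "create";
+ # detect_intent("what is a GGUF file?") -> "chat".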
+ def find_target_files(user_text: str) -> list:
+     """
+     Pull explicit filenames from the message; fall back to app.py if none.
+     """
+     files = re.findall(r"([\w\-/]+\.py)\b", user_text)
+     files = [f for f in files if (ROOT_DIR / f).exists()]
+     if files:
+         return files
+     # sensible default
+     fallback = ["app.py"] if (ROOT_DIR / "app.py").exists() else []
+     return fallback
+
+ def propose_edit_or_create(user_text: str) -> dict:
+     """
+     Ask the model to propose a full-file replacement (for edit) or a new file (for create).
+     Returns: {"path": str, "new_content": str, "reason": str}
+     """
+     targets = find_target_files(user_text)
+     context_blobs = []
+     for path in targets:
+         try:
+             context_blobs.append(f"File: {path}\n```python\n{read_file(path)}\n```")
+         except Exception:
+             pass
+     file_list_preview = "\n".join(list_paths("**/*.py"))
+     # Joined outside the f-string below (f-string expressions cannot contain
+     # backslashes before Python 3.12)
+     context_section = "\n\n".join(context_blobs) if context_blobs else "(no existing file context)"
+
+     system_hint = (
+         "You are a precise software editor. When asked to change or create code, "
+         "you return ONLY the complete target file in a single fenced code block, and a brief reason."
+     )
+     user_prompt = f"""
+ User request:
+ {user_text}
+
+ Existing Python files (truncated):
+ ```text
+ {file_list_preview}
+ ```
+
+ Context for existing targets:
+ {context_section}
+
+ Instructions:
+ - If editing, produce the FULL updated content of the primary target file you chose.
+ - If creating, choose a sensible path (e.g., scripts/logger.py) and produce the FULL content.
+ - Return your answer in this JSON template followed by ONE fenced code block with the file content:
+ JSON (no code fences):
+ {{"path": "<target_path.py>", "reason": "<1-2 sentences>"}}
+ Then the file content:
+ ```python
+ full file content here
+ ```
+ Do not include anything else.
+ """
+     out = get_llm().create_chat_completion(
+         messages=[{"role": "system", "content": system_hint},
+                   {"role": "user", "content": user_prompt}],
+         temperature=0.2,
+         top_p=0.9,
+         max_tokens=MAX_NEW_TOKENS
+     )
+     text = out["choices"][0]["message"]["content"]
+     # Extract JSON header (non-greedy, so it stops at the header's closing brace)
+     json_match = re.search(r"\{.*?\}", text, flags=re.DOTALL)
+     header = {"path": "app.generated.py", "reason": "Generated update"}
+     if json_match:
+         try:
+             header = json.loads(json_match.group(0))
+         except Exception:
+             pass
+     new_content = extract_fenced(text)
+     return {
+         "path": header.get("path", "app.generated.py"),
+         "new_content": new_content,
+         "reason": header.get("reason", "Proposed change"),
+     }
+
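+ # Contract with the model: a one-line JSON header {"path": ..., "reason": ...}
+ # followed by a single fenced code block. Parsing is forgiving: an unparsable
+ # header falls back to app.generated.py, and if no fence is found the whole
+ # reply is treated as the file content.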
+ def propose_diff_message(path: str, old: str, new: str, reason: str) -> str:
+     diff = file_diff_text(old, new, f"{path} (old)", f"{path} (new)")
+     preview = diff if diff.strip() else "(no textual differences)"
+     return (
+         f"I proposed changes to {path}:\n"
+         f"Reason: {reason}\n"
+         f"Diff:\n```diff\n{preview}\n```\n"
+         f"Apply these changes? Say 'yes' to apply, or 'no' to cancel. "
+         f"(You can also say 'edit the proposal' to iterate.)"
+     )
+
+ def apply_pending(pending: dict) -> str:
+     path = pending["path"]
+     new_content = pending["new_content"]
+     old_content = pending["old_content"]  # retained from the pending record; unused here
+     try:
+         backup = write_atomic(path, new_content)
+         return f"Applied changes to {path}. Backup: {backup or 'none'}"
+     except Exception as e:
+         # On failure, offer to save to an alternative path
+         alt = f"{path}.failed.{int(time.time())}.txt"
+         try:
+             write_atomic(alt, new_content)
+             return f"Failed to write {path}: {e}\nSaved proposed content to {alt}"
+         except Exception as e2:
+             return f"Failed to apply and to save alt copy: {e2}"
+
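+ # Failure path: if the target cannot be written, the proposed content is
+ # parked in <path>.failed.<timestamp>.txt so the model's output is not lost.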
+ def natural_yes(text: str) -> bool:
+     return text.strip().lower() in {"y", "yes", "apply", "do it", "ok", "okay", "sure", "confirm"}
+
+ def natural_no(text: str) -> bool:
+     t = text.strip().lower()
+     return t in {"n", "no", "cancel", "stop", "reject", "discard"}
+
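+ # Illustrative: natural_yes("do it") -> True, natural_no("cancel") -> True;
+ # anything else while a proposal is pending is treated as iteration feedback.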
+ # =========================
+ # Core chat pipeline
+ # =========================
+ def format_messages(system, history, user_msg):
      msgs = []
      if system.strip():
          msgs.append({"role": "system", "content": system})
      for h in history:
          msgs.append({"role": h["role"], "content": h["content"]})
      msgs.append({"role": "user", "content": user_msg})
      return msgs

+ def stream_chat_response(user_msg, history, system, temperature, top_p, max_new_tokens):
      llm = get_llm()
+     msgs = format_messages(system, history, user_msg)
+     stream = llm.create_chat_completion(
          messages=msgs,
          temperature=temperature,
+         top_p=top_p,
+         max_tokens=max_new_tokens,
          stream=True
      )
      partial = ""
      for chunk in stream:
          delta = chunk["choices"][0]["delta"]
          if "content" in delta:
              piece = delta["content"]
              partial += piece
              yield partial
+     return
+
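+ # Streaming contract: yields the accumulated reply after each chunk so the
+ # UI can re-render the growing assistant message.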
+ # =========================
+ # Gradio app
+ # =========================
+ with gr.Blocks(title=APP_TITLE) as demo:
+     gr.Markdown(f"# {APP_TITLE}\nTalk normally. Ask for changes or new files; I’ll propose a patch, show a diff, and wait for your yes/no.")
      with gr.Row():
+         system = gr.Textbox(label="System prompt", value="You are a helpful, precise, and concise assistant.")
      with gr.Row():
+         temperature = gr.Slider(0.0, 1.5, value=0.4, step=0.05, label="Temperature")
          top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
+         max_new_tokens = gr.Slider(64, 2048, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
+     chat = gr.Chatbot(height=520, show_copy_button=True, type="messages")
+     user = gr.Textbox(label="Your message", placeholder="Ask anything… e.g., 'Optimize the memory recall code' or 'Create scripts/logger.py that logs messages'")
      send = gr.Button("Send", variant="primary")
+     state = gr.State({PENDING_KEY: None})

+     def respond(message, chat_history, system, temperature, top_p, max_new_tokens, state_obj):
          if not message or not message.strip():
+             # respond() is a generator (it streams below), so every exit path
+             # must yield its outputs rather than return them
+             yield gr.update(), chat_history, state_obj
+             return
+
+         # If there's a pending action, check for yes/no
+         pending = state_obj.get(PENDING_KEY)
+         if pending is not None:
+             if natural_yes(message):
+                 result = apply_pending(pending)
+                 state_obj[PENDING_KEY] = None
+                 new_hist = (chat_history or []) + [
+                     {"role": "user", "content": message},
+                     {"role": "assistant", "content": result},
+                 ]
+                 yield gr.update(value=new_hist), new_hist, state_obj
+                 return
+             elif natural_no(message):
+                 state_obj[PENDING_KEY] = None
+                 new_hist = (chat_history or []) + [
+                     {"role": "user", "content": message},
+                     {"role": "assistant", "content": "Okay, discarded the proposed change."},
+                 ]
+                 yield gr.update(value=new_hist), new_hist, state_obj
+                 return
+             # If neither yes/no, treat as iteration: regenerate proposal using the user's feedback
+             msg = f"Updating the proposal with your feedback: {message}\nRe‑proposing…"
+             history_msgs = (chat_history or []) + [{"role": "assistant", "content": msg}]
+             # Merge feedback into a new proposal prompt by appending to user_text
+             merged_request = pending.get("reason", "") + "\n\nAdditional feedback: " + message
+             proposal = propose_edit_or_create(merged_request)
+             path = proposal["path"]
+             try:
+                 old = read_file(path)
+             except Exception:
+                 old = ""
+             diff_msg = propose_diff_message(path, old, proposal["new_content"], proposal["reason"])
+             # Stash new pending
+             state_obj[PENDING_KEY] = {
+                 "path": path,
+                 "new_content": proposal["new_content"],
+                 "old_content": old,
+                 "reason": proposal["reason"],
+             }
+             new_hist = history_msgs + [{"role": "assistant", "content": diff_msg}]
+             yield gr.update(value=new_hist), new_hist, state_obj
+             return
+
+         # No pending: decide intent
+         intent = detect_intent(message)
+         if intent in ("edit", "create"):
+             proposal = propose_edit_or_create(message)
+             path = proposal["path"]
+             try:
+                 old = read_file(path)
+             except Exception:
+                 old = ""
+             diff_msg = propose_diff_message(path, old, proposal["new_content"], proposal["reason"])
+             # Stash pending
+             state_obj[PENDING_KEY] = {
+                 "path": path,
+                 "new_content": proposal["new_content"],
+                 "old_content": old,
+                 "reason": proposal["reason"],
+             }
+             new_hist = (chat_history or []) + [
+                 {"role": "user", "content": message},
+                 {"role": "assistant", "content": diff_msg},
+             ]
+             yield gr.update(value=new_hist), new_hist, state_obj
+             return
+
+         # Plain chat with streaming
+         history_msgs = chat_history or []
          bot_text = ""
+         for partial in stream_chat_response(message, history_msgs, system, temperature, top_p, max_new_tokens):
              bot_text = partial
              yield gr.update(value=(history_msgs + [
                  {"role": "user", "content": message},
                  {"role": "assistant", "content": bot_text}
              ])), (history_msgs + [
                  {"role": "user", "content": message},
                  {"role": "assistant", "content": bot_text}
+             ]), state_obj
+         # Save last turn once streaming ends
+         save_turn(system, history_msgs, message, bot_text)

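+     # Gradio re-renders the outputs after every yield of the generator; the
+     # final yielded (chatbot update, history, state) triple is what the
+     # click/submit bindings below leave in the UI.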
      send.click(
          respond,
+         [user, chat, system, temperature, top_p, max_new_tokens, state],
+         [chat, chat, state],
      )
      user.submit(
          respond,
+         [user, chat, system, temperature, top_p, max_new_tokens, state],
+         [chat, chat, state],
      )

+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)