AIencoder committed on
Commit
6f6c901
·
verified ·
1 Parent(s): 54004c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -41
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import warnings
2
- warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.distributed.reduce_op.*")
3
  import gradio as gr
4
- import torch
5
  import time
6
  import sys
7
  import subprocess
@@ -18,15 +17,14 @@ import ast
18
  import json
19
  import tempfile
20
  from pathlib import Path
21
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
22
  from typing import List, Dict
23
  from functools import lru_cache
24
 
25
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
26
 
27
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
28
- DEVICE = "cpu"
29
- DTYPE = torch.float32
30
  DISPLAY_NUM = ":99"
31
  SCREEN_W, SCREEN_H = 800, 600
32
  SNIPPETS_FILE = "/tmp/axon_snippets.json"
@@ -419,30 +417,62 @@ class PTYTerminal:
419
  terminal = PTYTerminal()
420
 
421
  # ═══════════════════════════════════════
422
- # AI Model
423
  # ═══════════════════════════════════════
424
- @lru_cache(maxsize=1)
 
 
425
  def load_model():
426
- print("Loading TinyLlama...")
427
- t0 = time.time()
428
- try:
429
- tok = AutoTokenizer.from_pretrained(MODEL_NAME)
430
- mdl = AutoModelForCausalLM.from_pretrained(MODEL_NAME, dtype=DTYPE,
431
- device_map=DEVICE, low_cpu_mem_usage=True)
432
- gc = GenerationConfig(max_new_tokens=256, temperature=0.2, top_p=0.95, do_sample=True)
433
- pipe = pipeline("text-generation", model=mdl, tokenizer=tok, generation_config=gc)
434
- print(f"Loaded in {time.time()-t0:.1f}s")
435
- return pipe
436
- except Exception as e:
437
- print(f"Model error: {e}"); return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
- def ai_gen(system_prompt, code, max_tokens=256):
440
- pipe = load_model()
441
- if not pipe: return "Error: model failed to load"
442
- prompt = f"<|system|>{system_prompt}</s><|user|>\n{code}\n</s><|assistant|>"
443
  try:
444
- result = pipe(prompt, max_new_tokens=max_tokens, return_full_text=False)
445
- return result[0]["generated_text"].strip()
 
 
 
 
 
 
 
446
  except Exception as e:
447
  return f"Error: {e}"
448
 
@@ -609,6 +639,11 @@ with gr.Blocks(title="Axon Pro") as demo:
609
 
610
  with gr.Tabs() as bottom_tabs:
611
 
 
 
 
 
 
612
  with gr.Tab("⌘ TERMINAL", id="term-tab"):
613
  term_out = gr.Textbox(value=terminal.get_log(), lines=12, max_lines=25,
614
  interactive=False, elem_classes="term-box",
@@ -661,7 +696,7 @@ with gr.Blocks(title="Axon Pro") as demo:
661
 
662
  status_bar = gr.Markdown(
663
  f"**AXON PRO v4.0** │ Python {sys.version.split()[0]} │ CPU │ "
664
- f"TinyLlama-1.1B │ PTY + Xvfb │ Snippets + Structure + Find",
665
  elem_classes="status-bar")
666
 
667
  # State
@@ -695,7 +730,7 @@ with gr.Blocks(title="Axon Pro") as demo:
695
  def on_save(code):
696
  fs.save_file(code)
697
  return (f"**AXON PRO v4.0** │ Python {sys.version.split()[0]} │ CPU │ "
698
- f"TinyLlama-1.1B │ ✓ Saved {fs.current_file}")
699
 
700
  def on_delete():
701
  name = fs.current_file
@@ -712,13 +747,9 @@ with gr.Blocks(title="Axon Pro") as demo:
712
  if is_gui_code(code):
713
  # GUI app — launch as background process, capture display
714
  msg = gui_mgr.launch(code)
715
- terminal._append(msg)
716
- time.sleep(1.5)
717
- ss = vdisplay.capture()
718
- return terminal.get_log(), ss, f"<small>Xvfb: ● ON | {gui_mgr.get_status()}</small>"
719
 
720
- # Normal script — run with subprocess, capture output
721
- terminal._append("$ python [editor]")
722
  tmp = "/tmp/_axon_run.py"
723
  with open(tmp, "w") as f: f.write(code)
724
  try:
@@ -727,12 +758,17 @@ with gr.Blocks(title="Axon Pro") as demo:
727
  env["PYTHONPATH"] = fs._sync_dir + ":" + env.get("PYTHONPATH", "")
728
  r = subprocess.run([sys.executable, tmp], capture_output=True, text=True,
729
  timeout=30, env=env, cwd=fs._sync_dir)
730
- if r.stdout.strip(): terminal._append(r.stdout.rstrip())
731
- if r.stderr.strip(): terminal._append(r.stderr.rstrip())
732
- if not r.stdout.strip() and not r.stderr.strip(): terminal._append("(No output)")
733
- except subprocess.TimeoutExpired: terminal._append("[Timed out 30s]")
734
- except Exception as e: terminal._append(f"[Error] {e}")
735
- return terminal.get_log(), gr.update(), gr.update()
 
 
 
 
 
736
 
737
  def on_stop():
738
  msg = gui_mgr.stop(); terminal._append(msg)
@@ -834,7 +870,8 @@ with gr.Blocks(title="Axon Pro") as demo:
834
  del_btn.click(on_delete, None, [file_list, editor, editor, structure_view])
835
 
836
  # Run
837
- run_btn.click(on_run, editor, [term_out, display_image, gui_status])
 
838
  stop_btn.click(on_stop, None, [term_out, gui_status])
839
 
840
  # Terminal
 
1
  import warnings
2
+ warnings.filterwarnings("ignore")
3
  import gradio as gr
 
4
  import time
5
  import sys
6
  import subprocess
 
17
  import json
18
  import tempfile
19
  from pathlib import Path
20
+ from huggingface_hub import hf_hub_download
21
  from typing import List, Dict
22
  from functools import lru_cache
23
 
24
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
 
26
+ MODEL_REPO = "AIencoder/Qwen2.5CMR-Q4_K_M-GGUF"
27
+ MODEL_FILE = "qwen2.5cmr-q4_k_m.gguf"
 
28
  DISPLAY_NUM = ":99"
29
  SCREEN_W, SCREEN_H = 800, 600
30
  SNIPPETS_FILE = "/tmp/axon_snippets.json"
 
417
  terminal = PTYTerminal()
418
 
419
  # ═══════════════════════════════════════
420
+ # AI Model — Qwen2.5CMR Q4_K_M GGUF
421
  # ═══════════════════════════════════════
422
# Lazily-created singleton for the GGUF model; lock guards concurrent loads.
_llm_instance = None
_llm_lock = threading.Lock()

def load_model():
    """Download (if needed) and load the Qwen2.5CMR GGUF model, once.

    Returns the cached ``llama_cpp.Llama`` instance, or ``None`` when the
    download or load fails (the caller is expected to surface the error).
    Safe to call from multiple threads: a fast unlocked check is followed
    by a re-check under the lock.
    """
    global _llm_instance

    # Fast path: model already resident — no locking needed.
    if _llm_instance is not None:
        return _llm_instance

    with _llm_lock:
        # Re-check under the lock: another thread may have loaded it
        # while we were waiting.
        if _llm_instance is not None:
            return _llm_instance

        print(f"Downloading {MODEL_REPO}/{MODEL_FILE}...")
        started = time.time()
        try:
            model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
            print(f"Downloaded in {time.time()-started:.1f}s, loading...")

            # Imported lazily so the app can still start when llama_cpp
            # is not installed; failure falls into the handler below.
            from llama_cpp import Llama

            _llm_instance = Llama(
                model_path=model_path,
                n_ctx=4096,
                n_threads=os.cpu_count() or 4,
                n_gpu_layers=0,  # CPU-only deployment
                verbose=False,
            )
            print(f"Model ready in {time.time()-started:.1f}s total")
            return _llm_instance
        except Exception as e:
            # Best-effort: report and return None; next call will retry.
            print(f"Model error: {e}")
            return None
456
+
457
def ai_gen(system_prompt, code, max_tokens=300):
    """Run a single chat completion against the local GGUF model.

    system_prompt -- instruction text placed in the system role.
    code          -- user-role content (typically editor source code).
    max_tokens    -- generation budget for the reply (default 300).

    Returns the stripped reply text, a placeholder when the model emits
    nothing, or an ``"Error: ..."`` string when the model failed to load
    or the completion raised.
    """
    llm = load_model()
    if not llm:
        return "Error: model failed to load"

    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": code},
    ]

    try:
        resp = llm.create_chat_completion(
            messages=chat,
            max_tokens=max_tokens,
            temperature=0.3,
            top_p=0.9,
            repeat_penalty=1.1,
        )
        # Extraction stays inside the try: a malformed response dict
        # should also be reported as an error, not crash the UI.
        reply = resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Error: {e}"

    return reply if reply else "(No output from model)"
478
 
 
639
 
640
  with gr.Tabs() as bottom_tabs:
641
 
642
+ with gr.Tab("▶ OUTPUT", id="output-tab"):
643
+ run_output = gr.Textbox(value="Run your code to see output here.",
644
+ lines=12, max_lines=25, interactive=False,
645
+ elem_classes="term-box", label="", show_label=False)
646
+
647
  with gr.Tab("⌘ TERMINAL", id="term-tab"):
648
  term_out = gr.Textbox(value=terminal.get_log(), lines=12, max_lines=25,
649
  interactive=False, elem_classes="term-box",
 
696
 
697
  status_bar = gr.Markdown(
698
  f"**AXON PRO v4.0** │ Python {sys.version.split()[0]} │ CPU │ "
699
+ f"Qwen2.5CMR Q4_K_M │ PTY + Xvfb │ Snippets + Structure + Find",
700
  elem_classes="status-bar")
701
 
702
  # State
 
730
  def on_save(code):
731
  fs.save_file(code)
732
  return (f"**AXON PRO v4.0** │ Python {sys.version.split()[0]} │ CPU │ "
733
+ f"Qwen2.5CMR Q4_K_M │ ✓ Saved {fs.current_file}")
734
 
735
  def on_delete():
736
  name = fs.current_file
 
747
  if is_gui_code(code):
748
  # GUI app — launch as background process, capture display
749
  msg = gui_mgr.launch(code)
750
+ return msg, vdisplay.capture(), f"<small>Xvfb: ● ON | {gui_mgr.get_status()}</small>"
 
 
 
751
 
752
+ # Normal script — run with subprocess, output goes to OUTPUT tab
 
753
  tmp = "/tmp/_axon_run.py"
754
  with open(tmp, "w") as f: f.write(code)
755
  try:
 
758
  env["PYTHONPATH"] = fs._sync_dir + ":" + env.get("PYTHONPATH", "")
759
  r = subprocess.run([sys.executable, tmp], capture_output=True, text=True,
760
  timeout=30, env=env, cwd=fs._sync_dir)
761
+ output = ""
762
+ if r.stdout.strip(): output += r.stdout.rstrip()
763
+ if r.stderr.strip():
764
+ if output: output += "\n"
765
+ output += r.stderr.rstrip()
766
+ if not output: output = "(No output)"
767
+ except subprocess.TimeoutExpired:
768
+ output = "[Timed out after 30s]"
769
+ except Exception as e:
770
+ output = f"[Error] {e}"
771
+ return output, gr.update(), gr.update()
772
 
773
  def on_stop():
774
  msg = gui_mgr.stop(); terminal._append(msg)
 
870
  del_btn.click(on_delete, None, [file_list, editor, editor, structure_view])
871
 
872
  # Run
873
+ run_btn.click(on_run, editor, [run_output, display_image, gui_status]
874
+ ).then(lambda: gr.Tabs(selected="output-tab"), None, bottom_tabs)
875
  stop_btn.click(on_stop, None, [term_out, gui_status])
876
 
877
  # Terminal