Spaces:

Drakkarious
/

hackerai

Sleeping

Drakkarious committed on Apr 23

Commit

cc8034a

verified ·

1 Parent(s): da47586

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,50 +1,35 @@
 import os
-import sys
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
-# 1. Faster downloads
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 CACHE_DIR = os.path.join(os.getcwd(), "model_cache")
-print("--- Initializing AI Sandbox ---")
-try:
-    print("--- Downloading Model (16.7GB)... This will take a moment. ---")
-    model_path = hf_hub_download(
-        repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
-        filename="BugTraceAI-Apex-G4-26B-Q4.gguf",
-        cache_dir=CACHE_DIR
-    )
-    print(f"--- Loading Model into RAM (mmap enabled) ---")
-    llm = Llama(
-        model_path=model_path,
-        n_ctx=2048,
-        n_threads=2,
-        use_mmap=True,
-        n_gpu_layers=0
-    )
-    print("\n✅ SANDBOX READY")
-    print("------------------------------------------")
-    print("Enter your prompt below. Type 'exit' to quit.")
-    # Standard terminal loop (Stable for Docker Logs)
-    while True:
-        user_input = input("\n[Terminal] User: ")
-        if user_input.lower() in ["exit", "quit"]:
-            break
-        output = llm(
-            f"User: {user_input}\nAssistant:",
-            max_tokens=256,
-            stop=["User:", "\n"],
-            echo=False
-        )
-        print(f"Assistant: {output['choices'][0]['text']}")
-except Exception as e:
-    print(f"❌ CRITICAL ERROR: {e}")
-    sys.exit(1)

 import os
+import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
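+# Request hf_transfer-accelerated downloads. NOTE(review): huggingface_hub appears to read this flag at import time, so setting it after the import above may have no effect — confirm.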
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 CACHE_DIR = os.path.join(os.getcwd(), "model_cache")
+print("--- Downloading Model (16.7GB) ---")
+model_path = hf_hub_download(  # result is passed to Llama(model_path=...) below
+    repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
+    filename="BugTraceAI-Apex-G4-26B-Q4.gguf",
+    cache_dir=CACHE_DIR  # "model_cache" under the current working directory
+)
+print("--- Loading Model (This uses Disk Swapping/mmap) ---")
+llm = Llama(  # module-level: runs on import, before the Gradio UI is built
+    model_path=model_path,
+    n_ctx=1024,      # Lower context to save RAM
+    n_threads=2,     # Free tier limit
+    use_mmap=True,   # CRITICAL: Read from disk, not just RAM
+    n_gpu_layers=0  # no layers offloaded to a GPU
+)
+def respond(message, history):  # chat callback wired into gr.ChatInterface: returns the reply text for `message`
+    prompt = f"User: {message}\nAssistant:"  # single-turn prompt; NOTE(review): `history` is never read, so earlier turns are not sent to the model
+    output = llm(prompt, max_tokens=256, stop=["User:"], echo=False)  # completion capped at 256 tokens, with "User:" as the stop sequence
+    return output["choices"][0]["text"]  # text of the first choice, returned without stripping
+# Using Gradio keeps the Space "Alive"
+demo = gr.ChatInterface(fn=respond, title="BugTrace AI Sandbox")  # chat UI backed by respond(message, history)
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)  # bind all interfaces on port 7860