Spaces:

Drakkarious
/

hackerai

Sleeping

Drakkarious committed on Apr 23

Commit

715d004

verified ·

1 Parent(s): 85bc0f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,36 +1,49 @@
 import os
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
-# Enable fast downloads
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-print("--- Downloading Model (this may take a few minutes) ---")
-# Download the GGUF file
-model_path = hf_hub_download(
-    repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
-    filename="BugTraceAI-Apex-G4-26B-Q4.gguf"
-)
-print(f"--- Loading Model from {model_path} ---")
-# Initialize the model with memory mapping for large MoE models
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,      # Context window
-    n_threads=2,    # Free tier CPU cores
-    use_mmap=True,  # CRITICAL: Allows 16.7GB model to run on 16GB RAM
-    n_gpu_layers=0  # CPU only for free tier
-)
-def chat(prompt):
-    """Function to call from your terminal"""
-    output = llm(
-        f"Prompt: {prompt}\nResponse:",
-        max_tokens=256,
-        stop=["Prompt:", "\n"],
-        echo=False
     )
-    return output["choices"][0]["text"]
-print("\n🚀 AI Sandbox Ready!")
-print("Type: chat('your question') to interact.")

 import os
+import sys
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+# 1. Faster downloads
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+# 2. Define local paths
+CACHE_DIR = os.path.join(os.getcwd(), "model_cache")
+print("--- Starting AI Sandbox ---")
+try:
+    print("--- Downloading 26B Model (16.7GB) to local cache ---")
+    model_path = hf_hub_download(
+        repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
+        filename="BugTraceAI-Apex-G4-26B-Q4.gguf",
+        cache_dir=CACHE_DIR
+    )
+    print(f"--- Loading Model: {model_path} ---")
+    # Low-RAM configuration for Free Tier
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=2048,
+        n_threads=2,
+        use_mmap=True, # Critical for large models on 16GB RAM
+        n_gpu_layers=0 # CPU Only
     )
+    def chat(prompt):
+        output = llm(
+            f"User: {prompt}\nAssistant:",
+            max_tokens=256,
+            stop=["User:", "\n"],
+            echo=False
+        )
+        return output["choices"][0]["text"]
+    print("\n✅ Sandbox Ready!")
+    print("To chat, use the logs terminal or call: chat('your prompt')")
+    # Keeps the container alive and interactive
+    import IPython
+    IPython.embed()
+except Exception as e:
+    print(f"❌ Error during startup: {e}")
+    sys.exit(1)