Drakkarious committed on
Commit
cc8034a
·
verified ·
1 Parent(s): da47586

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -39
app.py CHANGED
@@ -1,50 +1,35 @@
1
  import os
2
- import sys
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
- # 1. Faster downloads
7
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
8
-
9
  CACHE_DIR = os.path.join(os.getcwd(), "model_cache")
10
 
11
- print("--- Initializing AI Sandbox ---")
 
 
 
 
 
12
 
13
- try:
14
- print("--- Downloading Model (16.7GB)... This will take a moment. ---")
15
- model_path = hf_hub_download(
16
- repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
17
- filename="BugTraceAI-Apex-G4-26B-Q4.gguf",
18
- cache_dir=CACHE_DIR
19
- )
 
20
 
21
- print(f"--- Loading Model into RAM (mmap enabled) ---")
22
- llm = Llama(
23
- model_path=model_path,
24
- n_ctx=2048,
25
- n_threads=2,
26
- use_mmap=True,
27
- n_gpu_layers=0
28
- )
29
 
30
- print("\n✅ SANDBOX READY")
31
- print("------------------------------------------")
32
- print("Enter your prompt below. Type 'exit' to quit.")
33
-
34
- # Standard terminal loop (Stable for Docker Logs)
35
- while True:
36
- user_input = input("\n[Terminal] User: ")
37
- if user_input.lower() in ["exit", "quit"]:
38
- break
39
-
40
- output = llm(
41
- f"User: {user_input}\nAssistant:",
42
- max_tokens=256,
43
- stop=["User:", "\n"],
44
- echo=False
45
- )
46
- print(f"Assistant: {output['choices'][0]['text']}")
47
 
48
- except Exception as e:
49
- print(f"❌ CRITICAL ERROR: {e}")
50
- sys.exit(1)
 
1
  import os
2
+ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
+ # Fast download enabled
7
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
8
  CACHE_DIR = os.path.join(os.getcwd(), "model_cache")
9
 
10
+ print("--- Downloading Model (16.7GB) ---")
11
+ model_path = hf_hub_download(
12
+ repo_id="BugTraceAI/BugTraceAI-Apex-G4-26B-Q4",
13
+ filename="BugTraceAI-Apex-G4-26B-Q4.gguf",
14
+ cache_dir=CACHE_DIR
15
+ )
16
 
17
+ print("--- Loading Model (This uses Disk Swapping/mmap) ---")
18
+ llm = Llama(
19
+ model_path=model_path,
20
+ n_ctx=1024, # Lower context to save RAM
21
+ n_threads=2, # Free tier limit
22
+ use_mmap=True, # CRITICAL: Read from disk, not just RAM
23
+ n_gpu_layers=0
24
+ )
25
 
26
+ def respond(message, history):
27
+ prompt = f"User: {message}\nAssistant:"
28
+ output = llm(prompt, max_tokens=256, stop=["User:"], echo=False)
29
+ return output["choices"][0]["text"]
 
 
 
 
30
 
31
+ # Using Gradio keeps the Space "Alive"
32
+ demo = gr.ChatInterface(fn=respond, title="BugTrace AI Sandbox")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ if __name__ == "__main__":
35
+ demo.launch(server_name="0.0.0.0", server_port=7860)