gusreinaos committed on
Commit
1a7c573
·
1 Parent(s): 7000311
Files changed (1) hide show
  1. app.py +30 -22
app.py CHANGED
@@ -3,43 +3,51 @@ import subprocess
3
  import sys
4
  import os
5
 
6
- # Install llama-cpp-python at runtime if missing (fixes HF build issues)
7
  try:
8
  from llama_cpp import Llama
9
  print("llama-cpp-python already installed.")
10
  except ImportError:
11
- print("Installing llama-cpp-python (runtime fix for HF Spaces)...")
12
- subprocess.check_call([
13
- sys.executable, "-m", "pip", "install", "--no-cache-dir",
14
- "https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
15
- ])
16
- from llama_cpp import Llama
 
 
 
 
 
 
 
 
17
 
18
  from huggingface_hub import hf_hub_download
19
 
20
- # === CHANGE THESE TO YOUR FINE-TUNED MODEL ONCE UPLOADED ===
21
- MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF" # replace later
22
- MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf" # replace later
23
 
24
- print("Downloading model from HuggingFace...")
25
  model_path = hf_hub_download(
26
- repo_id=MODEL_NAME,
27
  filename=MODEL_FILE,
28
  local_dir="./models",
29
  local_dir_use_symlinks=False
30
  )
31
- print(f"Model downloaded: {model_path}")
32
 
33
- print("Loading model into memory...")
34
  llm = Llama(
35
  model_path=model_path,
36
- n_ctx=4096,
37
  n_threads=8,
38
- n_gpu_layers=0,
39
  n_batch=512,
 
40
  verbose=False
41
  )
42
- print("Model loaded successfully!")
43
 
44
  def chat(message, history):
45
  if not message.strip():
@@ -57,15 +65,15 @@ def chat(message, history):
57
  max_tokens=512,
58
  temperature=0.7,
59
  top_p=0.9,
60
- stop=["User:", "\nUser:", "</s>"],
61
  stream=False
62
  )
63
 
64
- bot_response = response['choices'][0]['message']['content'].strip()
65
  history.append((message, bot_response))
66
  return history, ""
67
 
68
- # === Your awesome CSS (unchanged) ===
69
  custom_css = """
70
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
71
  body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
@@ -85,7 +93,7 @@ footer { display: none !important; }
85
  with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
86
  gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
87
  chatbot = gr.Chatbot(height=600)
88
-
89
  with gr.Row():
90
  msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
91
  submit = gr.Button("SEND", scale=1, variant="primary")
@@ -99,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title=
99
  ],
100
  inputs=msg
101
  )
102
-
103
  gr.ClearButton([msg, chatbot], value="CLEAR")
104
 
105
  submit.click(chat, [msg, chatbot], [chatbot, msg])
 
3
  import sys
4
  import os
5
 
6
+ # === RUNTIME INSTALL OF llama-cpp-python (fixes all HF issues) ===
7
  try:
8
  from llama_cpp import Llama
9
  print("llama-cpp-python already installed.")
10
  except ImportError:
11
+ print("Installing llama-cpp-python (fast CPU wheel)...")
12
+ try:
13
+ subprocess.check_call([
14
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
15
+ "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
16
+ ])
17
+ print("llama-cpp-python installed from wheel.")
18
+ except Exception as e: # <-- fixed: added "as e" so it doesn't crash
19
+ print("Wheel failed → falling back to PyPI (2–4 min)...")
20
+ subprocess.check_call([
21
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
22
+ "llama-cpp-python==0.3.16", "--force-reinstall"
23
+ ])
24
+ from llama_cpp import Llama # <-- fixed: must be inside the except block!
25
 
26
  from huggingface_hub import hf_hub_download
27
 
28
+ # === YOUR FINE-TUNED LLAMA 3.2 3B ===
29
+ MODEL_REPO = "your-username/your-model-repo" # CHANGE THIS
30
+ MODEL_FILE = "your-finetuned-llama-3.2-3b-q4_k_m.gguf" # CHANGE THIS
31
 
32
+ print("Downloading your fine-tuned Llama 3.2 3B model...")
33
  model_path = hf_hub_download(
34
+ repo_id=MODEL_REPO,
35
  filename=MODEL_FILE,
36
  local_dir="./models",
37
  local_dir_use_symlinks=False
38
  )
39
+ print(f"Model ready: {model_path}")
40
 
41
+ print("Loading model (Llama 3.2 3B)...")
42
  llm = Llama(
43
  model_path=model_path,
44
+ n_ctx=8192,
45
  n_threads=8,
 
46
  n_batch=512,
47
+ n_gpu_layers=0,
48
  verbose=False
49
  )
50
+ print("Model loaded!")
51
 
52
  def chat(message, history):
53
  if not message.strip():
 
65
  max_tokens=512,
66
  temperature=0.7,
67
  top_p=0.9,
68
+ stop=["<|eot_id|>", "<|end_of_text|>"], # <-- Llama 3.2 stop tokens
69
  stream=False
70
  )
71
 
72
+ bot_response = response["choices"][0]["message"]["content"].strip()
73
  history.append((message, bot_response))
74
  return history, ""
75
 
76
+ # === CSS & INTERFACE (unchanged, perfect) ===
77
  custom_css = """
78
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
79
  body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
 
93
  with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
94
  gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
95
  chatbot = gr.Chatbot(height=600)
96
+
97
  with gr.Row():
98
  msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
99
  submit = gr.Button("SEND", scale=1, variant="primary")
 
107
  ],
108
  inputs=msg
109
  )
110
+
111
  gr.ClearButton([msg, chatbot], value="CLEAR")
112
 
113
  submit.click(chat, [msg, chatbot], [chatbot, msg])