boo4blue committed on
Commit
2d13893
·
verified ·
1 Parent(s): b8dc1c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -2,24 +2,23 @@ import os, time, json
2
  import gradio as gr
3
  from llama_cpp import Llama
4
 
5
- MODEL_REPO = os.environ.get("MODEL_REPO", "microsoft/Phi-3.1-mini-4k-instruct-gguf")
6
- MODEL_FILE = os.environ.get("MODEL_FILE", "Phi-3.1-mini-4k-instruct-q4.gguf")
 
7
  SAVE_PATH = "convos.jsonl"
8
 
9
- # Lazy init
10
  llm = None
11
 
12
  def get_llm():
13
  global llm
14
  if llm is not None:
15
  return llm
16
- # Auto-download GGUF from HF hub on first run
17
  llm = Llama.from_pretrained(
18
  repo_id=MODEL_REPO,
19
  filename=MODEL_FILE,
20
  n_ctx=4096,
21
- n_threads=4, # Spaces CPU: keep modest
22
- n_gpu_layers=0, # CPU only
23
  verbose=False
24
  )
25
  return llm
@@ -29,12 +28,11 @@ def format_messages(system, history, user_msg):
29
  if system.strip():
30
  msgs.append({"role": "system", "content": system})
31
  for h in history:
32
- role = "user" if h[0] is not None else "assistant"
33
  if h[0] is not None:
34
- msgs.append({"role":"user","content":h[0]})
35
  if h[1] is not None:
36
- msgs.append({"role":"assistant","content":h[1]})
37
- msgs.append({"role":"user","content":user_msg})
38
  return msgs
39
 
40
  def save_turn(system, history, user_msg, assistant_msg):
@@ -52,7 +50,6 @@ def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
52
  llm = get_llm()
53
  msgs = format_messages(system, history, user_msg)
54
 
55
- # Stream tokens
56
  stream = llm.create_chat_completion(
57
  messages=msgs,
58
  temperature=temperature,
 
2
  import gradio as gr
3
  from llama_cpp import Llama
4
 
5
# Working public GGUF model. Defaults can be overridden via environment
# variables (restores the configurability the previous revision had) so the
# model can be swapped without a code change.
# NOTE(review): confirm this repo/filename still exist on the Hugging Face Hub.
MODEL_REPO = os.environ.get("MODEL_REPO", "TheBloke/Phi-3.5-mini-instruct-GGUF")
MODEL_FILE = os.environ.get("MODEL_FILE", "phi-3.5-mini-instruct-q4_K_M.gguf")

# JSONL file conversations are appended to — presumably by save_turn; verify.
SAVE_PATH = "convos.jsonl"

# Lazily-initialized singleton Llama instance; populated by get_llm().
llm = None
11
 
12
def get_llm():
    """Return the module-level Llama instance, loading it on first use.

    The GGUF weights identified by MODEL_REPO / MODEL_FILE are downloaded
    from the Hugging Face Hub via Llama.from_pretrained the first time this
    is called; every later call returns the cached instance.

    Returns:
        The shared llama_cpp.Llama instance.
    """
    global llm
    if llm is None:
        # CPU-only deployment: modest thread count, no GPU layer offload.
        llm = Llama.from_pretrained(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            n_ctx=4096,
            n_threads=4,
            n_gpu_layers=0,
            verbose=False,
        )
    return llm
 
28
  if system.strip():
29
  msgs.append({"role": "system", "content": system})
30
  for h in history:
 
31
  if h[0] is not None:
32
+ msgs.append({"role": "user", "content": h[0]})
33
  if h[1] is not None:
34
+ msgs.append({"role": "assistant", "content": h[1]})
35
+ msgs.append({"role": "user", "content": user_msg})
36
  return msgs
37
 
38
  def save_turn(system, history, user_msg, assistant_msg):
 
50
  llm = get_llm()
51
  msgs = format_messages(system, history, user_msg)
52
 
 
53
  stream = llm.create_chat_completion(
54
  messages=msgs,
55
  temperature=temperature,