boo4blue committed on
Commit
90d83b0
·
verified ·
1 Parent(s): 2d13893

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -21
app.py CHANGED
@@ -1,11 +1,11 @@
1
- import os, time, json
2
  import gradio as gr
3
  from llama_cpp import Llama
4
 
5
- # Working public GGUF model
6
- MODEL_REPO = "TheBloke/Phi-3.5-mini-instruct-GGUF"
7
- MODEL_FILE = "phi-3.5-mini-instruct-q4_K_M.gguf"
8
  SAVE_PATH = "convos.jsonl"
 
9
 
10
  llm = None
11
 
@@ -23,18 +23,6 @@ def get_llm():
23
  )
24
  return llm
25
 
26
- def format_messages(system, history, user_msg):
27
- msgs = []
28
- if system.strip():
29
- msgs.append({"role": "system", "content": system})
30
- for h in history:
31
- if h[0] is not None:
32
- msgs.append({"role": "user", "content": h[0]})
33
- if h[1] is not None:
34
- msgs.append({"role": "assistant", "content": h[1]})
35
- msgs.append({"role": "user", "content": user_msg})
36
- return msgs
37
-
38
  def save_turn(system, history, user_msg, assistant_msg):
39
  with open(SAVE_PATH, "a", encoding="utf-8") as f:
40
  rec = {
@@ -46,6 +34,42 @@ def save_turn(system, history, user_msg, assistant_msg):
46
  }
47
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
50
  llm = get_llm()
51
  msgs = format_messages(system, history, user_msg)
@@ -68,26 +92,33 @@ def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
68
 
69
  save_turn(system, history, user_msg, partial)
70
 
71
- with gr.Blocks(title="Free ChatGPT-like (CPU)") as demo:
72
- gr.Markdown("# Free ChatGPT‑style AI (CPU)\nSmall, quantized model on Hugging Face Spaces.")
73
  with gr.Row():
74
  system = gr.Textbox(label="System prompt", value="You are a helpful, concise assistant.")
75
  with gr.Row():
76
  temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
77
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
78
  max_new_tokens = gr.Slider(16, 1024, value=512, step=16, label="Max new tokens")
79
- chat = gr.Chatbot(height=480, show_copy_button=True)
80
  user = gr.Textbox(label="Your message", placeholder="Ask anything...")
81
  send = gr.Button("Send", variant="primary")
82
 
83
  def respond(message, chat_history, system, temperature, top_p, max_new_tokens):
84
  if not message or not message.strip():
85
  return gr.update(), chat_history
86
- stream = chat_fn(message, chat_history or [], system, temperature, top_p, max_new_tokens)
 
87
  bot_text = ""
88
  for partial in stream:
89
  bot_text = partial
90
- yield gr.update(value=(chat_history + [[message, bot_text]])), (chat_history + [[message, bot_text]])
 
 
 
 
 
 
91
 
92
  send.click(
93
  respond,
 
1
+ import os, time, json, re
2
  import gradio as gr
3
  from llama_cpp import Llama
4
 
5
+ MODEL_REPO = "QuantFactory/Phi-3.5-mini-instruct-GGUF"
6
+ MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"
 
7
  SAVE_PATH = "convos.jsonl"
8
+ MAX_RECALL = 5 # how many past turns to recall
9
 
10
  llm = None
11
 
 
23
  )
24
  return llm
25
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def save_turn(system, history, user_msg, assistant_msg):
27
  with open(SAVE_PATH, "a", encoding="utf-8") as f:
28
  rec = {
 
34
  }
35
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
36
 
37
def load_memory(query):
    """Recall up to MAX_RECALL past turns from SAVE_PATH relevant to *query*.

    Scoring is a naive keyword overlap: records sharing more words (case-
    insensitive, \\w+ tokens) with the query rank higher. Records with zero
    overlap are dropped.

    Args:
        query: The new user message used as the recall key.

    Returns:
        A list of at most MAX_RECALL record dicts, best match first.
        Empty list when the log file is absent or nothing matches.
    """
    if not os.path.exists(SAVE_PATH):
        return []

    records = []
    with open(SAVE_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                # A corrupt/truncated line (e.g. process killed mid-write)
                # must not break every future chat turn — skip it.
                continue

    query_words = set(re.findall(r"\w+", query.lower()))
    if not query_words:
        # No tokens to match against; nothing can score > 0.
        return []

    scored = []
    for rec in records:
        text = (rec.get("user", "") + " " + rec.get("assistant", "")).lower()
        score = len(query_words & set(re.findall(r"\w+", text)))
        if score > 0:
            scored.append((score, rec))
    scored.sort(reverse=True, key=lambda x: x[0])
    return [rec for _, rec in scored[:MAX_RECALL]]
53
+
54
def format_messages(system, history, user_msg):
    """Build the chat-completion message list sent to the model.

    Order: optional system prompt, an injected system message carrying
    recalled past conversations (via load_memory), the running history
    (already in OpenAI-style {"role", "content"} dicts), then the new
    user message.

    Args:
        system: System prompt text (may be empty or None).
        history: List of {"role": ..., "content": ...} message dicts.
        user_msg: The new user message.

    Returns:
        List of {"role", "content"} dicts ready for create_chat_completion.
    """
    msgs = []
    # Guard against None as well as whitespace-only prompts.
    if system and system.strip():
        msgs.append({"role": "system", "content": system})

    # Inject recalled memory as an extra system message.
    recalls = load_memory(user_msg)
    if recalls:
        # Use .get for the same defensive access load_memory uses —
        # a record missing a key must not raise KeyError here.
        mem_text = "\n".join(
            f"User: {r.get('user', '')}\nAssistant: {r.get('assistant', '')}"
            for r in recalls
        )
        msgs.append({"role": "system", "content": f"Relevant past conversations:\n{mem_text}"})

    for h in history:
        msgs.append({"role": h["role"], "content": h["content"]})

    msgs.append({"role": "user", "content": user_msg})
    return msgs
72
+
73
  def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
74
  llm = get_llm()
75
  msgs = format_messages(system, history, user_msg)
 
92
 
93
  save_turn(system, history, user_msg, partial)
94
 
95
+ with gr.Blocks(title="Free ChatGPT-like (CPU) with Memory") as demo:
96
+ gr.Markdown("# Free ChatGPT‑style AI (CPU) + Long‑Term Memory")
97
  with gr.Row():
98
  system = gr.Textbox(label="System prompt", value="You are a helpful, concise assistant.")
99
  with gr.Row():
100
  temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
101
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
102
  max_new_tokens = gr.Slider(16, 1024, value=512, step=16, label="Max new tokens")
103
+ chat = gr.Chatbot(height=480, show_copy_button=True, type="messages")
104
  user = gr.Textbox(label="Your message", placeholder="Ask anything...")
105
  send = gr.Button("Send", variant="primary")
106
 
107
def respond(message, chat_history, system, temperature, top_p, max_new_tokens):
    """Stream the assistant's reply into the Chatbot.

    Generator handler for the Send button: yields
    (chatbot_update, new_history) pairs as tokens arrive from chat_fn.
    """
    if not message or not message.strip():
        # BUG FIX: a `return value` inside a generator is discarded
        # (it only raises StopIteration), so Gradio never received the
        # no-op update. Yield it instead, then stop.
        yield gr.update(), chat_history
        return

    history_msgs = chat_history or []
    stream = chat_fn(message, history_msgs, system, temperature, top_p, max_new_tokens)
    bot_text = ""
    for partial in stream:
        bot_text = partial
        # Build the appended turn once per chunk instead of twice.
        updated = history_msgs + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_text},
        ]
        yield gr.update(value=updated), updated
122
 
123
  send.click(
124
  respond,