sourize committed on
Commit
70fd1ee
·
verified ·
1 Parent(s): 52bc809

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -18
app.py CHANGED
@@ -14,37 +14,83 @@ supabase = create_client(SUPA_URL, SUPA_KEY)
14
  embedder = SentenceTransformer("paraphrase-MiniLM-L3-v2")
15
  def fetch_mems(query, k=5):
16
  vec = embedder.encode(query).tolist()
17
- # call your RPC
18
- data = supabase.rpc("match_memories", {"query_embedding": vec, "match_count": k}).execute().data
 
 
19
  return data
20
 
21
  def add_mem(speaker, text):
22
  vec = embedder.encode(text).tolist()
23
  supabase.table("memories").insert({
24
- "speaker": speaker, "text": text, "embedding": vec
 
 
25
  }).execute()
26
 
27
- # — Load LoRA model from HF hub —
28
  REPO = "sourize/phi2-memory-lora"
29
- tokenizer = AutoTokenizer.from_pretrained(REPO, trust_remote_code=True, padding_side="left")
30
- model_base = AutoModelForCausalLM.from_pretrained(REPO, trust_remote_code=True)
31
- model_base.resize_token_embeddings(len(tokenizer))
32
- model = PeftModel.from_pretrained(model_base, REPO)
33
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0,
34
- do_sample=True, top_p=0.9, temperature=0.8)
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  st.title("🧠 Memory-Aware Phi-2 Bot")
37
  if "history" not in st.session_state:
38
  st.session_state.history = []
39
 
40
- def chat(u):
 
41
  add_mem("user", u)
 
 
42
  mems = fetch_mems(u, 3)
43
  block = "\n".join(f"{m['speaker']}: {m['text']}" for m in mems)
44
- prompt = f"Memory:\n{block}\n\nUser: {u}\nAssistant:"
45
- out = pipe(prompt, max_length=200)[0]["generated_text"].split("Assistant:")[-1].strip()
46
- add_mem("assistant", out)
47
- return out
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  user = st.text_input("You:")
50
  if user:
@@ -52,6 +98,8 @@ if user:
52
  st.session_state.history.append(("You", user))
53
  st.session_state.history.append(("Bot", resp))
54
 
55
- for s, t in st.session_state.history:
56
- style = "### You:" if s=="You" else "**Bot:**"
57
- st.markdown(f"{style} {t}")
 
 
 
14
  embedder = SentenceTransformer("paraphrase-MiniLM-L3-v2")
15
  def fetch_mems(query, k=5):
16
  vec = embedder.encode(query).tolist()
17
+ data = supabase.rpc(
18
+ "match_memories",
19
+ {"query_embedding": vec, "match_count": k}
20
+ ).execute().data
21
  return data
22
 
23
  def add_mem(speaker, text):
24
  vec = embedder.encode(text).tolist()
25
  supabase.table("memories").insert({
26
+ "speaker": speaker,
27
+ "text": text,
28
+ "embedding": vec
29
  }).execute()
30
 
31
+ # — Load tokenizer & adapter from HF hub —
32
  REPO = "sourize/phi2-memory-lora"
 
 
 
 
 
 
33
 
34
+ # 1) Tokenizer (with your extra PAD token)
35
+ tokenizer = AutoTokenizer.from_pretrained(
36
+ REPO, trust_remote_code=True, padding_side="left"
37
+ )
38
+ if tokenizer.pad_token_id is None:
39
+ tokenizer.add_special_tokens({"pad_token": "[PAD]"})
40
+
41
+ # 2) Base Phi-2 → resize embeddings to match tokenizer
42
+ base = AutoModelForCausalLM.from_pretrained(
43
+ "microsoft/phi-2", trust_remote_code=True, torch_dtype="auto"
44
+ )
45
+ base.resize_token_embeddings(len(tokenizer))
46
+
47
+ # 3) Overlay your LoRA adapter
48
+ model = PeftModel.from_pretrained(
49
+ base,
50
+ REPO,
51
+ torch_dtype="auto",
52
+ device_map="auto" # let accelerate pick CPU/GPU
53
+ )
54
+ model.eval()
55
+
56
+ # 4) Build the generation pipeline
57
+ pipe = pipeline(
58
+ "text-generation",
59
+ model=model,
60
+ tokenizer=tokenizer,
61
+ device=0, # or device_map="auto"
62
+ do_sample=True,
63
+ top_p=0.9,
64
+ temperature=0.8,
65
+ )
66
+
67
+ # — Streamlit UI —
68
  st.title("🧠 Memory-Aware Phi-2 Bot")
69
  if "history" not in st.session_state:
70
  st.session_state.history = []
71
 
72
+ def chat(u: str) -> str:
73
+ # store user turn
74
  add_mem("user", u)
75
+
76
+ # fetch & format memories
77
  mems = fetch_mems(u, 3)
78
  block = "\n".join(f"{m['speaker']}: {m['text']}" for m in mems)
79
+
80
+ # build prompt
81
+ prompt = f"""Memory:
82
+ {block}
83
+
84
+ User: {u}
85
+ Assistant:"""
86
+
87
+ # generate reply
88
+ out = pipe(prompt, max_length=200)[0]["generated_text"]
89
+ reply = out.split("Assistant:")[-1].strip()
90
+
91
+ # store assistant turn
92
+ add_mem("assistant", reply)
93
+ return reply
94
 
95
  user = st.text_input("You:")
96
  if user:
 
98
  st.session_state.history.append(("You", user))
99
  st.session_state.history.append(("Bot", resp))
100
 
101
+ for speaker, text in st.session_state.history:
102
+ if speaker == "You":
103
+ st.markdown(f"**You:** {text}")
104
+ else:
105
+ st.markdown(f"**Assistant:** {text}")