NimrodDev committed on
Commit
537a3fc
·
1 Parent(s): cefc270
Files changed (2) hide show
  1. Dockerfile +30 -13
  2. app.py +138 -58
Dockerfile CHANGED
@@ -1,23 +1,40 @@
1
- FROM python:3.11-slim
 
2
 
3
  RUN apt-get update && \
4
- apt-get install -y --no-install-recommends curl ca-certificates ffmpeg procps && \
5
  curl -fsSL https://ollama.com/install.sh | sh && \
6
- ln -sf /usr/local/bin/ollama /usr/bin/ollama && \
7
  apt-get clean && rm -rf /var/lib/apt/lists/*
8
 
9
- COPY requirements.txt /requirements.txt
10
- RUN pip install --no-cache-dir -r /requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  WORKDIR /code
13
- COPY services.txt /code/services.txt
14
- COPY . /code
15
 
16
  ENV OLLAMA_HOST=0.0.0.0:11434
17
- EXPOSE 7860
 
 
 
 
18
 
19
- CMD ["bash","-c","\
20
- ollama serve & \
21
- while ! curl -s http://localhost:11434 >/dev/null; do sleep 0.5; done && \
22
- ollama pull tinyllama:1.1b-chat-q4_0 && \
23
- gunicorn -b 0.0.0.0:7860 --workers 1 --timeout 30 app:app"]
 
 
1
# ---------- builder stage ----------
FROM python:3.11-slim AS builder

RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates && \
    curl -fsSL https://ollama.com/install.sh | sh && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# ---------- runtime stage ----------
FROM python:3.11-slim

# runtime utils: curl is required by both the CMD wait-loop and the HEALTHCHECK
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates procps && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# copy ollama binary from builder
# NOTE(review): some ollama releases also install support files under
# /usr/local/lib/ollama — confirm the bare binary is sufficient for this version.
COPY --from=builder /usr/local/bin/ollama /usr/local/bin/ollama

# python deps in their own layer so app-code changes don't bust the pip cache
COPY requirements.txt /tmp/
RUN pip install --no-cache-dir -U pip && \
    pip install --no-cache-dir -r /tmp/requirements.txt

WORKDIR /code
COPY . .

ENV OLLAMA_HOST=0.0.0.0:11434
EXPOSE 7860 11434

# health-check: gunicorn only binds 7860 AFTER `ollama pull` finishes, so the
# start period must cover a cold-start model download (minutes, not 15 s) —
# otherwise Docker marks the container unhealthy before it can ever serve.
HEALTHCHECK --interval=30s --timeout=3s --start-period=300s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

CMD bash -c "\
    ollama serve & \
    while ! curl -s http://localhost:11434/api/tags >/dev/null; do \
        echo 'waiting for ollama…'; sleep 1; done; \
    ollama pull ${OLLAMA_MODEL:-tinyllama:1.1b-chat-q4_0}; \
    exec gunicorn -b 0.0.0.0:7860 --workers 1 --timeout 120 app:app"
app.py CHANGED
@@ -1,8 +1,14 @@
1
  #!/usr/bin/env python3
 
 
 
 
 
 
 
2
  import os
3
- import re
4
  import pathlib
5
- import logging
6
  from functools import lru_cache
7
  from typing import List, Optional
8
 
@@ -11,141 +17,215 @@ import ollama
11
  from flask import Flask, request, jsonify
12
  from langchain_core.documents import Document
13
  from langchain_community.vectorstores import FAISS
14
- from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from rank_bm25 import BM25Okapi
16
  from supabase import create_client, Client
17
 
18
- logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(message)s")
 
 
 
 
 
19
  log = logging.getLogger("wa")
20
 
21
- # ---------- CONFIG ----------
22
- VERIFY_TOKEN = os.getenv("WEBHOOK_VERIFY", "123456")
23
- SUPABASE_URL = os.getenv("SUPABASE_URL")
24
- SUPABASE_KEY = os.getenv("SUPABASE_KEY")
25
- OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "tinyllama:1.1b-chat-q4_0") # 700 MB
26
- EMBED = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
 
 
27
 
28
- supabase: Optional[Client] = create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL else None
 
 
 
 
 
29
 
30
- # ---------- FAST LLM ----------
31
  ollama_client = ollama.Client(host="http://localhost:11434")
32
 
 
33
  @lru_cache(maxsize=512)
34
  def fast_llm(prompt: str, max_new: int = 60) -> str:
 
35
  try:
36
  resp = ollama_client.generate(
37
  model=OLLAMA_MODEL,
38
  prompt=prompt[-512:],
39
- options={"temperature": 0.2, "num_predict": max_new, "stop": ["\n", "User:", "Human:"]}
 
 
 
 
40
  )
41
  return resp["response"].strip()
42
- except Exception as e:
43
- log.warning("ollama: %s", e)
44
  return "Sorry, I am having trouble thinking right now."
45
 
46
- # ---------- DB ----------
 
47
  def get_last(user: str, n: int = 4) -> List[str]:
 
48
  if not supabase:
49
  return []
50
  try:
51
- rows = (supabase.table("chat_memory").select("role,message")
52
- .eq("user_phone", user).order("created_at", desc=True).limit(n).execute().data)[::-1]
 
 
 
 
 
 
 
53
  return [f"{r['role']}: {r['message']}" for r in rows]
54
- except Exception as e:
55
- log.warning("db: %s", e)
56
  return []
57
 
58
- def save_msg(user: str, text: str, role: str = "assistant"):
59
- if supabase:
60
- try:
61
- supabase.table("chat_memory").insert(
62
- {"user_phone": user, "role": role.lower(), "message": text}).execute()
63
- except Exception as e:
64
- log.warning("db write: %s", e)
65
 
66
- # ---------- ATOMIC-LINE RETRIEVER ----------
 
 
 
 
 
 
 
 
 
 
 
 
67
  @lru_cache(maxsize=1)
68
  def atomic_retriever():
69
- docs = []
70
- file = pathlib.Path("services.txt")
71
- if file.exists():
72
- for line in file.read_text(encoding="utf-8").splitlines():
 
73
  line = line.strip()
74
- if line and "KES" in line: # atomic price line
75
  docs.append(Document(page_content=line))
76
- if not docs:
77
- docs = [Document(page_content="LD Events handles events. Lamaki Designs handles interiors.")]
 
 
 
78
  dense = FAISS.from_documents(docs, EMBED).as_retriever(search_kwargs={"k": 5})
79
  tokenized = [re.findall(r"\w+", d.page_content.lower()) for d in docs]
80
  bm25 = BM25Okapi(tokenized)
81
 
82
- def search(q: str) -> List[Document]:
83
- dense_hits = dense.invoke(q) # v1 API
84
- scores = bm25.get_scores(re.findall(r"\w+", q.lower()))
85
  top = np.argsort(scores)[-5:][::-1]
86
  bm25_hits = [docs[i] for i in top if scores[i] > 0]
87
- seen = set()
88
- return [d for d in dense_hits + bm25_hits if not (d.page_content in seen or seen.add(d.page_content))]
 
 
 
 
 
89
  return search
90
 
 
91
  search = atomic_retriever()
92
 
93
- # ---------- COMPANY GREETING ----------
 
94
  def company_greeting(company: str) -> str:
95
  if company == "ld events":
96
- return "🎤 Hey there! Welcome to LD Events – your ultimate sound partner. How can we make your event unforgettable?"
 
 
 
97
  return "🛋️ Hello! Lamaki Designs here – ready to transform your space. What are you dreaming of?"
98
 
99
- # ---------- ZERO-HARDCODE REPLY ----------
100
  @lru_cache(maxsize=512)
101
  def smart_reply(text: str, user: str) -> str:
102
- company = "ld events" if any(k in text.lower() for k in ["wedding","concert","live","stage","sound","ld events","speaker","line array","moving head","parcan","led screen","bronze","silver","gold","platinum"]) else "lamaki designs"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # 1. greeting
105
- if any(k in text.lower() for k in ["hello","hi","hey","jambo"]):
106
  return company_greeting(company)
107
 
108
- # 2. price / hire → atomic lines only
109
- if any(k in text.lower() for k in ["price","cost","how much","hire","rate","quote"]):
110
  hits = search(text)
111
  if not hits:
112
  return "Which exact item or package would you like a quote for? (e.g. ‘line-array-top’ or ‘Silver-Package’)"
113
-
114
- # inject live atomic lines
115
- context = "\n".join(d.page_content for d in hits[:3]) # <-- FIXED
116
  prompt = (
117
  f"Using ONLY the lines below, answer in one short sentence. "
118
  f"Never invent prices. If the exact item is not listed, ask for clarification.\n\n"
119
- f"Lines:\n{context}\n\n"
120
- f"User: {text}\nAssistant:"
121
  )
122
  return fast_llm(prompt, max_new=40)
123
 
124
  # 3. generic chat
125
  prompt = (
126
  f"You are a lively Kenyan assistant for {company.title()}. "
127
- f"Keep answers under 15 words, use emojis, no emails/phones.\n"
128
- f"User: {text}\nAssistant:"
129
  )
130
  return fast_llm(prompt, max_new=30)
131
 
132
- # ---------- WEBHOOK ----------
 
133
  app = Flask(__name__)
134
 
 
135
  @app.post("/whatsapp")
136
  def whatsapp():
 
137
  if request.json.get("verify") != VERIFY_TOKEN:
138
  return jsonify(error="bad token"), 403
139
  user = request.json.get("from", "unknown")
140
- msg = request.json.get("text", "")
141
  save_msg(user, msg, "user")
142
- ans = smart_reply(msg, user)
143
  save_msg(user, ans, "assistant")
144
  return jsonify(reply=ans)
145
 
 
146
  @app.get("/")
147
  def health():
148
  return "ok\n"
149
 
 
150
  if __name__ == "__main__":
 
151
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
1
  #!/usr/bin/env python3
2
+ """
3
+ WhatsApp webhook + RAG chat-bot for LD-Events / Lamaki-Designs
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import logging
9
  import os
 
10
  import pathlib
11
+ import re
12
  from functools import lru_cache
13
  from typing import List, Optional
14
 
 
17
  from flask import Flask, request, jsonify
18
  from langchain_core.documents import Document
19
  from langchain_community.vectorstores import FAISS
20
+ from langchain_huggingface import HuggingFaceEmbeddings # <-- new package
21
  from rank_bm25 import BM25Okapi
22
  from supabase import create_client, Client
23
 
24
+ # ---------- logging ----------
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format="%(asctime)s | %(levelname)s | %(message)s",
28
+ datefmt="%Y-%m-%d %H:%M:%S",
29
+ )
30
  log = logging.getLogger("wa")
31
 
32
+ # ---------- config ----------
33
+ VERIFY_TOKEN = os.getenv("WEBHOOK_VERIFY", "123456")
34
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
35
+ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
36
+ OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "tinyllama:1.1b-chat-q4_0")
37
+
38
+ supabase: Optional[Client] = (
39
+ create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL else None
40
+ )
41
 
42
+ # ---------- embeddings ----------
43
+ EMBED = HuggingFaceEmbeddings(
44
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
45
+ model_kwargs={"device": "cpu"},
46
+ encode_kwargs={"normalize_embeddings": True},
47
+ )
48
 
49
+ # ---------- ollama client ----------
50
  ollama_client = ollama.Client(host="http://localhost:11434")
51
 
52
+
53
@lru_cache(maxsize=512)
def _fast_llm_cached(prompt: str, max_new: int) -> str:
    """Call the local Ollama model; raise on any transport/model error.

    Kept separate from ``fast_llm`` so that only *successful* generations
    are memoised — previously the apology fallback was cached too, so one
    transient Ollama outage poisoned that prompt's cache entry forever.
    """
    resp = ollama_client.generate(
        model=OLLAMA_MODEL,
        prompt=prompt[-512:],  # keep only the tail to bound context size
        options={
            "temperature": 0.2,
            "num_predict": max_new,
            "stop": ["\n", "User:", "Human:"],
        },
    )
    return resp["response"].strip()


def fast_llm(prompt: str, max_new: int = 60) -> str:
    """Generate a short reply, degrading gracefully when Ollama is down."""
    try:
        return _fast_llm_cached(prompt, max_new)
    except Exception as exc:
        log.warning("ollama error: %s", exc)
        return "Sorry, I am having trouble thinking right now."
70
 
71
+
72
# ---------- chat memory ----------
def get_last(user: str, n: int = 4) -> List[str]:
    """Return the user's last *n* chat lines, oldest first, as 'role: message'."""
    if not supabase:
        return []
    try:
        query = (
            supabase.table("chat_memory")
            .select("role,message")
            .eq("user_phone", user)
            .order("created_at", desc=True)
            .limit(n)
        )
        # Query is newest-first; reverse so callers see chronological order.
        rows = list(reversed(query.execute().data))
        return [f"{row['role']}: {row['message']}" for row in rows]
    except Exception as exc:
        log.warning("db read: %s", exc)
        return []
91
 
 
 
 
 
 
 
 
92
 
93
def save_msg(user: str, text: str, role: str = "assistant") -> None:
    """Best-effort insert of one chat line; failures are logged, never raised."""
    if not supabase:
        return
    record = {"user_phone": user, "role": role.lower(), "message": text}
    try:
        supabase.table("chat_memory").insert(record).execute()
    except Exception as exc:
        log.warning("db write: %s", exc)
103
+
104
+
105
# ---------- atomic retriever ----------
@lru_cache(maxsize=1)
def atomic_retriever():
    """Build (once) a hybrid dense + BM25 search over single-line price entries."""
    price_lines: List[Document] = []
    svc = pathlib.Path("services.txt")
    if svc.exists():
        for raw in svc.read_text(encoding="utf-8").splitlines():
            stripped = raw.strip()
            if stripped and "KES" in stripped:  # one atomic price per line
                price_lines.append(Document(page_content=stripped))
    if not price_lines:  # fallback when the catalogue file is missing/empty
        price_lines = [
            Document(page_content="LD Events handles events. Lamaki Designs handles interiors.")
        ]

    dense = FAISS.from_documents(price_lines, EMBED).as_retriever(search_kwargs={"k": 5})
    corpus_tokens = [re.findall(r"\w+", d.page_content.lower()) for d in price_lines]
    bm25 = BM25Okapi(corpus_tokens)

    def search(query: str) -> List[Document]:
        """Merge dense hits with the top BM25 hits, dropping duplicate lines."""
        dense_hits = dense.invoke(query)
        scores = bm25.get_scores(re.findall(r"\w+", query.lower()))
        best = np.argsort(scores)[-5:][::-1]
        sparse_hits = [price_lines[i] for i in best if scores[i] > 0]

        merged: List[Document] = []
        seen = set()
        for doc in dense_hits + sparse_hits:
            if doc.page_content not in seen:
                seen.add(doc.page_content)
                merged.append(doc)
        return merged

    return search


# Built eagerly at import so the first request doesn't pay the FAISS cost.
search = atomic_retriever()
141
 
142
+
143
# ---------- business logic ----------
def company_greeting(company: str) -> str:
    """Return the branded welcome line for the detected company."""
    greetings = {
        "ld events": (
            "🎤 Hey there! Welcome to LD Events – your ultimate sound partner. "
            "How can we make your event unforgettable?"
        ),
    }
    lamaki = "🛋️ Hello! Lamaki Designs here – ready to transform your space. What are you dreaming of?"
    return greetings.get(company, lamaki)
151
 
152
+
153
@lru_cache(maxsize=512)
def smart_reply(text: str, user: str) -> str:
    """Route a message to greeting, price lookup, or generic chat.

    Results are memoised per (text, user) pair.
    """
    text_l = text.lower()
    # Whole-word matching for single-word keywords: the original substring
    # test made "hire" trigger the greeting branch (contains "hi") and
    # "decorate" trigger the pricing branch (contains "rate").
    words = set(re.findall(r"\w+", text_l))

    ld_keywords = (
        "wedding", "concert", "live", "stage", "sound", "ld events",
        "speaker", "line array", "moving head", "parcan", "led screen",
        "bronze", "silver", "gold", "platinum",
    )
    company = "ld events" if any(k in text_l for k in ld_keywords) else "lamaki designs"

    # 1. greetings
    if words & {"hello", "hi", "hey", "jambo"}:
        return company_greeting(company)

    # 2. pricing — answer strictly from retrieved atomic price lines
    if (words & {"price", "cost", "hire", "rate", "quote"}) or "how much" in text_l:
        hits = search(text)
        if not hits:
            return "Which exact item or package would you like a quote for? (e.g. ‘line-array-top’ or ‘Silver-Package’)"
        context = "\n".join(d.page_content for d in hits[:3])
        prompt = (
            f"Using ONLY the lines below, answer in one short sentence. "
            f"Never invent prices. If the exact item is not listed, ask for clarification.\n\n"
            f"Lines:\n{context}\n\nUser: {text}\nAssistant:"
        )
        return fast_llm(prompt, max_new=40)

    # 3. generic chat
    prompt = (
        f"You are a lively Kenyan assistant for {company.title()}. "
        f"Keep answers under 15 words, use emojis, no emails/phones.\nUser: {text}\nAssistant:"
    )
    return fast_llm(prompt, max_new=30)
205
 
206
+
207
+ # ---------- web layer ----------
208
  app = Flask(__name__)
209
 
210
+
211
@app.post("/whatsapp")
def whatsapp():
    """Webhook entry point: verify token, persist both sides, reply.

    ``get_json(silent=True)`` returns None instead of raising on a
    non-JSON body, so malformed requests get a clean 403 rather than a
    500 from ``request.json`` blowing up.
    """
    payload = request.get_json(silent=True) or {}
    if payload.get("verify") != VERIFY_TOKEN:
        return jsonify(error="bad token"), 403
    user = payload.get("from", "unknown")
    # `or ""` also guards against an explicit JSON null for "text",
    # which would crash .strip().
    msg = (payload.get("text") or "").strip()
    save_msg(user, msg, "user")
    ans = smart_reply(msg, user)
    save_msg(user, ans, "assistant")
    return jsonify(reply=ans)
222
 
223
+
224
@app.get("/")
def health():
    """Liveness probe; the container HEALTHCHECK curls this endpoint."""
    return "ok\n"
227
 
228
+
229
if __name__ == "__main__":
    # Development entry point only — the container runs gunicorn instead.
    app.run(host="0.0.0.0", port=7860, threaded=True)