tharunchndrn committed on
Commit
d2509c7
·
verified ·
1 Parent(s): e33803e

Upload 8 files

Browse files
backend_app/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
# ---------------------------------------------------------------------------
# Storage: every persisted artifact is a file directly inside DATA_DIR.
# ---------------------------------------------------------------------------
DATA_DIR = os.environ.get("DATA_DIR", "data")
FAISS_INDEX_PATH = os.path.join(DATA_DIR, "faiss.index")
DOCSTORE_PATH = os.path.join(DATA_DIR, "docs.pkl")
RAW_CACHE_PATH = os.path.join(DATA_DIR, "raw_cache.jsonl")
URLS_PATH = os.path.join(DATA_DIR, "urls.json")

# ---------------------------------------------------------------------------
# Retrieval + web fallback tuning (values are parsed once, at import time).
# ---------------------------------------------------------------------------
MIN_TOP_SCORE = float(os.environ.get("MIN_TOP_SCORE", "0.30"))
WEB_MAX_RESULTS = int(os.environ.get("WEB_MAX_RESULTS", "3"))

# ---------------------------------------------------------------------------
# Embeddings (free, local model).
# ---------------------------------------------------------------------------
EMBED_MODEL_NAME = os.environ.get(
    "EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"
)

# ---------------------------------------------------------------------------
# LLM provider (free, local via Ollama). The provider name is lower-cased so
# comparisons elsewhere can be case-insensitive.
# ---------------------------------------------------------------------------
LLM_PROVIDER = os.environ.get("LLM_PROVIDER", "ollama").lower()
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "phi3")

# ---------------------------------------------------------------------------
# Chatbot identity (UI + greeting).
# ---------------------------------------------------------------------------
BOT_NAME = os.environ.get("BOT_NAME", "SysLink Assistant")
BOT_WELCOME = os.environ.get(
    "BOT_WELCOME", "Welcome to SysLink Food System 👋 How can I help you today?"
)
BOT_LOGO_URL = os.environ.get("BOT_LOGO_URL", "/assets/bot-logo.png")
backend_app/email_service.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import smtplib
5
+ from email.message import EmailMessage
6
+ from typing import Dict
7
+
8
+ from .config import DATA_DIR
9
+
10
+
11
+ CONTACT_LOG_PATH = os.path.join(DATA_DIR, "contact_messages.jsonl")
12
+
13
+
14
def _save_locally(user_email: str, user_message: str) -> None:
    """Append one contact submission to the local JSONL log.

    Creates ``DATA_DIR`` when it does not exist yet, then writes a single
    JSON object (email, message, Unix timestamp in seconds) as one line at
    the end of ``CONTACT_LOG_PATH``.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    record = dict(
        email=user_email,
        message=user_message,
        created_at=int(time.time()),
    )
    serialized = json.dumps(record, ensure_ascii=False) + "\n"

    with open(CONTACT_LOG_PATH, "a", encoding="utf-8") as log_file:
        log_file.write(serialized)
23
+
24
+
25
def send_contact_email(user_email: str, user_message: str) -> Dict:
    """
    Store a contact message locally and, if SMTP is configured, email it.

    The message is always appended to the local log first, so nothing is lost
    when sending is unavailable. SMTP settings are read from the environment
    on every call: SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS and
    CONTACT_TO_EMAIL.

    Returns:
        {"ok": True} when the email was sent, otherwise
        {"ok": False, "error": "<reason>"}. SMTP problems never raise.
    """

    # Always store a copy locally (good for backup)
    _save_locally(user_email, user_message)

    # SMTP settings from environment
    smtp_host = os.getenv("SMTP_HOST", "")
    smtp_user = os.getenv("SMTP_USER", "")
    smtp_pass = os.getenv("SMTP_PASS", "")
    to_email = os.getenv("CONTACT_TO_EMAIL", "")

    # A malformed port must not crash the chatbot; report it like any other
    # configuration problem instead of letting int() raise.
    raw_port = (os.getenv("SMTP_PORT", "") or "").strip()
    try:
        smtp_port = int(raw_port) if raw_port else 0
    except ValueError:
        return {"ok": False, "error": f"Invalid SMTP_PORT: {raw_port!r}"}

    # If not configured, don't fail the whole chatbot
    if not (smtp_host and smtp_port and smtp_user and smtp_pass and to_email):
        return {"ok": False, "error": "SMTP not configured"}

    try:
        msg = EmailMessage()
        msg["Subject"] = "New Contact Message - SysLink Food System"
        msg["From"] = smtp_user
        msg["To"] = to_email

        msg.set_content(
            f"User Email: {user_email}\n\n"
            f"Message:\n{user_message}\n"
        )

        if smtp_port == 465:
            # Port 465 is implicit TLS: the socket is encrypted from the
            # first byte, so STARTTLS must not be issued.
            with smtplib.SMTP_SSL(smtp_host, smtp_port, timeout=30) as server:
                server.login(smtp_user, smtp_pass)
                server.send_message(msg)
        else:
            # Plain connection upgraded to TLS before authenticating.
            with smtplib.SMTP(smtp_host, smtp_port, timeout=30) as server:
                server.starttls()
                server.login(smtp_user, smtp_pass)
                server.send_message(msg)

        return {"ok": True}

    except Exception as e:
        # Deliberate catch-all at the service boundary: the caller only
        # needs ok/error and the message is already saved locally.
        return {"ok": False, "error": str(e)}
backend_app/fetcher.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import re
4
+ import time
5
+ from typing import Dict
6
+
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+
10
+ from .config import RAW_CACHE_PATH
11
+
12
+
13
+ USER_AGENT = "SysLinkBot/1.0 (RAG educational project)"
14
+
15
+
16
+ def _clean_text(text: str) -> str:
17
+ text = re.sub(r"\s+", " ", text)
18
+ return text.strip()
19
+
20
+
21
def _load_cache() -> dict:
    """
    Load the page cache from RAW_CACHE_PATH (one JSON object per line).

    Returns a dict mapping each entry's "url" to the full entry. When a URL
    appears on several lines the last one wins. Blank lines and lines that
    are not valid JSON objects with a "url" key are skipped. Returns an empty
    dict when the cache file does not exist.
    """
    if not os.path.exists(RAW_CACHE_PATH):
        return {}

    cache = {}
    with open(RAW_CACHE_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
                cache[obj["url"]] = obj
            except (ValueError, KeyError, TypeError):
                # ValueError: malformed JSON (JSONDecodeError subclasses it).
                # KeyError/TypeError: valid JSON but not a dict with a usable
                # "url". Anything else (e.g. KeyboardInterrupt) propagates.
                continue
    return cache
34
+
35
+
36
def _append_cache(entry: Dict):
    """
    Append one cache entry to RAW_CACHE_PATH as a single JSON line.

    The parent directory is created when missing. Non-ASCII text is written
    as-is (UTF-8) rather than escaped.
    """
    parent_dir = os.path.dirname(RAW_CACHE_PATH)
    # dirname is "" when the cache path has no directory component;
    # os.makedirs("") would raise, so only create a real directory.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(RAW_CACHE_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")
40
+
41
+
42
def fetch_page_text(url: str, use_cache: bool = True) -> Dict:
    """
    Fetch a webpage and return its cleaned main text.

    Args:
        url: Page to download.
        use_cache: When True, return a previously cached entry for ``url``
            if one exists instead of hitting the network.

    Returns:
        {"url", "title", "text", "fetched_at"} where ``fetched_at`` is a Unix
        timestamp in seconds. Freshly fetched pages are appended to the cache.

    Raises:
        requests.RequestException: on network errors or non-2xx responses.
        ValueError: when the page has neither a <main> nor a <body> element.
    """

    # Only read the cache file when the caller wants cached results; parsing
    # the whole JSONL file is wasted work for a forced refresh.
    if use_cache:
        cached = _load_cache().get(url)
        if cached is not None:
            return cached

    headers = {"User-Agent": USER_AGENT}
    resp = requests.get(url, headers=headers, timeout=30)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "lxml")

    # Remove noisy tags so their content does not end up in the text
    for tag in soup(["script", "style", "noscript", "svg", "footer", "nav"]):
        tag.decompose()

    # Prefer the <main> element; fall back to the whole <body>
    main = soup.find("main") or soup.body
    if not main:
        raise ValueError("No readable content found")

    text = _clean_text(main.get_text(separator=" "))

    # Fall back to the URL when the page has no <title>
    title = soup.title.get_text(strip=True) if soup.title else url

    result = {
        "url": url,
        "title": title,
        "text": text,
        "fetched_at": int(time.time()),
    }

    _append_cache(result)

    return result
backend_app/flows.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Dict, List, Optional
3
+ import re
4
+ from .email_service import send_contact_email
5
+
6
+ from .suggestions import (
7
+ default_suggestions,
8
+ suggestions_for_intent,
9
+ suggestions_from_text,
10
+ )
11
+
12
class FlowManager:
    """
    Manages lightweight, in-memory session state for:
    - Contact flow (collect message + email)
    - Language flow (choose language/region)

    Each session is a dict with these keys:
        mode            "normal" | "contact_wait_msg" | "contact_wait_email" | "lang_wait"
        contact_msg     message collected so far in the contact flow, or None
        lang            chosen reply language ("Sinhala", "Tamil", "English") or None
        pending_contact True when a contact flow should start right after the
                        language flow finishes
    """

    # Exact replies (case-insensitive) that abort an in-progress flow.
    # Matching is exact so a real message such as "please cancel my order"
    # is still treated as message content.
    _CANCEL_PHRASES = frozenset({"back to main menu", "main menu", "cancel"})

    def __init__(self):
        # session_id -> state
        self.sessions: Dict[str, Dict] = {}

    # ---------- Suggestions ----------
    def default_suggestions(self) -> List[str]:
        """Return the suggestions shown when no flow is active."""
        return default_suggestions()

    # ---------- Session helpers ----------
    def _get(self, session_id: str) -> Dict:
        """Return the state for ``session_id``, creating it on first use."""
        if session_id not in self.sessions:
            self.sessions[session_id] = {
                "mode": "normal",  # normal | contact_wait_msg | contact_wait_email | lang_wait
                "contact_msg": None,
                "lang": None,  # e.g. "Sinhala", "Tamil", "English"
                "pending_contact": False,
            }
        return self.sessions[session_id]

    def _reply(self, state: Dict, answer: str, suggestions: List[str], action: str = "flow") -> Dict:
        """Build the response dict shared by every branch."""
        return {
            "action": action,
            "answer": answer,
            "suggestions": suggestions,
            "lang": state.get("lang"),
        }

    def _is_cancel(self, text: str) -> bool:
        """Return True when ``text`` is exactly one of the cancel phrases."""
        return text.strip().lower() in self._CANCEL_PHRASES

    def _cancel_flow(self, state: Dict) -> Dict:
        """Drop any in-progress flow data and return to normal mode."""
        state["mode"] = "normal"
        state["contact_msg"] = None
        state["pending_contact"] = False
        return self._reply(
            state,
            "No problem — back to the main menu. How can I help you?",
            default_suggestions(),
        )

    # ---------- Intents ----------
    def _detect_intents(self, text: str) -> List[str]:
        """
        Return the intents whose keywords occur in ``text`` (substring match,
        case-insensitive), in the order contact, language, services.
        Returns ["rag"] when no keyword matches.
        """
        t = text.lower()

        intents = []
        if any(k in t for k in ["contact", "support", "help desk", "reach", "email us", "contact us"]):
            intents.append("contact")
        if any(k in t for k in ["language", "sinhala", "tamil", "english", "change language", "translate"]):
            intents.append("language")
        if any(k in t for k in ["service", "services", "what do you do", "features", "what is syslink", "about"]):
            intents.append("services")

        return intents or ["rag"]

    # ---------- Main entry ----------
    def handle_message(self, session_id: str, user_message: str) -> Dict:
        """
        Route one user message.

        Returns dict:
          {
            "action": "flow" | "rag",
            "answer": "...",          # empty when action is "rag"
            "suggestions": [...],
            "lang": optional preferred language for RAG
          }
        """
        state = self._get(session_id)
        msg = (user_message or "").strip()

        # 1) If we're in the middle of a flow, handle it first
        if state["mode"].startswith("contact_"):
            return self._handle_contact_flow(state, msg)

        if state["mode"] == "lang_wait":
            return self._handle_language_flow(state, msg)

        # 2) Not in a flow: detect intent(s)
        intents = self._detect_intents(msg)

        # 3) Language is asked first (quick). When the same message also
        #    asked for contact, remember it so the contact flow starts as
        #    soon as the language has been chosen.
        if "language" in intents:
            state["mode"] = "lang_wait"
            state["pending_contact"] = "contact" in intents
            return self._reply(
                state,
                "Sure. Which language would you like (Sinhala / Tamil / English)?",
                suggestions_for_intent("language"),
            )

        if "contact" in intents:
            state["mode"] = "contact_wait_msg"
            return self._reply(
                state,
                "Sure — please type your message for our team.",
                suggestions_for_intent("contact"),
            )

        if "services" in intents:
            # Let RAG answer, but provide service-related suggestions
            return self._reply(state, "", suggestions_for_intent("services"), action="rag")

        # 4) Default: RAG, with suggestions derived from what the user typed
        return self._reply(state, "", suggestions_from_text(msg), action="rag")

    # ---------- Contact flow ----------
    def _handle_contact_flow(self, state: Dict, msg: str) -> Dict:
        """Advance the two-step contact flow (message, then email)."""
        # The contact suggestions offer "Back to main menu"; without this
        # check that click would be recorded as the user's message.
        if self._is_cancel(msg):
            return self._cancel_flow(state)

        if state["mode"] == "contact_wait_msg":
            if not msg:
                # Nothing to forward yet; stay in this step and ask again.
                return self._reply(state, "Please type your message for our team.", [])

            state["contact_msg"] = msg
            state["mode"] = "contact_wait_email"
            return self._reply(state, "Thanks. Now please enter your email address.", [])

        if state["mode"] == "contact_wait_email":
            if not self._is_valid_email(msg):
                return self._reply(
                    state,
                    "That email doesn’t look valid. Please type a valid email (example: name@gmail.com).",
                    [],
                )

            # Send email (free SMTP). If not configured, we still store and confirm.
            email = msg
            message = state.get("contact_msg") or ""

            result = send_contact_email(user_email=email, user_message=message)

            # Reset flow state
            state["mode"] = "normal"
            state["contact_msg"] = None

            if result["ok"]:
                return self._reply(
                    state,
                    "✅ Sent! Thanks — our team will contact you soon.",
                    default_suggestions(),
                )

            return self._reply(
                state,
                (
                    "✅ I saved your message, but email sending isn’t configured yet on the server.\n"
                    "Our team can still contact you using the details you provided."
                ),
                default_suggestions(),
            )

        # fallback: unknown contact_* mode, recover by handing over to RAG
        state["mode"] = "normal"
        return self._reply(state, "", default_suggestions(), action="rag")

    def submit_contact(self, session_id: str, email: str, message: str) -> Dict:
        """
        Optional endpoint use: send a contact message without the chat flow.

        Returns {"ok": True, "message": "Sent"} on success, otherwise
        {"ok": False, "message": "Not configured"}.
        """
        # Registers the session if it is new; the state itself is not needed.
        self._get(session_id)
        result = send_contact_email(user_email=email, user_message=message)
        if result["ok"]:
            return {"ok": True, "message": "Sent"}
        return {"ok": False, "message": "Not configured"}

    def _is_valid_email(self, s: str) -> bool:
        """Loose shape check: something@something.something, no spaces."""
        return bool(re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", s.strip()))

    # ---------- Language flow ----------
    def _handle_language_flow(self, state: Dict, msg: str) -> Dict:
        """Record the chosen language, or re-prompt when it is not recognised."""
        if self._is_cancel(msg):
            return self._cancel_flow(state)

        t = msg.strip().lower()

        # Accept direct language choice (full name anywhere, or a short code)
        if "sinhala" in t or t in ("si", "sin"):
            state["lang"] = "Sinhala"
        elif "tamil" in t or t == "ta":
            state["lang"] = "Tamil"
        elif "english" in t or t == "en":
            state["lang"] = "English"
        elif any(k in t for k in ["sri lanka", "colombo", "kandy", "galle", "jaffna"]):
            # Accept region words -> map quickly
            # (You can expand this later)
            state["lang"] = "Sinhala"
        else:
            return self._reply(
                state,
                "Please type the language you want: Sinhala / Tamil / English.",
                suggestions_for_intent("language"),
            )

        confirmation = f"✅ Done. I’ll reply in {state['lang']} from now on."

        # The original request also asked for contact: continue there.
        if state.get("pending_contact"):
            state["pending_contact"] = False
            state["mode"] = "contact_wait_msg"
            return self._reply(
                state,
                confirmation + "\nNow please type your message for our team.",
                suggestions_for_intent("contact"),
            )

        # Finish language flow
        state["mode"] = "normal"
        return self._reply(state, confirmation, default_suggestions())
backend_app/ingest.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pickle
4
+ from typing import List, Dict
5
+
6
+ import numpy as np
7
+ import faiss
8
+ from sentence_transformers import SentenceTransformer
9
+
10
+ from .config import (
11
+ DATA_DIR,
12
+ URLS_PATH,
13
+ FAISS_INDEX_PATH,
14
+ DOCSTORE_PATH,
15
+ EMBED_MODEL_NAME,
16
+ )
17
+ from .fetcher import fetch_page_text
18
+
19
+
20
def ensure_data_dir():
    """Create ``DATA_DIR`` (and any missing parents); no-op if it exists."""
    os.makedirs(DATA_DIR, exist_ok=True)
22
+
23
+
24
def load_urls() -> List[str]:
    """
    Read the list of URLs to ingest from URLS_PATH.

    Expects data/urls.json like:
    {
      "urls": ["https://...", "https://..."]
    }
    A bare top-level JSON list of URLs is accepted as well. Surrounding
    whitespace is stripped and blank entries are dropped.

    Raises:
        FileNotFoundError: when URLS_PATH does not exist.
        ValueError: when the file has the wrong shape, contains a non-string
            entry, or yields no URLs.
    """
    if not os.path.exists(URLS_PATH):
        raise FileNotFoundError(
            f"Missing {URLS_PATH}. Create it with your 4 URLs."
        )
    with open(URLS_PATH, "r", encoding="utf-8") as f:
        obj = json.load(f)

    # Accept both {"urls": [...]} and a plain [...] at the top level.
    raw = obj.get("urls", []) if isinstance(obj, dict) else obj
    if raw is None:
        raw = []
    if not isinstance(raw, list):
        raise ValueError('urls.json must be {"urls": [...]} or a list of URLs.')

    urls: List[str] = []
    for item in raw:
        if not isinstance(item, str):
            # Fail here with a clear message rather than later in the fetcher.
            raise ValueError(f"urls.json contains a non-string URL: {item!r}")
        cleaned = item.strip()
        if cleaned:
            urls.append(cleaned)

    if not urls:
        raise ValueError("urls.json has no URLs. Add at least 1 URL.")
    return urls
41
+
42
+
43
def chunk_text(text: str, chunk_size_words: int = 900, overlap_words: int = 150) -> List[str]:
    """
    Split ``text`` into overlapping, word-based chunks.

    Args:
        text: Input text; words are separated on any whitespace.
        chunk_size_words: Maximum number of words per chunk (must be >= 1).
        overlap_words: Words shared between consecutive chunks. Negative
            values are treated as 0; values >= chunk_size_words advance one
            word at a time.

    Returns:
        Chunks in document order, each a single-space-joined string. Empty
        input gives []. The last chunk always ends at the final word, so no
        trailing chunk is emitted that is fully contained in the previous one.

    Raises:
        ValueError: when ``chunk_size_words`` is smaller than 1.
    """
    if chunk_size_words < 1:
        raise ValueError("chunk_size_words must be at least 1")

    words = text.split()
    # Always advance by at least one word so the loop terminates.
    step = max(1, chunk_size_words - max(0, overlap_words))

    chunks: List[str] = []
    for start in range(0, len(words), step):
        end = start + chunk_size_words
        chunks.append(" ".join(words[start:end]))
        if end >= len(words):
            # This chunk reached the end; any further window would only
            # repeat words already covered by it.
            break

    return chunks
58
+
59
+
60
def build_docs_from_urls(urls: List[str]) -> List[Dict]:
    """
    Fetch every URL (using the page cache) and split its text into chunks.

    Returns one dict per chunk: {"text": <chunk>, "meta": {"url", "title",
    "chunk"}} where "chunk" is the chunk's 0-based position within its page.
    Any error raised while fetching a page propagates to the caller.
    """
    documents: List[Dict] = []
    for source_url in urls:
        page = fetch_page_text(source_url, use_cache=True)
        pieces = chunk_text(page["text"])
        documents.extend(
            {
                "text": piece,
                "meta": {
                    "url": page["url"],
                    "title": page["title"],
                    "chunk": position,
                },
            }
            for position, piece in enumerate(pieces)
        )
    return documents
76
+
77
+
78
def build_faiss_index(docs: List[Dict]) -> None:
    """
    Embed every document's text and persist the index plus the docstore.

    Embeddings are requested normalized and stored in an inner-product index.
    The index is written to FAISS_INDEX_PATH first, then ``docs`` is pickled
    to DOCSTORE_PATH in the same order as the vectors were added.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)

    vectors = np.array(
        encoder.encode(
            [doc["text"] for doc in docs],
            normalize_embeddings=True,
            show_progress_bar=True,
        ),
        dtype="float32",
    )

    dimension = vectors.shape[1]
    index = faiss.IndexFlatIP(dimension)
    index.add(vectors)
    faiss.write_index(index, FAISS_INDEX_PATH)

    with open(DOCSTORE_PATH, "wb") as docstore_file:
        pickle.dump(docs, docstore_file)
92
+
93
+
94
def run_ingestion():
    """
    Run the full ingestion pipeline and print a short summary.

    Steps: make sure the data directory exists, load the URL list, build the
    chunked documents, then build and save the index.

    Raises:
        RuntimeError: when the URLs produced no documents at all.
    """
    ensure_data_dir()
    urls = load_urls()
    docs = build_docs_from_urls(urls)

    if len(docs) == 0:
        raise RuntimeError("No documents created from URLs. Check your URLs/pages.")

    build_faiss_index(docs)

    summary = (
        "✅ Ingestion complete",
        f"URLs: {len(urls)}",
        f"Chunks: {len(docs)}",
        f"Saved index: {FAISS_INDEX_PATH}",
        f"Saved docs: {DOCSTORE_PATH}",
    )
    for line in summary:
        print(line)


if __name__ == "__main__":
    run_ingestion()
backend_app/rag_hf.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from typing import List, Dict, Optional, Tuple
4
+
5
+ import numpy as np
6
+ import faiss
7
+ from sentence_transformers import SentenceTransformer
8
+ from huggingface_hub import InferenceClient
9
+
10
+ from .config import (
11
+ FAISS_INDEX_PATH,
12
+ DOCSTORE_PATH,
13
+ EMBED_MODEL_NAME,
14
+ MIN_TOP_SCORE,
15
+ WEB_MAX_RESULTS,
16
+ )
17
+ from .fetcher import fetch_page_text
18
+ from .web_search import web_search
19
+
20
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
21
+ HF_MODEL = os.getenv("HF_MODEL", "HuggingFaceH4/zephyr-7b-beta") # you can change later
22
+
23
class RAGEngineHF:
    """
    Retrieval-augmented answering backed by a Hugging Face inference model.

    Looks up the question in the local FAISS index first; when the best local
    hit scores below MIN_TOP_SCORE (or nothing is found) it falls back to a
    web search and uses the fetched pages as context instead.
    """

    def __init__(self):
        self.embedder = SentenceTransformer(EMBED_MODEL_NAME)
        self.index = faiss.read_index(FAISS_INDEX_PATH)
        # NOTE(security): pickle.load runs arbitrary code from the file.
        # DOCSTORE_PATH must only ever point at a trusted, locally produced
        # docstore; never load one received from outside.
        with open(DOCSTORE_PATH, "rb") as f:
            self.docs: List[Dict] = pickle.load(f)

        self.client = InferenceClient(model=HF_MODEL, token=HF_TOKEN)
        self.TOP_K = 5
        self.MAX_CONTEXT_CHARS_PER_DOC = 1800

    def retrieve_local(self, query: str, k: int = 5) -> List[Dict]:
        """
        Return up to ``k`` local hits for ``query``, best first.

        Each hit is {"rank", "score", "text", "meta"}; "rank" is the 1-based
        position in the search result and "score" the similarity reported by
        the index.
        """
        q_emb = self.embedder.encode([query], normalize_embeddings=True)
        q_emb = np.array(q_emb, dtype="float32")
        scores, ids = self.index.search(q_emb, k)

        out = []
        for position, (score, doc_id) in enumerate(zip(scores[0], ids[0])):
            # -1 marks an empty result slot (fewer than k hits available).
            if doc_id == -1:
                continue
            d = self.docs[int(doc_id)]
            out.append({
                "rank": position + 1,
                "score": float(score),
                "text": d["text"],
                "meta": d["meta"],
            })
        return out

    def _needs_web_fallback(self, contexts: List[Dict]) -> bool:
        """True when there are no local hits or the best one is too weak."""
        return (not contexts) or (contexts[0]["score"] < MIN_TOP_SCORE)

    def fetch_web_context(self, query: str) -> Tuple[List[Dict], List[Dict]]:
        """
        Search the web for ``query`` and fetch up to WEB_MAX_RESULTS pages.

        Tries a site-restricted query first, then the plain query, stopping
        as soon as enough distinct URLs were collected. This path is
        best-effort: a failing search or page is skipped, never raised.

        Returns:
            (contexts, sources): contexts shaped like ``retrieve_local``
            hits (score fixed at 0.0) and the matching {"title", "url"} list.
        """
        queries = [f"site:foodsystemsdashboard.org {query}", query]
        links, seen = [], set()

        for q in queries:
            # Skip the remaining searches once we already have enough links.
            if len(links) >= WEB_MAX_RESULTS:
                break
            try:
                results = web_search(q, max_results=WEB_MAX_RESULTS)
            except Exception:
                # A search failure must not abort the whole answer; the
                # caller falls back to local context when nothing is found.
                continue
            for r in results:
                if r["url"] in seen:
                    continue
                links.append(r)
                seen.add(r["url"])
                if len(links) >= WEB_MAX_RESULTS:
                    break

        contexts, sources = [], []
        for r in links:
            try:
                page = fetch_page_text(r["url"], use_cache=True)
            except Exception:
                # Unreachable/unparseable page: skip it, keep the others.
                # (Exception, not a bare except, so Ctrl-C still works.)
                continue
            contexts.append({
                "rank": len(contexts) + 1,
                "score": 0.0,
                "text": page["text"],
                "meta": {"url": page["url"], "title": page["title"], "chunk": 0},
            })
            sources.append({"title": page["title"], "url": page["url"]})
        return contexts, sources

    def _build_prompt(self, query: str, contexts: List[Dict], preferred_lang: Optional[str] = None) -> str:
        """Assemble the LLM prompt from the question and numbered contexts."""
        # Each context is truncated so one long page cannot crowd out the rest.
        context_block = "\n\n".join(
            f"[{number}] {c['meta']['title']}\n{c['text'][:self.MAX_CONTEXT_CHARS_PER_DOC]}"
            for number, c in enumerate(contexts, start=1)
        )

        lang_line = f"Respond in {preferred_lang}.\n" if preferred_lang else ""

        return (
            "You are the SysLink Food System assistant.\n"
            "Use ONLY the context below. Do not invent facts.\n"
            "Write in simple language, MEDIUM length (8–14 lines). Not too brief.\n"
            "If info is missing, say what is missing.\n"
            "\n"
            f"{lang_line}\n"
            f"QUESTION: {query}\n"
            "\n"
            "CONTEXT:\n"
            f"{context_block}\n"
            "\n"
            "ANSWER:"
        )

    def answer(self, query: str, preferred_lang: Optional[str] = None) -> Dict:
        """
        Answer ``query`` from local context, or web context as a fallback.

        Returns:
            {"answer": str, "sources": [{"title", "url"}, ...],
             "used": "local" | "web"}
        """
        local = self.retrieve_local(query, k=self.TOP_K)
        used = "local"
        contexts = local
        sources = self._unique_sources(local)

        if self._needs_web_fallback(local):
            web_ctx, web_src = self.fetch_web_context(query)
            # Keep the (weak) local context when the web gave us nothing.
            if web_ctx:
                used = "web"
                contexts = web_ctx
                sources = web_src

        prompt = self._build_prompt(query, contexts, preferred_lang)

        out = self.client.text_generation(
            prompt,
            max_new_tokens=250,
            temperature=0.2,
            return_full_text=False,
        ).strip()

        if not out:
            out = "I couldn’t find enough reliable information in the provided sources. Please rephrase or share more details."

        return {"answer": out, "sources": sources, "used": used}

    def _unique_sources(self, contexts: List[Dict]) -> List[Dict]:
        """Return one {"title", "url"} per distinct URL, first occurrence wins."""
        seen, out = set(), []
        for c in contexts:
            u = c["meta"]["url"]
            if u not in seen:
                out.append({"title": c["meta"]["title"], "url": u})
                seen.add(u)
        return out
backend_app/suggestions.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend_app/suggestions.py
2
+ from typing import List
3
+
4
+
5
def default_suggestions() -> List[str]:
    """
    Return the starter suggestions (a new list on every call).

    These are shown when the chat opens and after a flow completes.
    """
    starters = (
        "Tell us about your services",
        "Contact us",
        "Change response language",
    )
    return list(starters)
14
+
15
+
16
def suggestions_for_intent(intent: str) -> List[str]:
    """
    Return the suggestions tied to one flow/intent.

    ``intent`` is matched case-insensitively against "contact", "language"
    and "services"; anything else (including None or "") yields the default
    suggestions.
    """
    # Built per call so every caller receives its own list objects.
    by_intent = {
        "contact": [
            "I want to contact support",
            "Back to main menu",
        ],
        "language": [
            "Sinhala",
            "Tamil",
            "English",
        ],
        "services": [
            "What is Food SysLink?",
            "How does it work?",
            "What problems does it solve?",
            "Change response language",
            "Contact us",
        ],
    }

    key = (intent or "").lower()
    if key in by_intent:
        return by_intent[key]
    return default_suggestions()
45
+
46
+
47
def suggestions_from_text(user_text: str) -> List[str]:
    """
    Pick follow-up suggestions that relate to what the user just typed.

    Lightweight keyword approach (fast and free): topics are checked in the
    order language, contact/support, services, and the first topic with a
    keyword occurring in the lower-cased text wins. With no match (or empty
    or None input) the default suggestions are returned.
    """
    lowered = (user_text or "").lower()

    # (keywords, suggestions) pairs, checked top to bottom.
    topics = (
        (
            ("language", "sinhala", "tamil", "english", "translate"),
            ["Sinhala", "Tamil", "English"],
        ),
        (
            ("contact", "support", "help", "email", "reach"),
            ["Contact us", "Tell us about your services", "Change response language"],
        ),
        (
            ("service", "services", "feature", "about", "what is", "syslink"),
            [
                "Tell us about your services",
                "What is Food SysLink?",
                "How does it work?",
                "Contact us",
            ],
        ),
    )

    for keywords, related in topics:
        for keyword in keywords:
            if keyword in lowered:
                return related

    # Default suggestions
    return [
        "Tell us about your services",
        "Contact us",
        "Change response language",
    ]
backend_app/web_search.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend_app/web_search.py
2
+ from typing import List, Dict
3
+ from duckduckgo_search import DDGS
4
+
5
+
6
def web_search(query: str, max_results: int = 3) -> List[Dict]:
    """
    Run a DuckDuckGo text search (free) for ``query``.

    Returns: [{"title": "...", "url": "..."}], in the order the search
    returned them. Hits missing either a title or a link are left out.
    """
    with DDGS() as ddgs:
        hits: List[Dict] = [
            {"title": item.get("title"), "url": item.get("href")}
            for item in ddgs.text(query, max_results=max_results)
            if item.get("href") and item.get("title")
        ]

    return hits