drrobot9 committed on
Commit
66e3f51
·
verified ·
1 Parent(s): 8b37f9c

health care AI initial commit

Browse files
Files changed (42) hide show
  1. .dockerignore +0 -0
  2. .dockerigore +0 -0
  3. .gitattributes +5 -0
  4. Dockerfile +56 -0
  5. app/__init__.py +0 -0
  6. app/__pycache__/__init__.cpython-311.pyc +0 -0
  7. app/__pycache__/__init__.cpython-312.pyc +0 -0
  8. app/__pycache__/main.cpython-311.pyc +0 -0
  9. app/__pycache__/main.cpython-312.pyc +0 -0
  10. app/agents/__init__.py +0 -0
  11. app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  12. app/agents/__pycache__/__init__.cpython-312.pyc +0 -0
  13. app/agents/__pycache__/crew_pipeline.cpython-311.pyc +0 -0
  14. app/agents/__pycache__/crew_pipeline.cpython-312.pyc +0 -0
  15. app/agents/crew_pipeline.py +278 -0
  16. app/main.py +85 -0
  17. app/models/__init__.py +0 -0
  18. app/models/intent_classifier_v2.joblib +3 -0
  19. app/tasks/__init__.py +0 -0
  20. app/tasks/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/tasks/__pycache__/__init__.cpython-312.pyc +0 -0
  22. app/tasks/__pycache__/rag_updater.cpython-311.pyc +0 -0
  23. app/tasks/__pycache__/rag_updater.cpython-312.pyc +0 -0
  24. app/tasks/rag_updater.py +141 -0
  25. app/utils/__init__.py +0 -0
  26. app/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  27. app/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  28. app/utils/__pycache__/config.cpython-311.pyc +0 -0
  29. app/utils/__pycache__/config.cpython-312.pyc +0 -0
  30. app/utils/__pycache__/memory.cpython-312.pyc +0 -0
  31. app/utils/config.py +54 -0
  32. app/utils/memory.py +28 -0
  33. app/vectorstore/__init__.py +0 -0
  34. app/vectorstore/faiss_index/index.faiss +3 -0
  35. app/vectorstore/faiss_index/index.pkl +3 -0
  36. app/vectorstore/live_rag_index/index.faiss +3 -0
  37. app/vectorstore/live_rag_index/index.pkl +3 -0
  38. app/venv/bin/python +3 -0
  39. app/venv/bin/python3 +3 -0
  40. app/venv/bin/python3.11 +3 -0
  41. app/venv/pyvenv.cfg +5 -0
  42. requirements.txt +21 -0
.dockerignore ADDED
File without changes
.dockerigore ADDED
File without changes
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ app/vectorstore/faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ app/vectorstore/live_rag_index/index.faiss filter=lfs diff=lfs merge=lfs -text
38
+ app/venv/bin/python filter=lfs diff=lfs merge=lfs -text
39
+ app/venv/bin/python3 filter=lfs diff=lfs merge=lfs -text
40
+ app/venv/bin/python3.11 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base Image
2
+ FROM python:3.10-slim
3
+
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive \
6
+ PYTHONUNBUFFERED=1 \
7
+ PYTHONDONTWRITEBYTECODE=1
8
+
9
+
10
+ WORKDIR /code
11
+
12
+ # System Dependencies
13
+ RUN apt-get update && apt-get install -y --no-install-recommends \
14
+ build-essential \
15
+ git \
16
+ curl \
17
+ libopenblas-dev \
18
+ libomp-dev \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+
22
+ COPY requirements.txt .
23
+ RUN pip install --no-cache-dir -r requirements.txt
24
+
25
+ # Hugging Face + model tools
26
+ RUN pip install --no-cache-dir huggingface-hub sentencepiece accelerate fasttext
27
+
28
+ # Hugging Face cache environment
29
+ ENV HF_HOME=/models/huggingface \
30
+ TRANSFORMERS_CACHE=/models/huggingface \
31
+ HUGGINGFACE_HUB_CACHE=/models/huggingface \
32
+ HF_HUB_CACHE=/models/huggingface
33
+
34
+ # Create cache dir and set permissions
35
+ RUN mkdir -p /models/huggingface && chmod -R 777 /models/huggingface
36
+
37
+ # Pre-download models at build time
38
+ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='Qwen/Qwen3-4B-Instruct-2507')" \
39
+ && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')" \
40
+ && python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')" \
41
+ && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='drrobot9/nllb-ig-yo-ha-finetuned')" \
42
+ && find /models/huggingface -name '*.lock' -delete
43
+
44
+ # Preload tokenizers (avoid runtime delays)
45
+ RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-4B-Instruct-2507', use_fast=True)" \
46
+ && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', use_fast=True)" \
47
+ && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('drrobot9/nllb-ig-yo-ha-finetuned', use_fast=True)"
48
+
49
+ # Copy project files
50
+ COPY . .
51
+
52
+ # Expose FastAPI port
53
+ EXPOSE 7860
54
+
55
+ # Run FastAPI app with uvicorn (a single worker process)
56
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (166 Bytes). View file
 
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (154 Bytes). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.31 kB). View file
 
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (3.62 kB). View file
 
app/agents/__init__.py ADDED
File without changes
app/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
app/agents/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (161 Bytes). View file
 
app/agents/__pycache__/crew_pipeline.cpython-311.pyc ADDED
Binary file (8.73 kB). View file
 
app/agents/__pycache__/crew_pipeline.cpython-312.pyc ADDED
Binary file (13.7 kB). View file
 
app/agents/crew_pipeline.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # farmlingua/app/agents/crew_pipeline.py (memory section)
2
+ import os
3
+ import sys
4
+ import re
5
+ import uuid
6
+ import requests
7
+ import joblib
8
+ import faiss
9
+ import numpy as np
10
+ import torch
11
+ import fasttext
12
+ from huggingface_hub import hf_hub_download
13
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
14
+ from sentence_transformers import SentenceTransformer
15
+ from app.utils import config
16
+ from app.utils.memory import memory_store # memory module
17
+ from typing import List
18
+
19
+
20
+ hf_cache = "/models/huggingface"
21
+ os.environ["HF_HOME"] = hf_cache
22
+ os.environ["TRANSFORMERS_CACHE"] = hf_cache
23
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
24
+ os.makedirs(hf_cache, exist_ok=True)
25
+
26
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
27
+ if BASE_DIR not in sys.path:
28
+ sys.path.insert(0, BASE_DIR)
29
+
30
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
31
+
32
+
33
# Load the optional intent classifier. A missing or unreadable model file is
# tolerated (detect_intent only uses the classifier when it exposes
# predict/predict_proba), but the failure is reported instead of being
# silently discarded so a broken deployment is visible in the logs.
try:
    classifier = joblib.load(config.CLASSIFIER_PATH)
except Exception as exc:  # joblib/pickle can raise many unrelated error types
    print(f"Intent classifier unavailable ({config.CLASSIFIER_PATH}): {exc}")
    classifier = None
37
+
38
+
39
+ print(f"Loading expert model ({config.EXPERT_MODEL_NAME})...")
40
+ tokenizer = AutoTokenizer.from_pretrained(config.EXPERT_MODEL_NAME, use_fast=False)
41
+ model = AutoModelForCausalLM.from_pretrained(
42
+ config.EXPERT_MODEL_NAME,
43
+ torch_dtype="auto",
44
+ device_map="auto"
45
+ )
46
+
47
+
48
+ embedder = SentenceTransformer(config.EMBEDDING_MODEL)
49
+
50
+ # language detector
51
+ print(f"Loading FastText language identifier ({config.LANG_ID_MODEL_REPO})...")
52
+ lang_model_path = hf_hub_download(
53
+ repo_id=config.LANG_ID_MODEL_REPO,
54
+ filename=getattr(config, "LANG_ID_MODEL_FILE", "model.bin")
55
+ )
56
+ lang_identifier = fasttext.load_model(lang_model_path)
57
+
58
def detect_language(text: str, top_k: int = 1):
    """Identify the most likely language(s) of *text*.

    Args:
        text: Input text; newlines are replaced by spaces before prediction.
        top_k: Number of candidate languages to request from the identifier.

    Returns:
        A list of ``(label, probability)`` tuples with the ``__label__``
        prefix removed from each label. Empty or whitespace-only input
        yields ``[("eng_Latn", 1.0)]`` without calling the model.
    """
    if not (text and text.strip()):
        return [("eng_Latn", 1.0)]

    single_line = text.replace("\n", " ").strip()
    raw_labels, raw_probs = lang_identifier.predict(single_line, k=top_k)

    results = []
    for raw_label, raw_prob in zip(raw_labels, raw_probs):
        results.append((raw_label.replace("__label__", ""), float(raw_prob)))
    return results
64
+
65
+ # Translation model
66
+ print(f"Loading translation model ({config.TRANSLATION_MODEL_NAME})...")
67
+ translation_pipeline = pipeline(
68
+ "translation",
69
+ model=config.TRANSLATION_MODEL_NAME,
70
+ device=0 if DEVICE == "cuda" else -1,
71
+ max_new_tokens=400,
72
+ )
73
+
74
# Languages the pipeline accepts, keyed by the language_Script code that is
# compared against the language detector's label and passed to the
# translation pipeline as src_lang/tgt_lang.
# Amharic is written in the Ethiopic script, so its code is "amh_Ethi";
# the previous "amh_Latn" key could never match a detected label.
SUPPORTED_LANGS = {
    "eng_Latn": "English",
    "ibo_Latn": "Igbo",
    "yor_Latn": "Yoruba",
    "hau_Latn": "Hausa",
    "swh_Latn": "Swahili",
    "amh_Ethi": "Amharic",
}
82
+
83
+ # Text chunking
84
# Split after sentence-ending punctuation that is followed by whitespace.
_SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')


def _split_oversized(piece: str, max_len: int) -> List[str]:
    """Break *piece* into parts of at most *max_len* characters, on word boundaries where possible."""
    if max_len < 1:
        return [piece]
    parts, current = [], ""
    for word in piece.split():
        # A single token longer than the limit has to be cut mid-token.
        while len(word) > max_len:
            if current:
                parts.append(current)
                current = ""
            parts.append(word[:max_len])
            word = word[max_len:]
        if not word:
            continue
        candidate = f"{current} {word}" if current else word
        if len(candidate) <= max_len:
            current = candidate
        else:
            parts.append(current)
            current = word
    if current:
        parts.append(current)
    return parts


def chunk_text(text: str, max_len: int = 400) -> List[str]:
    """Split *text* into chunks of at most *max_len* characters.

    Sentences (split after ``.``, ``!`` or ``?``) are packed greedily into
    chunks, joined by single spaces. A sentence that is itself longer than
    *max_len* is broken up further, so no returned chunk exceeds the limit;
    previously such a sentence was emitted as one oversized chunk.

    Args:
        text: Text to split; empty or ``None`` yields ``[]``.
        max_len: Maximum characters per chunk.

    Returns:
        The list of non-empty chunks in original order.
    """
    if not text:
        return []
    chunks, current = [], ""
    for sentence in _SENTENCE_SPLIT_RE.split(text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) > max_len:
            pieces = _split_oversized(sentence, max_len)
        else:
            pieces = [sentence]
        for piece in pieces:
            if len(current) + len(piece) + 1 <= max_len:
                current = f"{current} {piece}".strip()
            else:
                if current:
                    chunks.append(current)
                current = piece
    if current:
        chunks.append(current)
    return chunks
103
+
104
def translate_text(text: str, src_lang: str, tgt_lang: str, max_chunk_len: int = 400) -> str:
    """Translate *text* from *src_lang* to *tgt_lang* chunk by chunk.

    The text is split with ``chunk_text`` so each call to the translation
    pipeline receives at most *max_chunk_len* characters of sentence-aligned
    input; the translated chunks are joined with single spaces.

    Args:
        text: Text to translate. ``None``, empty or whitespace-only input is
            returned unchanged without calling the model.
        src_lang: Source language code passed to the pipeline.
        tgt_lang: Target language code passed to the pipeline.
        max_chunk_len: Maximum characters per chunk.

    Returns:
        The translated text, stripped of surrounding whitespace.
    """
    # Guard against None as well as blank strings (None used to raise AttributeError).
    if not text or not text.strip():
        return text
    translated_parts = [
        translation_pipeline(chunk, src_lang=src_lang, tgt_lang=tgt_lang)[0]["translation_text"]
        for chunk in chunk_text(text, max_len=max_chunk_len)
    ]
    return " ".join(translated_parts).strip()
113
+
114
+ # RAG retrieval
115
def retrieve_docs(query: str, vs_path: str):
    """Return the text of up to three stored documents nearest to *query*.

    Args:
        query: Text to embed and search for.
        vs_path: Path to a FAISS index file, or to a directory containing
            ``index.faiss``.

    Returns:
        The matching documents joined by blank lines, or ``None`` when the
        path is missing, the index cannot be read, no metadata file exists,
        or no neighbour maps to stored text.
    """
    if not vs_path or not os.path.exists(vs_path):
        return None

    # The vector-store locations are directories holding "index.faiss";
    # faiss.read_index needs the index file itself, not the directory.
    index_file = str(vs_path)
    if os.path.isdir(index_file):
        index_file = os.path.join(index_file, "index.faiss")
    try:
        index = faiss.read_index(index_file)
    except Exception:
        return None

    meta_path = str(vs_path) + "_meta.npy"
    if not os.path.exists(meta_path):
        # NOTE(review): nothing visible in this commit writes "<path>_meta.npy";
        # the updater saves document texts via LangChain (index.pkl). Until the
        # two formats are reconciled this function returns None — confirm.
        return None

    query_vec = np.array([embedder.encode(query)], dtype=np.float32)
    _distances, neighbours = index.search(query_vec, k=3)

    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # metadata files produced by this service.
    metadata = np.load(meta_path, allow_pickle=True).item()

    docs = []
    for idx in neighbours[0]:
        # FAISS pads missing results with -1. A distance of 0 is an exact
        # match and is kept (it was previously treated as "no result").
        if idx < 0:
            continue
        doc = metadata.get(str(idx), "")
        if doc:
            docs.append(doc)
    return "\n\n".join(docs) if docs else None
133
+
134
+
135
def get_weather(state_name: str) -> str:
    """Fetch current weather for a Nigerian state as a short text report.

    Args:
        state_name: State name; ``", Nigeria"`` is appended for the lookup.

    Returns:
        A multi-line summary (condition, temperature, humidity, wind), or
        ``"Unable to retrieve weather for <state>."`` when the request fails,
        returns a non-200 status, or the payload lacks the expected fields.
    """
    failure = f"Unable to retrieve weather for {state_name}."
    # HTTPS: the API key travels in the query string and must not go out in clear text.
    url = "https://api.weatherapi.com/v1/current.json"
    params = {"key": config.WEATHER_API_KEY, "q": f"{state_name}, Nigeria", "aqi": "no"}
    try:
        r = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        # Timeouts / connection errors must not take down the whole pipeline.
        return failure
    if r.status_code != 200:
        return failure
    try:
        current = r.json()["current"]
        return (
            f"Weather in {state_name}:\n"
            f"- Condition: {current['condition']['text']}\n"
            f"- Temperature: {current['temp_c']}°C\n"
            f"- Humidity: {current['humidity']}%\n"
            f"- Wind: {current['wind_kph']} kph"
        )
    except (KeyError, TypeError, ValueError):
        # Malformed JSON or an unexpected payload shape.
        return failure
149
+
150
+
151
def detect_intent(query: str):
    """Classify *query* into an intent using keyword rules, then the classifier.

    Order of checks:
      1. Weather keywords -> ``("weather", state)`` where *state* is the first
         configured state named in the query, else ``("weather", None)``.
      2. Live-update keywords -> ``("live_update", None)``.
      3. The loaded classifier, if it exposes ``predict``/``predict_proba``:
         ``("low_confidence", None)`` below the configured threshold,
         otherwise ``(predicted_intent, None)``.
      4. Fallback ``("normal", None)``.

    Args:
        query: User query (English); ``None`` is treated as empty.

    Returns:
        A ``(intent, extra)`` tuple.
    """
    q_lower = (query or "").lower()

    # Keywords are anchored at the start of a word: "raining" still matches
    # "rain", but "brain" or "training" no longer trigger the weather intent.
    if re.search(r"\b(?:weather|temperature|rain|forecast)", q_lower):
        for state in getattr(config, "STATES", []):
            # Whole-word match so that "Nigeria" is not read as the state "Niger".
            if re.search(rf"\b{re.escape(state.lower())}\b", q_lower):
                return "weather", state
        return "weather", None

    if re.search(r"\b(?:latest|update|breaking|news|current|predict)", q_lower):
        return "live_update", None

    if hasattr(classifier, "predict") and hasattr(classifier, "predict_proba"):
        try:
            predicted_intent = classifier.predict([query])[0]
            confidence = max(classifier.predict_proba([query])[0])
        except Exception as exc:
            # Best effort: a classifier failure degrades to the default intent,
            # but is reported rather than silently ignored.
            print(f"Intent classifier failed, using 'normal': {exc}")
        else:
            if confidence < getattr(config, "CLASSIFIER_CONFIDENCE_THRESHOLD", 0.6):
                return "low_confidence", None
            return predicted_intent, None
    return "normal", None
172
+
173
+ # expert runner
174
def run_qwen(messages: List[dict], max_new_tokens: int = 1300) -> str:
    """Generate a reply from the expert model for a chat *messages* list.

    Args:
        messages: Chat messages (dicts with ``role``/``content``) rendered
            through the tokenizer's chat template.
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        The decoded completion only (prompt tokens removed), stripped.
    """
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": 0.4,
        "repetition_penalty": 1.1,
    }
    sequences = model.generate(**model_inputs, **generation_kwargs)

    # Drop the echoed prompt so only newly generated tokens are decoded.
    prompt_length = len(model_inputs.input_ids[0])
    completion_ids = sequences[0][prompt_length:].tolist()
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
185
+
186
+ # Memory
187
+ MAX_HISTORY_MESSAGES = getattr(config, "MAX_HISTORY_MESSAGES", 30)
188
+
189
def build_messages_from_history(history: List[dict], system_prompt: str) -> List[dict]:
    """Return a new message list: the system prompt followed by *history*.

    The input *history* list is not modified.
    """
    system_message = {"role": "system", "content": system_prompt}
    return [system_message, *history]
193
+
194
+ # Main pipeline
195
def run_pipeline(user_query: str, session_id: str = None):
    """
    Run the DR ROBOT pipeline for one user turn with per-session memory.

    Steps: detect the query language, translate it to English when needed,
    classify the intent, optionally attach weather or retrieved context,
    generate an answer with the expert model, store the turn, and translate
    the answer back to the user's language.

    Args:
        user_query: The raw user message.
        session_id: Conversation key; a fresh UUID is generated when ``None``.

    Returns:
        A dict with ``session_id``, ``detected_language`` (display name) and
        ``answer`` (in the user's language).
    """
    if session_id is None:
        session_id = str(uuid.uuid4())  # fallback unique session

    # Language detection; anything unsupported is handled as English.
    lang_label, _prob = detect_language(user_query, top_k=1)[0]
    if lang_label not in SUPPORTED_LANGS:
        lang_label = "eng_Latn"
    needs_translation = lang_label != "eng_Latn"

    translated_query = (
        translate_text(user_query, src_lang=lang_label, tgt_lang="eng_Latn")
        if needs_translation
        else user_query
    )

    intent, extra = detect_intent(translated_query)

    # Load conversation history (bounded to the most recent messages).
    history = memory_store.get_history(session_id) or []
    if len(history) > MAX_HISTORY_MESSAGES:
        history = history[-MAX_HISTORY_MESSAGES:]
    history.append({"role": "user", "content": translated_query})

    # NOTE(review): this prompt tells the model to diagnose and recommend
    # drugs; it needs clinical-safety review (disclaimers, escalation advice)
    # before being exposed to real patients.
    system_prompt = (
        "You are an AI called DR ROBOT. "
        "Answer directly without repeating the question. "
        "Use clear nigerian doctor friendly English with emojis . "
        "Avoid jargon and irrelevant details. "
        # The quoted sentence was previously never closed and ran into the next one.
        "If asked who built you, say: 'jackson kelvin developed me to help as Your doctor.' "
        "you are to be nice and also a companion to people ."
        " you are a medical doctor with strong knowledge in medicine and surgery."
        " you always care for people emotionaly, mentally, health and life."
        " you can diagnose sick people give good advice and medical recommendations."
        " you can tell the best drug to use base on budget for them."
        " you are to know users age, health conditons before recommending a drug."
        " you are 50 times smarter than 100 doctors put in one brain."
        " be something people could never leave because you are the best doctor ever."
    )

    # Retrieved context and rewrite instructions are sent to the model for
    # this turn only. They are NOT stored in session memory: persisting them
    # as fake "user" turns filled the bounded history window with stale data.
    turn_messages = list(history)
    if intent == "weather" and extra:
        weather_text = get_weather(extra)
        turn_messages.append(
            {"role": "user", "content": f"Rewrite this weather update simply for farmers:\n{weather_text}"}
        )
        max_new_tokens = 256
    else:
        if intent == "live_update":
            context = retrieve_docs(translated_query, config.LIVE_VS_PATH)
            if context:
                turn_messages.append(
                    {"role": "user", "content": f"Latest agricultural updates:\n{context}"}
                )
        elif intent == "low_confidence":
            context = retrieve_docs(translated_query, config.STATIC_VS_PATH)
            if context:
                turn_messages.append(
                    {"role": "user", "content": f"Reference information:\n{context}"}
                )
        max_new_tokens = 700

    messages_for_qwen = build_messages_from_history(turn_messages, system_prompt)
    english_answer = run_qwen(messages_for_qwen, max_new_tokens=max_new_tokens)

    # Save the real exchange (user query + assistant reply) only.
    history.append({"role": "assistant", "content": english_answer})
    if len(history) > MAX_HISTORY_MESSAGES:
        history = history[-MAX_HISTORY_MESSAGES:]
    memory_store.save_history(session_id, history)

    # Translate back if needed
    final_answer = (
        translate_text(english_answer, src_lang="eng_Latn", tgt_lang=lang_label)
        if needs_translation
        else english_answer
    )

    return {
        "session_id": session_id,
        "detected_language": SUPPORTED_LANGS.get(lang_label, "Unknown"),
        "answer": final_answer
    }
app/main.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # farmlingua_backend/app/main.py
2
+ import os
3
+ import sys
4
+ import logging
5
+ import uuid
6
+ from fastapi import FastAPI, Body
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ import uvicorn
9
+
10
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ if BASE_DIR not in sys.path:
12
+ sys.path.insert(0, BASE_DIR)
13
+
14
+ from app.tasks.rag_updater import schedule_updates
15
+ from app.utils import config
16
+ from app.agents.crew_pipeline import run_pipeline
17
+
18
+ logging.basicConfig(
19
+ format="%(asctime)s [%(levelname)s] %(message)s",
20
+ level=logging.INFO
21
+ )
22
+
23
+ app = FastAPI(
24
+ title="DR ROBOT Backend",
25
+ description="Backend service for dr robot with RAG updates, multilingual support, and expert AI pipeline",
26
+ version="1.2.0"
27
+ )
28
+
29
+ app.add_middleware(
30
+ CORSMiddleware,
31
+ allow_origins=getattr(config, "ALLOWED_ORIGINS", ["*"]),
32
+ allow_credentials=True,
33
+ allow_methods=["*"],
34
+ allow_headers=["*"],
35
+ )
36
+
37
@app.on_event("startup")
def startup_event():
    """Log service start-up and launch the background RAG update scheduler."""
    logging.info("Starting dr robot backend...")
    schedule_updates()
41
+
42
@app.get("/")
def home():
    """Health check: report service status, version and the configured vector-store path."""
    payload = {
        "status": "DR ROBOT backend running",
        "version": "1.2.0",
    }
    payload["vectorstore_path"] = config.VECTORSTORE_PATH
    return payload
50
+
51
@app.post("/ask")
def ask_farmbot(
    query: str = Body(..., embed=True),
    session_id: str = Body(None, embed=True)
):
    """
    Ask DR ROBOT a question.
    - Supports Hausa, Igbo, Yoruba, Swahili, Amharic, and English.
    - Automatically detects user language, translates if needed,
      and returns response in the same language.
    - Maintains separate conversation memory per session_id; a new
      session_id is generated and returned when none is supplied.
    """
    if not session_id:
        session_id = str(uuid.uuid4())  # assign new session if missing

    # Queries may contain personal health details, so only their size and the
    # session are logged — never the raw text. Lazy %-args avoid formatting
    # when the log level is disabled.
    logging.info("Received query (%d chars) [session_id=%s]", len(query), session_id)
    answer_data = run_pipeline(query, session_id=session_id)

    detected_lang = answer_data.get("detected_language", "Unknown")
    logging.info("Detected language: %s", detected_lang)

    return {
        "query": query,
        "answer": answer_data.get("answer"),
        "session_id": answer_data.get("session_id"),
        "detected_language": detected_lang
    }
78
+
79
+ if __name__ == "__main__":
80
+ uvicorn.run(
81
+ "app.main:app",
82
+ host="0.0.0.0",
83
+ port=getattr(config, "PORT", 7860),
84
+ reload=bool(getattr(config, "DEBUG", False))
85
+ )
app/models/__init__.py ADDED
File without changes
app/models/intent_classifier_v2.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffeda9eeb604a1a24ef64e774eb6b503ead5eae6ad3b043401033040a4309405
3
+ size 39296294
app/tasks/__init__.py ADDED
File without changes
app/tasks/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (172 Bytes). View file
 
app/tasks/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (160 Bytes). View file
 
app/tasks/__pycache__/rag_updater.cpython-311.pyc ADDED
Binary file (8.43 kB). View file
 
app/tasks/__pycache__/rag_updater.cpython-312.pyc ADDED
Binary file (7.42 kB). View file
 
app/tasks/rag_updater.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # farmlingua_backend/app/tasks/rag_updater.py
2
+ import os
3
+ import sys
4
+ from datetime import datetime, date
5
+ import logging
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+ from apscheduler.schedulers.background import BackgroundScheduler
9
+
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.embeddings import SentenceTransformerEmbeddings
12
+ from langchain.docstore.document import Document
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+
15
+ from app.utils import config
16
+
17
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
18
+ if BASE_DIR not in sys.path:
19
+ sys.path.insert(0, BASE_DIR)
20
+
21
+ logging.basicConfig(
22
+ format="%(asctime)s [%(levelname)s] %(message)s",
23
+ level=logging.INFO
24
+ )
25
+
26
+ session = requests.Session()
27
+
28
def fetch_weather_now():
    """Fetch current weather for all configured states.

    Returns:
        A list of ``Document`` objects, one per state whose request succeeded
        and whose payload contained a ``"current"`` section. Failures for a
        single state are logged and skipped.
    """
    # HTTPS: the API key is sent as a query parameter and must not travel in
    # clear text. The URL does not change per state, so it is built once.
    url = "https://api.weatherapi.com/v1/current.json"
    docs = []
    for state in config.STATES:
        try:
            params = {
                "key": config.WEATHER_API_KEY,
                "q": f"{state}, Nigeria",
                "aqi": "no"
            }
            res = session.get(url, params=params, timeout=10)
            res.raise_for_status()
            data = res.json()

            if "current" in data:
                current = data['current']
                condition = current['condition']['text']
                temp_c = current['temp_c']
                humidity = current['humidity']
                text = (
                    f"Weather in {state}: {condition}, "
                    f"Temperature: {temp_c}°C, Humidity: {humidity}%"
                )
                docs.append(Document(
                    page_content=text,
                    metadata={
                        "source": "WeatherAPI",
                        "location": state,
                        "timestamp": datetime.utcnow().isoformat()
                    }
                ))
        except Exception as e:
            # Best effort per state: one failing lookup must not abort the rest.
            logging.error(f"Weather fetch failed for {state}: {e}")
    return docs
62
+
63
def fetch_harvestplus_articles():
    """Fetch every sufficiently long ``<article>`` from the configured news page.

    The page URL is read from ``config.DATA_SOURCES["harvestplus"]``. No date
    filtering is applied: the former "today only" check was disabled with
    ``or True`` and has been removed as dead code, together with the unused
    date string.

    Returns:
        A list of ``Document`` objects (text longer than 100 characters), or
        an empty list if the request or parsing fails.
    """
    try:
        res = session.get(config.DATA_SOURCES["harvestplus"], timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        docs = []
        for article in soup.find_all("article"):
            content = article.get_text(strip=True)
            # Skip empty or very short fragments (100 characters or fewer).
            if not content or len(content) <= 100:
                continue
            docs.append(Document(
                page_content=content,
                metadata={
                    "source": "HarvestPlus",
                    "timestamp": datetime.utcnow().isoformat()
                }
            ))
        return docs
    except Exception as e:
        logging.error(f"HarvestPlus fetch failed: {e}")
        return []
90
+
91
def build_rag_vectorstore(reset=False):
    """Fetch fresh documents and write them into the live FAISS vector store.

    Args:
        reset: When ``True`` the store is rebuilt from the freshly fetched
            documents only; when ``False`` the documents are appended to the
            existing store (or a new store is created if none exists yet).

    The update is skipped entirely when no documents could be fetched.
    """
    job_type = "FULL REBUILD" if reset else "INCREMENTAL UPDATE"
    logging.info(f"RAG update started — {job_type}")

    all_docs = fetch_weather_now() + fetch_harvestplus_articles()

    weather_count = sum(1 for d in all_docs if d.metadata['source'] == 'WeatherAPI')
    news_count = sum(1 for d in all_docs if d.metadata['source'] == 'HarvestPlus')
    logging.info(f"Weather docs fetched: {weather_count}")
    logging.info(f"News docs fetched: {news_count}")

    if not all_docs:
        logging.warning("No documents fetched, skipping update")
        return

    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
    chunks = splitter.split_documents(all_docs)

    embedder = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)

    vectorstore_path = config.LIVE_VS_PATH
    index_file = os.path.join(vectorstore_path, "index.faiss")

    # Append only when an index file is actually present; an existing but
    # empty directory used to make load_local raise and abort the job.
    if not reset and os.path.exists(index_file):
        # NOTE(review): allow_dangerous_deserialization unpickles index.pkl;
        # only safe because the file is produced by this service.
        vs = FAISS.load_local(
            vectorstore_path,
            embedder,
            allow_dangerous_deserialization=True
        )
        vs.add_documents(chunks)
    else:
        # Build the replacement first so a failure here leaves the previous
        # index on disk; old files are removed only once the new one exists.
        vs = FAISS.from_documents(chunks, embedder)
        if reset and os.path.isdir(vectorstore_path):
            for file in os.listdir(vectorstore_path):
                file_path = os.path.join(vectorstore_path, file)
                try:
                    os.remove(file_path)
                    logging.info(f"Deleted old file: {file_path}")
                except Exception as e:
                    logging.error(f"Failed to delete {file_path}: {e}")

    os.makedirs(vectorstore_path, exist_ok=True)
    vs.save_local(vectorstore_path)

    logging.info(f"Vectorstore updated at {vectorstore_path}")
134
+
135
def schedule_updates():
    """Start the background scheduler for vector-store refreshes.

    Two interval jobs are registered: an incremental update every 12 hours
    and a full rebuild every 7 days.

    Returns:
        The started ``BackgroundScheduler``.
    """
    # 7 days is a multiple of 12 hours, so both jobs fall due together once a
    # week. A single worker thread runs them one after the other instead of
    # letting them read/delete/write the same index directory concurrently.
    scheduler = BackgroundScheduler(
        executors={"default": {"type": "threadpool", "max_workers": 1}}
    )
    # The queued job starts late while the other one runs; a generous misfire
    # grace time keeps it from being discarded as "missed".
    scheduler.add_job(
        build_rag_vectorstore, 'interval', hours=12,
        kwargs={"reset": False}, misfire_grace_time=3600,
    )
    scheduler.add_job(
        build_rag_vectorstore, 'interval', days=7,
        kwargs={"reset": True}, misfire_grace_time=3600,
    )
    scheduler.start()
    logging.info("Scheduler started — 12-hour incremental updates + weekly full rebuild")
    return scheduler
app/utils/__init__.py ADDED
File without changes
app/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (172 Bytes). View file
 
app/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (160 Bytes). View file
 
app/utils/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.85 kB). View file
 
app/utils/__pycache__/config.cpython-312.pyc ADDED
Binary file (2.33 kB). View file
 
app/utils/__pycache__/memory.cpython-312.pyc ADDED
Binary file (1.71 kB). View file
 
app/utils/config.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # farmlingua_backend/app/utils/config.py
4
+ from pathlib import Path
5
+ import os
6
+ import sys
7
+
8
+
9
+ BASE_DIR = Path(__file__).resolve().parents[2]
10
+
11
+
12
+ if str(BASE_DIR) not in sys.path:
13
+ sys.path.insert(0, str(BASE_DIR))
14
+
15
+ EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
16
+ STATIC_VS_PATH = BASE_DIR / "app" / "vectorstore" / "faiss_index"
17
+ LIVE_VS_PATH = BASE_DIR / "app" / "vectorstore" / "live_rag_index"
18
+
19
+ VECTORSTORE_PATH = LIVE_VS_PATH
20
+
21
+
22
# Secret: read from the environment only. The key that used to be the default
# here was committed to a public repository and should be revoked and rotated.
# With no key configured, weather requests fail and callers fall back to
# their error handling.
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "")
23
+
24
+
25
+ CLASSIFIER_PATH = BASE_DIR / "app" / "models" / "intent_classifier_v2.joblib"
26
+ CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHOLD", "0.6"))
27
+
28
+
29
+ EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen3-4B-Instruct-2507")
30
+ #FORMATTER_MODEL_NAME = os.getenv("FORMATTER_MODEL_NAME", "google/flan-t5-large")
31
+
32
+ LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
33
+ LANG_ID_MODEL_FILE = os.getenv("LANG_ID_MODEL_FILE", "model.bin")
34
+
35
+ TRANSLATION_MODEL_NAME = os.getenv("TRANSLATION_MODEL_NAME", "drrobot9/nllb-ig-yo-ha-finetuned")
36
+
37
+ DATA_SOURCES = {
38
+ "harvestplus": "https://agronigeria.ng/category/news/",
39
+ }
40
+
41
+ STATES = [
42
+ "Abuja", "Lagos", "Kano", "Kaduna", "Rivers", "Enugu", "Anambra", "Ogun",
43
+ "Oyo", "Delta", "Edo", "Katsina", "Borno", "Benue", "Niger", "Plateau",
44
+ "Bauchi", "Adamawa", "Cross River", "Akwa Ibom", "Ekiti", "Osun", "Ondo",
45
+ "Imo", "Abia", "Ebonyi", "Taraba", "Kebbi", "Zamfara", "Yobe", "Gombe",
46
+ "Sokoto", "Kogi", "Bayelsa", "Nasarawa", "Jigawa"
47
+ ]
48
+
49
+
50
+ hf_cache = "/models/huggingface"
51
+ os.environ["HF_HOME"] = hf_cache
52
+ os.environ["TRANSFORMERS_CACHE"] = hf_cache
53
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
54
+ os.makedirs(hf_cache, exist_ok=True)
app/utils/memory.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #app/utils/memory.py
2
+
3
+ from cachetools import TTLCache
4
+ from threading import Lock
5
+
6
+ memory_cache = TTLCache(maxsize=10000, ttl=3600)
7
+ lock = Lock()
8
+
9
+
10
class MemoryStore:
    """Lock-guarded access to the module-level TTL cache of conversation histories."""

    def get_history(self, session_id: str):
        """Return a copy of the stored message list for *session_id* (empty list if none)."""
        with lock:
            stored = memory_cache.get(session_id, [])
            return stored.copy()

    def save_history(self, session_id: str, history: list):
        """Store a copy of *history* for *session_id*, replacing any previous value."""
        snapshot = history.copy()
        with lock:
            memory_cache[session_id] = snapshot

    def clear_history(self, session_id: str):
        """Remove the history for *session_id*; unknown sessions are ignored."""
        with lock:
            memory_cache.pop(session_id, None)
27
+
28
+ memory_store = MemoryStore()
app/vectorstore/__init__.py ADDED
File without changes
app/vectorstore/faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4faefcc68ae5a575b18f559e04cd2c68e166a73c4c89c9550e1794ccbf90695
3
+ size 19648557
app/vectorstore/faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c75f31eab757e90e9c9771b62368c2de5dc11ed776629521fb007d8d47b84a
3
+ size 5863908
app/vectorstore/live_rag_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2aebc178c85d2fa6a2c1071389cd67479f9d233b4f33b00ddf455ff56c85e6
3
+ size 141357
app/vectorstore/live_rag_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d99a0cc3b10e0dd46ceb810553e28e4b273cb1ed94645a2a7fc5f76869f2ef7
3
+ size 25409
app/venv/bin/python ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
3
+ size 7901928
app/venv/bin/python3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
3
+ size 7901928
app/venv/bin/python3.11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
3
+ size 7901928
app/venv/pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = /usr/bin
2
+ include-system-site-packages = false
3
+ version = 3.11.13
4
+ executable = /usr/bin/python3.11
5
+ command = /usr/bin/python3 -m venv /content/drive/MyDrive/farmlingua_backend/app/venv
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ crewai
2
+ langchain
3
+ langchain-community
4
+ faiss-cpu
5
+ transformers
6
+ sentence-transformers
7
+ pydantic
8
+ joblib
9
+ pyyaml
10
+ torch
11
+ fastapi
12
+ uvicorn
13
+ apscheduler
14
+ numpy<2
15
+ requests
16
+ beautifulsoup4
17
+ huggingface-hub
18
+ python-dotenv
19
+ blobfile
20
+ sentencepiece
21
+ fasttext