drrobot9 committed
Commit b15c6d8 · 1 Parent(s): 0d39360

super initial commit

.dockerignore ADDED
File without changes
.dockerigore ADDED
File without changes
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ app/vectorstore/faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
+ app/vectorstore/live_rag_index/index.faiss filter=lfs diff=lfs merge=lfs -text
+ app/venv/bin/python filter=lfs diff=lfs merge=lfs -text
+ app/venv/bin/python3 filter=lfs diff=lfs merge=lfs -text
+ app/venv/bin/python3.11 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,56 @@
+ # Base Image
+ FROM python:3.10-slim
+
+
+ ENV DEBIAN_FRONTEND=noninteractive \
+     PYTHONUNBUFFERED=1 \
+     PYTHONDONTWRITEBYTECODE=1
+
+
+ WORKDIR /code
+
+ # System dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     git \
+     curl \
+     libopenblas-dev \
+     libomp-dev \
+     && rm -rf /var/lib/apt/lists/*
+
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Hugging Face + model tools
+ RUN pip install --no-cache-dir huggingface-hub sentencepiece accelerate fasttext
+
+ # Hugging Face cache environment
+ ENV HF_HOME=/models/huggingface \
+     TRANSFORMERS_CACHE=/models/huggingface \
+     HUGGINGFACE_HUB_CACHE=/models/huggingface \
+     HF_HUB_CACHE=/models/huggingface
+
+ # Create cache dir and set permissions
+ RUN mkdir -p /models/huggingface && chmod -R 777 /models/huggingface
+
+ # Pre-download models at build time
+ RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='Qwen/Qwen3-4B-Instruct-2507')" \
+     && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')" \
+     && python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')" \
+     && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='drrobot9/nllb-ig-yo-ha-finetuned')" \
+     && find /models/huggingface -name '*.lock' -delete
+
+ # Preload tokenizers (avoid runtime delays)
+ RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-4B-Instruct-2507', use_fast=True)" \
+     && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', use_fast=True)" \
+     && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('drrobot9/nllb-ig-yo-ha-finetuned', use_fast=True)"
+
+ # Copy project files
+ COPY . .
+
+ # Expose FastAPI port
+ EXPOSE 7860
+
+ # Run FastAPI app with uvicorn (1 worker; raise --workers for more concurrency)
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (166 Bytes). View file
 
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (154 Bytes). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.31 kB). View file
 
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (3.65 kB). View file
 
app/agents/__init__.py ADDED
File without changes
app/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
app/agents/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (161 Bytes). View file
 
app/agents/__pycache__/crew_pipeline.cpython-311.pyc ADDED
Binary file (8.73 kB). View file
 
app/agents/__pycache__/crew_pipeline.cpython-312.pyc ADDED
Binary file (13.6 kB). View file
 
app/agents/crew_pipeline.py ADDED
@@ -0,0 +1,280 @@
+ # farmlingua/app/agents/crew_pipeline.py (memory section)
+ import os
+ import sys
+ import re
+ import uuid
+ import requests
+ import joblib
+ import faiss
+ import numpy as np
+ import torch
+ import fasttext
+ from huggingface_hub import hf_hub_download
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ from sentence_transformers import SentenceTransformer
+ from app.utils import config
+ from app.utils.memory import memory_store  # memory module
+ from typing import List
+
+
+ hf_cache = "/models/huggingface"
+ os.environ["HF_HOME"] = hf_cache
+ os.environ["TRANSFORMERS_CACHE"] = hf_cache
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
+ os.makedirs(hf_cache, exist_ok=True)
+
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ if BASE_DIR not in sys.path:
+     sys.path.insert(0, BASE_DIR)
+
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ try:
+     classifier = joblib.load(config.CLASSIFIER_PATH)
+ except Exception:
+     classifier = None
+
+
+ print(f"Loading expert model ({config.EXPERT_MODEL_NAME})...")
+ tokenizer = AutoTokenizer.from_pretrained(config.EXPERT_MODEL_NAME, use_fast=False)
+ model = AutoModelForCausalLM.from_pretrained(
+     config.EXPERT_MODEL_NAME,
+     torch_dtype="auto",
+     device_map="auto"
+ )
+
+
+ embedder = SentenceTransformer(config.EMBEDDING_MODEL)
+
+ # Language detector
+ print(f"Loading FastText language identifier ({config.LANG_ID_MODEL_REPO})...")
+ lang_model_path = hf_hub_download(
+     repo_id=config.LANG_ID_MODEL_REPO,
+     filename=getattr(config, "LANG_ID_MODEL_FILE", "model.bin")
+ )
+ lang_identifier = fasttext.load_model(lang_model_path)
+
+ def detect_language(text: str, top_k: int = 1):
+     if not text or not text.strip():
+         return [("eng_Latn", 1.0)]
+     clean_text = text.replace("\n", " ").strip()
+     labels, probs = lang_identifier.predict(clean_text, k=top_k)
+     return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
+
+ # Translation model
+ print(f"Loading translation model ({config.TRANSLATION_MODEL_NAME})...")
+ translation_pipeline = pipeline(
+     "translation",
+     model=config.TRANSLATION_MODEL_NAME,
+     device=0 if DEVICE == "cuda" else -1,
+     max_new_tokens=400,
+ )
+
+ SUPPORTED_LANGS = {
+     "eng_Latn": "English",
+     "ibo_Latn": "Igbo",
+     "yor_Latn": "Yoruba",
+     "hau_Latn": "Hausa",
+     "swh_Latn": "Swahili",
+     "amh_Latn": "Amharic",  # NB: the FastText LID model normally labels Amharic as amh_Ethi
+ }
+
+ # Text chunking
+ _SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')
+
+ def chunk_text(text: str, max_len: int = 400) -> List[str]:
+     if not text:
+         return []
+     sentences = _SENTENCE_SPLIT_RE.split(text)
+     chunks, current = [], ""
+     for s in sentences:
+         if not s:
+             continue
+         if len(current) + len(s) + 1 <= max_len:
+             current = (current + " " + s).strip()
+         else:
+             if current:
+                 chunks.append(current.strip())
+             current = s.strip()
+     if current:
+         chunks.append(current.strip())
+     return chunks
+
+ def translate_text(text: str, src_lang: str, tgt_lang: str, max_chunk_len: int = 400) -> str:
+     if not text.strip():
+         return text
+     chunks = chunk_text(text, max_len=max_chunk_len)
+     translated_parts = []
+     for chunk in chunks:
+         res = translation_pipeline(chunk, src_lang=src_lang, tgt_lang=tgt_lang)
+         translated_parts.append(res[0]["translation_text"])
+     return " ".join(translated_parts).strip()
+
+ # RAG retrieval
+ def retrieve_docs(query: str, vs_path: str):
+     if not vs_path or not os.path.exists(vs_path):
+         return None
+     try:
+         index = faiss.read_index(str(vs_path))
+     except Exception:
+         return None
+     query_vec = np.array([embedder.encode(query)], dtype=np.float32)
+     D, I = index.search(query_vec, k=3)
+     if D[0][0] == 0:
+         return None
+     meta_path = str(vs_path) + "_meta.npy"
+     if os.path.exists(meta_path):
+         metadata = np.load(meta_path, allow_pickle=True).item()
+         docs = [metadata.get(str(idx), "") for idx in I[0] if str(idx) in metadata]
+         docs = [d for d in docs if d]
+         return "\n\n".join(docs) if docs else None
+     return None
+
+
+ def get_weather(state_name: str) -> str:
+     url = "http://api.weatherapi.com/v1/current.json"
+     params = {"key": config.WEATHER_API_KEY, "q": f"{state_name}, Nigeria", "aqi": "no"}
+     r = requests.get(url, params=params, timeout=10)
+     if r.status_code != 200:
+         return f"Unable to retrieve weather for {state_name}."
+     data = r.json()
+     return (
+         f"Weather in {state_name}:\n"
+         f"- Condition: {data['current']['condition']['text']}\n"
+         f"- Temperature: {data['current']['temp_c']}°C\n"
+         f"- Humidity: {data['current']['humidity']}%\n"
+         f"- Wind: {data['current']['wind_kph']} kph"
+     )
+
+
+ def detect_intent(query: str):
+     q_lower = (query or "").lower()
+     if any(word in q_lower for word in ["weather", "temperature", "rain", "forecast"]):
+         for state in getattr(config, "STATES", []):
+             if state.lower() in q_lower:
+                 return "weather", state
+         return "weather", None
+
+     if any(word in q_lower for word in ["latest", "update", "breaking", "news", "current", "predict"]):
+         return "live_update", None
+
+     if hasattr(classifier, "predict") and hasattr(classifier, "predict_proba"):
+         try:
+             predicted_intent = classifier.predict([query])[0]
+             confidence = max(classifier.predict_proba([query])[0])
+             if confidence < getattr(config, "CLASSIFIER_CONFIDENCE_THRESHOLD", 0.6):
+                 return "low_confidence", None
+             return predicted_intent, None
+         except Exception:
+             pass
+     return "normal", None
+
+ # Expert runner
+ def run_qwen(messages: List[dict], max_new_tokens: int = 1300) -> str:
+     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer([text], return_tensors="pt").to(model.device)
+     generated_ids = model.generate(
+         **inputs,
+         max_new_tokens=max_new_tokens,
+         temperature=0.4,
+         repetition_penalty=1.1
+     )
+     output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
+     return tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+
+ # Memory
+ MAX_HISTORY_MESSAGES = getattr(config, "MAX_HISTORY_MESSAGES", 30)
+
+ def build_messages_from_history(history: List[dict], system_prompt: str) -> List[dict]:
+     msgs = [{"role": "system", "content": system_prompt}]
+     msgs.extend(history)
+     return msgs
+
+ # Main pipeline
+ def run_pipeline(user_query: str, session_id: str = None):
+     """
+     Run the FarmLingua pipeline with per-session memory.
+     Each session_id keeps its own history.
+     """
+     if session_id is None:
+         session_id = str(uuid.uuid4())  # fallback unique session
+
+     # Language detection
+     lang_label, prob = detect_language(user_query, top_k=1)[0]
+     if lang_label not in SUPPORTED_LANGS:
+         lang_label = "eng_Latn"
+
+     translated_query = (
+         translate_text(user_query, src_lang=lang_label, tgt_lang="eng_Latn")
+         if lang_label != "eng_Latn"
+         else user_query
+     )
+
+     intent, extra = detect_intent(translated_query)
+
+     # Load conversation history
+     history = memory_store.get_history(session_id) or []
+     if len(history) > MAX_HISTORY_MESSAGES:
+         history = history[-MAX_HISTORY_MESSAGES:]
+
+
+     history.append({"role": "user", "content": translated_query})
+
+
+     system_prompt = (
+         "You are DR ROBOT, an AI assistant for Nigerian patients. "
+         "Answer directly without repeating the question. "
+         "Use clear, Nigerian-friendly English with emojis. "
+         "Avoid jargon and irrelevant details. "
+         "If asked who built you, say: 'Kelvin Jackson built and engineered me to help patients.' "
+         "You are a doctor who breaks things down for patients to understand. "
+         "You are to diagnose before recommending any pharmaceutical medications. "
+         "You are a medical consultant, a general medicine doctor, and a mental health doctor. "
+         "Gather the patient's relevant information for diagnosis before telling them what is wrong. "
+         "Always watch out for your patients; be interactive, ask questions, and behave like a real human being. "
+         "If a user mistakenly calls you Dr Roberts or Dr Ruberts, understand they mean Dr Robot. "
+         "When facial diagnosis data is sent to you, study it, diagnose the user accurately from it, and tell them what to do next. "
+         "Be smart about identifying what is wrong with the user; you may prescribe suitable drugs and give a favourable dosage. "
+         "You are strong at medical diagnosis because you have broad medical knowledge. "
+         "Not all users will type correctly or completely, so infer what a user means even if the spelling is wrong or incomplete."
+
+     )
+
+
+     if intent == "weather" and extra:
+         weather_text = get_weather(extra)
+         history.append({"role": "user", "content": f"Rewrite this weather update simply for farmers:\n{weather_text}"})
+         messages_for_qwen = build_messages_from_history(history, system_prompt)
+         english_answer = run_qwen(messages_for_qwen, max_new_tokens=256)
+     else:
+         if intent == "live_update":
+             context = retrieve_docs(translated_query, config.LIVE_VS_PATH)
+             if context:
+                 history.append({"role": "user", "content": f"Latest agricultural updates:\n{context}"})
+         if intent == "low_confidence":
+             context = retrieve_docs(translated_query, config.STATIC_VS_PATH)
+             if context:
+                 history.append({"role": "user", "content": f"Reference information:\n{context}"})
+
+         messages_for_qwen = build_messages_from_history(history, system_prompt)
+         english_answer = run_qwen(messages_for_qwen, max_new_tokens=700)
+
+     # Save assistant reply
+     history.append({"role": "assistant", "content": english_answer})
+     if len(history) > MAX_HISTORY_MESSAGES:
+         history = history[-MAX_HISTORY_MESSAGES:]
+     memory_store.save_history(session_id, history)
+
+     # Translate back if needed
+     final_answer = (
+         translate_text(english_answer, src_lang="eng_Latn", tgt_lang=lang_label)
+         if lang_label != "eng_Latn"
+         else english_answer
+     )
+
+     return {
+         "session_id": session_id,
+         "detected_language": SUPPORTED_LANGS.get(lang_label, "Unknown"),
+         "answer": final_answer
+     }
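For reference, run_pipeline() can also be driven directly from a script or notebook. The following is a minimal usage sketch (not part of the commit) and assumes the models and vectorstores referenced in app/utils/config.py are available on disk.

from app.agents.crew_pipeline import run_pipeline

# First turn: no session_id is given, so the pipeline creates one.
first = run_pipeline("I have had a headache and a mild fever since yesterday.")
print(first["detected_language"], first["session_id"])
print(first["answer"])

# Follow-up: reusing the session_id keeps the history stored in memory_store.
follow_up = run_pipeline("Should I take anything for it?", session_id=first["session_id"])
print(follow_up["answer"])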
app/main.py ADDED
@@ -0,0 +1,85 @@
+ # farmlingua_backend/app/main.py
+ import os
+ import sys
+ import logging
+ import uuid
+ from fastapi import FastAPI, Body
+ from fastapi.middleware.cors import CORSMiddleware
+ import uvicorn
+
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ if BASE_DIR not in sys.path:
+     sys.path.insert(0, BASE_DIR)
+
+ from app.tasks.rag_updater import schedule_updates
+ from app.utils import config
+ from app.agents.crew_pipeline import run_pipeline
+
+ logging.basicConfig(
+     format="%(asctime)s [%(levelname)s] %(message)s",
+     level=logging.INFO
+ )
+
+ app = FastAPI(
+     title="doctor robot",
+     description="Backend service for DOCTOR ROBOT AI with RAG updates, multilingual support, and expert AI pipeline",
+     version="1.2.0"
+ )
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=getattr(config, "ALLOWED_ORIGINS", ["*"]),
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.on_event("startup")
+ def startup_event():
+     logging.info("Starting farmlingua AI backend...")
+     schedule_updates()
+
+ @app.get("/")
+ def home():
+     """Health check endpoint."""
+     return {
+         "status": "Farmlingua AI backend running",
+         "version": "1.2.0",
+         "vectorstore_path": config.VECTORSTORE_PATH
+     }
+
+ @app.post("/ask")
+ def ask_farmbot(
+     query: str = Body(..., embed=True),
+     session_id: str = Body(None, embed=True)
+ ):
+     """
+     Ask DOCTOR ROBOT AI a farming-related question.
+     - Supports Hausa, Igbo, Yoruba, Swahili, Amharic, and English.
+     - Automatically detects the user's language, translates if needed,
+       and returns the response in the same language.
+     - Maintains separate conversation memory per session_id.
+     """
+     if not session_id:
+         session_id = str(uuid.uuid4())  # assign a new session if missing
+
+     logging.info(f"Received query: {query} [session_id={session_id}]")
+     answer_data = run_pipeline(query, session_id=session_id)
+
+     detected_lang = answer_data.get("detected_language", "Unknown")
+     logging.info(f"Detected language: {detected_lang}")
+
+     return {
+         "query": query,
+         "answer": answer_data.get("answer"),
+         "session_id": answer_data.get("session_id"),
+         "detected_language": detected_lang
+     }
+
+ if __name__ == "__main__":
+     uvicorn.run(
+         "app.main:app",
+         host="0.0.0.0",
+         port=getattr(config, "PORT", 7860),
+         reload=bool(getattr(config, "DEBUG", False))
+     )
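A sample client call against the /ask endpoint, sketched under the assumption that the service is running locally on the port exposed in the Dockerfile; the JSON field names follow the Body(..., embed=True) parameters above.

import requests

resp = requests.post(
    "http://localhost:7860/ask",
    json={"query": "What should I do about a persistent cough?"},
    timeout=120,
)
resp.raise_for_status()
data = resp.json()
print(data["detected_language"], data["session_id"])
print(data["answer"])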
app/models/__init__.py ADDED
File without changes
app/tasks/__init__.py ADDED
File without changes
app/tasks/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (172 Bytes). View file
 
app/tasks/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (160 Bytes). View file
 
app/tasks/__pycache__/rag_updater.cpython-311.pyc ADDED
Binary file (8.43 kB). View file
 
app/tasks/__pycache__/rag_updater.cpython-312.pyc ADDED
Binary file (7.42 kB). View file
 
app/tasks/rag_updater.py ADDED
@@ -0,0 +1,141 @@
+ # farmlingua_backend/app/tasks/rag_updater.py
+ import os
+ import sys
+ from datetime import datetime, date
+ import logging
+ import requests
+ from bs4 import BeautifulSoup
+ from apscheduler.schedulers.background import BackgroundScheduler
+
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import SentenceTransformerEmbeddings
+ from langchain.docstore.document import Document
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ from app.utils import config
+
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ if BASE_DIR not in sys.path:
+     sys.path.insert(0, BASE_DIR)
+
+ logging.basicConfig(
+     format="%(asctime)s [%(levelname)s] %(message)s",
+     level=logging.INFO
+ )
+
+ session = requests.Session()
+
+ def fetch_weather_now():
+     """Fetch current weather for all configured states."""
+     docs = []
+     for state in config.STATES:
+         try:
+             url = "http://api.weatherapi.com/v1/current.json"
+             params = {
+                 "key": config.WEATHER_API_KEY,
+                 "q": f"{state}, Nigeria",
+                 "aqi": "no"
+             }
+             res = session.get(url, params=params, timeout=10)
+             res.raise_for_status()
+             data = res.json()
+
+             if "current" in data:
+                 condition = data['current']['condition']['text']
+                 temp_c = data['current']['temp_c']
+                 humidity = data['current']['humidity']
+                 text = (
+                     f"Weather in {state}: {condition}, "
+                     f"Temperature: {temp_c}°C, Humidity: {humidity}%"
+                 )
+                 docs.append(Document(
+                     page_content=text,
+                     metadata={
+                         "source": "WeatherAPI",
+                         "location": state,
+                         "timestamp": datetime.utcnow().isoformat()
+                     }
+                 ))
+         except Exception as e:
+             logging.error(f"Weather fetch failed for {state}: {e}")
+     return docs
+
+ def fetch_harvestplus_articles():
+     """Fetch ALL of today's articles from the HarvestPlus site."""
+     try:
+         res = session.get(config.DATA_SOURCES["harvestplus"], timeout=10)
+         res.raise_for_status()
+         soup = BeautifulSoup(res.text, "html.parser")
+         articles = soup.find_all("article")
+
+         docs = []
+         today_str = date.today().strftime("%Y-%m-%d")
+
+         for a in articles:
+             content = a.get_text(strip=True)
+             if content and len(content) > 100:
+
+                 if today_str in a.text or True:  # NOTE: "or True" disables the date filter, so every article is kept
+                     docs.append(Document(
+                         page_content=content,
+                         metadata={
+                             "source": "HarvestPlus",
+                             "timestamp": datetime.utcnow().isoformat()
+                         }
+                     ))
+         return docs
+     except Exception as e:
+         logging.error(f"HarvestPlus fetch failed: {e}")
+         return []
+
+ def build_rag_vectorstore(reset=False):
+     job_type = "FULL REBUILD" if reset else "INCREMENTAL UPDATE"
+     logging.info(f"RAG update started — {job_type}")
+
+     all_docs = fetch_weather_now() + fetch_harvestplus_articles()
+
+     logging.info(f"Weather docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'WeatherAPI'])}")
+     logging.info(f"News docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'HarvestPlus'])}")
+
+     if not all_docs:
+         logging.warning("No documents fetched, skipping update")
+         return
+
+     splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
+     chunks = splitter.split_documents(all_docs)
+
+     embedder = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
+
+     vectorstore_path = config.LIVE_VS_PATH
+
+     if reset and os.path.exists(vectorstore_path):
+         for file in os.listdir(vectorstore_path):
+             file_path = os.path.join(vectorstore_path, file)
+             try:
+                 os.remove(file_path)
+                 logging.info(f"Deleted old file: {file_path}")
+             except Exception as e:
+                 logging.error(f"Failed to delete {file_path}: {e}")
+
+     if os.path.exists(vectorstore_path) and not reset:
+         vs = FAISS.load_local(
+             vectorstore_path,
+             embedder,
+             allow_dangerous_deserialization=True
+         )
+         vs.add_documents(chunks)
+     else:
+         vs = FAISS.from_documents(chunks, embedder)
+
+     os.makedirs(vectorstore_path, exist_ok=True)
+     vs.save_local(vectorstore_path)
+
+     logging.info(f"Vectorstore updated at {vectorstore_path}")
+
+ def schedule_updates():
+     scheduler = BackgroundScheduler()
+     scheduler.add_job(build_rag_vectorstore, 'interval', hours=12, kwargs={"reset": False})
+     scheduler.add_job(build_rag_vectorstore, 'interval', days=7, kwargs={"reset": True})
+     scheduler.start()
+     logging.info("Scheduler started — 12-hour incremental updates + weekly full rebuild")
+     return scheduler
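For completeness, the live index written by build_rag_vectorstore() can be read back with the same LangChain FAISS wrapper used above. A hedged sketch, not part of the commit:

from langchain.vectorstores import FAISS
from langchain.embeddings import SentenceTransformerEmbeddings
from app.utils import config

# Load the saved store and run a small similarity search against it.
embedder = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
vs = FAISS.load_local(
    str(config.LIVE_VS_PATH),
    embedder,
    allow_dangerous_deserialization=True,
)
for doc in vs.similarity_search("weather in Kano", k=3):
    print(doc.metadata.get("source"), "-", doc.page_content[:80])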
app/utils/__init__.py ADDED
File without changes
app/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (172 Bytes). View file
 
app/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (160 Bytes). View file
 
app/utils/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.85 kB). View file
 
app/utils/__pycache__/config.cpython-312.pyc ADDED
Binary file (2.33 kB). View file
 
app/utils/__pycache__/memory.cpython-312.pyc ADDED
Binary file (1.71 kB). View file
 
app/utils/config.py ADDED
@@ -0,0 +1,54 @@
+
+
+ # farmlingua_backend/app/utils/config.py
+ from pathlib import Path
+ import os
+ import sys
+
+
+ BASE_DIR = Path(__file__).resolve().parents[2]
+
+
+ if str(BASE_DIR) not in sys.path:
+     sys.path.insert(0, str(BASE_DIR))
+
+ EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+ STATIC_VS_PATH = BASE_DIR / "app" / "vectorstore" / "faiss_index"
+ LIVE_VS_PATH = BASE_DIR / "app" / "vectorstore" / "live_rag_index"
+
+ VECTORSTORE_PATH = LIVE_VS_PATH
+
+
+ WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "1eefcad138134d62a1e220003252608")
+
+
+ CLASSIFIER_PATH = BASE_DIR / "app" / "models" / "intent_classifier_v2.joblib"
+ CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHOLD", "0.6"))
+
+
+ EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen3-4B-Instruct-2507")
+ # FORMATTER_MODEL_NAME = os.getenv("FORMATTER_MODEL_NAME", "google/flan-t5-large")
+
+ LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
+ LANG_ID_MODEL_FILE = os.getenv("LANG_ID_MODEL_FILE", "model.bin")
+
+ TRANSLATION_MODEL_NAME = os.getenv("TRANSLATION_MODEL_NAME", "drrobot9/nllb-ig-yo-ha-finetuned")
+
+ DATA_SOURCES = {
+     "harvestplus": "https://agronigeria.ng/category/news/",
+ }
+
+ STATES = [
+     "Abuja", "Lagos", "Kano", "Kaduna", "Rivers", "Enugu", "Anambra", "Ogun",
+     "Oyo", "Delta", "Edo", "Katsina", "Borno", "Benue", "Niger", "Plateau",
+     "Bauchi", "Adamawa", "Cross River", "Akwa Ibom", "Ekiti", "Osun", "Ondo",
+     "Imo", "Abia", "Ebonyi", "Taraba", "Kebbi", "Zamfara", "Yobe", "Gombe",
+     "Sokoto", "Kogi", "Bayelsa", "Nasarawa", "Jigawa"
+ ]
+
+
+ hf_cache = "/models/huggingface"
+ os.environ["HF_HOME"] = hf_cache
+ os.environ["TRANSFORMERS_CACHE"] = hf_cache
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
+ os.makedirs(hf_cache, exist_ok=True)
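Because the settings above fall back to os.getenv defaults, a deployment can override them through the environment before this module is first imported. A small illustrative sketch (the override values are hypothetical, not part of the commit):

import os

# Hypothetical overrides; these must be set before `from app.utils import config` runs.
os.environ["WEATHER_API_KEY"] = "my-real-key"
os.environ["CLASSIFIER_CONFIDENCE_THRESHOLD"] = "0.75"

from app.utils import config
print(config.CLASSIFIER_CONFIDENCE_THRESHOLD)  # 0.75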
app/utils/memory.py ADDED
@@ -0,0 +1,28 @@
+ # app/utils/memory.py
+
+ from cachetools import TTLCache
+ from threading import Lock
+
+ memory_cache = TTLCache(maxsize=10000, ttl=3600)
+ lock = Lock()
+
+
+ class MemoryStore:
+     """In-memory conversational history with a 1-hour expiry."""
+     def get_history(self, session_id: str):
+         """Retrieve the conversation history (a list of messages)."""
+
+         with lock:
+             return memory_cache.get(session_id, []).copy()
+
+     def save_history(self, session_id: str, history: list):
+         """Save/overwrite the conversation history."""
+         with lock:
+             memory_cache[session_id] = history.copy()
+
+     def clear_history(self, session_id: str):
+         """Manually clear a session."""
+         with lock:
+             memory_cache.pop(session_id, None)
+
+ memory_store = MemoryStore()
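Typical use of the store, as a minimal sketch (not part of the commit): entries live for an hour after the last save_history() call, matching the TTLCache ttl above.

from app.utils.memory import memory_store

memory_store.save_history("session-123", [{"role": "user", "content": "Hello"}])
print(memory_store.get_history("session-123"))  # [{'role': 'user', 'content': 'Hello'}]

memory_store.clear_history("session-123")
print(memory_store.get_history("session-123"))  # []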
app/vectorstore/__init__.py ADDED
File without changes
app/venv/pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
+ home = /usr/bin
+ include-system-site-packages = false
+ version = 3.11.13
+ executable = /usr/bin/python3.11
+ command = /usr/bin/python3 -m venv /content/drive/MyDrive/farmlingua_backend/app/venv
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ crewai
+ langchain
+ langchain-community
+ faiss-cpu
+ transformers
+ sentence-transformers
+ pydantic
+ joblib
+ pyyaml
+ torch
+ fastapi
+ uvicorn
+ apscheduler
+ numpy<2
+ requests
+ beautifulsoup4
+ huggingface-hub
+ python-dotenv
+ blobfile
+ sentencepiece
+ fasttext