"""
projects_data.py
================
Full project detail — never sent automatically.
Only fetched when a tool is called by the LLM.

Two tools use this file:

  get_project_by_name(names: list[str])
      Direct dict lookup by project key.
      Used when the user mentions a project by name clearly.
      Example: "tell me about Sofia" -> get_project_by_name(["Sofia"])

  search_projects(query: str)
      Semantic FAISS search over each project's search_text field.
      Used when the user describes something without naming a project.
      Example: "what games did you build?" -> search_projects("games motion voice")

The FAISS index built at startup embeds only the search_text strings — one per project (6 at present) — so it stays tiny and fast.

TO ADD A NEW PROJECT:
  1. Copy any existing entry as a template.
  2. Fill in all fields, especially search_text (make it rich with synonyms).
  3. Redeploy — FAISS rebuilds automatically at startup.

TO UPDATE AN EXISTING PROJECT:
  1. Edit the entry below.
  2. Redeploy.
"""

# Registry of every project, keyed by a short project key.
#
# The key is what get_by_names() matches against (case-insensitively, after
# stripping whitespace) and what get_all_search_texts() pairs with each
# search_text, so it doubles as the project's identifier.
#
# Fields of each entry, as read by the helpers at the bottom of this file:
#   name        Display title; rendered as the Markdown heading by get_by_names().
#   year        Year as a string; rendered in parentheses after the name.
#   status      One-line status; rendered on the "**Status**" line.
#   award       Award text, or None. get_by_names() emits the "**Award**" line
#               only when this value is truthy.
#   detail      Long-form description (paragraphs separated by "\n\n");
#               rendered verbatim by get_by_names().
#   tech_stack  Comma-separated technologies; rendered on the "**Tech stack**" line.
#   search_text Keyword-rich text returned by get_all_search_texts() for
#               indexing. It is not included in get_by_names() output.
PROJECTS: dict[str, dict] = {

    "Sofia": {
        "name": "Sofia — 3D Intelligent Virtual AI Agent",
        "year": "2025",
        "status": "Completed. Exhibited at ReXtro 2025.",
        "award": None,
        "detail": (
            "Sofia is my most advanced project — a fully multi-agent AI virtual assistant "
            "with a 3D animated character presence available through both a web interface "
            "and an AR application.\n\n"
            "Built on Google ADK as a multi-agent system, Sofia delivers real-time "
            "lip-synced responses generated by Gemini. Microsoft Azure STT handles "
            "speech-to-text input and Azure TTS handles text-to-speech output, making "
            "her fully voice-interactive.\n\n"
            "A dedicated computer vision module continuously analyses the person she is "
            "speaking with — estimating gender, approximate age, and emotional state — to "
            "personalise every response in real time. Live weather API data adds "
            "environmental context.\n\n"
            "Designed as a customer assistant for an ice cream shop: Sofia recommends "
            "products via real-time customer clustering, logs emotion and satisfaction data "
            "to MongoDB throughout each interaction, and surfaces aggregated insights on a "
            "manager dashboard to support service decisions. SQLite handles fast in-agent "
            "state retrieval. A custom model I trained is hosted on Hugging Face. "
            "The web frontend is React and TypeScript, hosted on Railway during ReXtro 2025. "
            "The AR app is built in Unity."
        ),
        "tech_stack": (
            "React, TypeScript, FastAPI, Google ADK, Gemini API, "
            "Microsoft Azure STT, Microsoft Azure TTS, Computer Vision, "
            "MongoDB, SQLite, Unity (AR), Hugging Face, Railway"
        ),
        "search_text": (
            "Sofia 3D AI virtual agent AR augmented reality Unity web interface React TypeScript "
            "Google ADK Gemini multi-agent real-time lip sync computer vision facial analysis "
            "emotion detection gender age estimation personalisation customer assistant ice cream "
            "shop clustering manager dashboard MongoDB SQLite FastAPI Hugging Face Azure STT TTS "
            "voice interaction weather API ReXtro 2025 exhibition animated character agentic AI "
            "LLM virtual assistant customer service"
        ),
    },

    "MotionX": {
        "name": "MotionX — Motion and Voice Controlled Original Game Series",
        "year": "2025",
        "status": "Completed. Exhibited at ReXtro 2025.",
        "award": None,
        "detail": (
            "MotionX is a series of three original games I built, each controlled entirely "
            "through player body movement and/or voice via a real-time computer vision and "
            "speech detection pipeline. No keyboard, no mouse — the player's body is the "
            "controller.\n\n"
            "LumRun: An endless runner set in a cemetery. The player runs in place to control "
            "the character — the faster they run, the faster the game goes.\n\n"
            "Music Bubble Runner: The player runs to intercept musical bubbles mid-air. "
            "Each bubble popped generates a sound, creating a live reactive soundtrack "
            "driven entirely by the player's movement.\n\n"
            "CWL (Charms and Wands League): A Harry Potter-inspired game. The in-game wand "
            "syncs in real time with the player's physical wand hand movements via pose "
            "estimation. A voice detection system identifies the charm spoken aloud — such as "
            "Expelliarmus or Lumos — and triggers the corresponding in-game visual effect.\n\n"
            "All three games were exhibited alongside Sofia at ReXtro 2025, Faculty of "
            "Engineering, University of Ruhuna, December 2025."
        ),
        "tech_stack": (
            "Python, Computer Vision, Pose Estimation, "
            "Speech Detection, Real-time Body Tracking"
        ),
        "search_text": (
            "MotionX motion controlled games voice controlled games body movement gesture control "
            "no keyboard no mouse computer vision pose estimation speech detection voice recognition "
            "LumRun cemetery endless runner Music Bubble Runner music reactive gameplay "
            "CWL Charms Wands League Harry Potter wand spell charm Expelliarmus Lumos "
            "real-time body tracking Python ReXtro 2025 interactive games original game "
            "human computer interaction physical game controller immersive gaming"
        ),
    },

    "Groceria": {
        "name": "Groceria — Multi-Agent Smart Shopping Planning System",
        "year": "2025",
        "status": "Completed. SLAIC AI Challenge 2025 Finalist.",
        "award": "Finalist — SLAIC AI Challenge 2025",
        "detail": (
            "Groceria is a cooperative multi-agent system that generates fully optimised "
            "grocery shopping plans.\n\n"
            "Agents perform real-time web scraping of vendor product pages. Scraped data is "
            "cached in an automatically self-updating database that refreshes only after a "
            "configurable expiry window — always working with fresh prices without unnecessary "
            "re-scraping.\n\n"
            "The planning pipeline reasons jointly over: user food preferences and dietary "
            "requirements, product availability across vendors, delivery costs per vendor, "
            "budget constraints, and applicable credit card-specific discounts. The output is "
            "the most cost-effective, preference-aligned shopping plan from the given list.\n\n"
            "Frontend built in Next.js, backend in FastAPI, agents orchestrated with Google ADK."
        ),
        "tech_stack": (
            "Next.js, FastAPI, Google ADK, Python, "
            "Real-time Web Scraper, Auto-updating Product DB, Multi-Agent Reasoning"
        ),
        "search_text": (
            "Groceria multi-agent grocery shopping planner optimised shopping list web scraping "
            "real-time product prices vendor scraper auto-updating database cache expiry "
            "Google ADK FastAPI Next.js budget optimisation delivery cost reasoning "
            "credit card discount preference filtering cooperative agents agent coordination "
            "SLAIC AI Challenge finalist 2025 food shopping optimiser planning system"
        ),
    },

    "QuickRef": {
        "name": "QuickRef — AI Document Question Answering System",
        "year": "2025",
        "status": "Completed.",
        "award": None,
        "detail": (
            "QuickRef is a full retrieval-augmented generation (RAG) pipeline built from "
            "scratch using LangChain and Python.\n\n"
            "Documents — PDFs via PyPDFLoader, web pages via UnstructuredURLLoader — are "
            "split into chunks using RecursiveCharacterTextSplitter, then embedded with "
            "NVIDIA BGE-M3 and stored in a FAISS vector index.\n\n"
            "User questions are embedded and matched against the index via semantic similarity "
            "search. Top matching chunks are passed to NVIDIA Falcon-7B-Instruct via "
            "RetrievalQAWithSourcesChain, which generates a grounded answer with source "
            "citations included in the response.\n\n"
            "A Streamlit interface provides a clean UI for uploading documents or entering "
            "URLs, submitting questions, and viewing cited answers."
        ),
        "tech_stack": (
            "Python, LangChain, FAISS, NVIDIA BGE-M3, Falcon-7B-Instruct, "
            "Streamlit, PyPDFLoader, UnstructuredURLLoader, "
            "RecursiveCharacterTextSplitter, RetrievalQAWithSourcesChain"
        ),
        "search_text": (
            "QuickRef RAG retrieval augmented generation document question answering "
            "PDF URL web page LangChain FAISS vector search NVIDIA BGE-M3 embeddings "
            "Falcon-7B-Instruct semantic search source citations Streamlit UI "
            "PyPDFLoader UnstructuredURLLoader text chunking Python NLP "
            "knowledge extraction question answering AI document chat"
        ),
    },

    "AnoNote": {
        "name": "AnoNote — Anonymous Messaging Platform with NLP Harm Detection",
        "year": "2024",
        "status": "Completed.",
        "award": None,
        "detail": (
            "AnoNote lets users create a unique shareable link. Anyone can send them an "
            "anonymous message through that link — the sender's identity is never revealed.\n\n"
            "Every incoming message passes through a multilingual harm detection pipeline. "
            "The pipeline uses four ML models built with TF-IDF vectorisation: one for "
            "English, one for Sinhala, one for Singlish, and one English-Singlish language "
            "classifier that routes messages to the correct detection model.\n\n"
            "A custom SpaCy pipeline provides an additional layer for predefined harmful "
            "term detection. All models are deployed on Hugging Face and served via a "
            "FastAPI inference backend.\n\n"
            "The platform is built on the MERN stack (MongoDB, Express.js, React.js, Node.js) "
            "with FastAPI handling all NLP inference and SpaCy pipeline calls."
        ),
        "tech_stack": (
            "React.js, Node.js, Express.js, MongoDB, "
            "FastAPI, TF-IDF, SpaCy, Hugging Face, Python, NLP, Machine Learning"
        ),
        "search_text": (
            "AnoNote anonymous messaging platform NLP harm detection toxic content "
            "multilingual English Sinhala Singlish TF-IDF text classification SpaCy "
            "custom pipeline content moderation Hugging Face models FastAPI inference "
            "MERN stack MongoDB Express React Node.js social media safety "
            "code switching Sri Lankan languages machine learning anonymous notes"
        ),
    },

    "CropDisease": {
        "name": "Crop Disease Detection System",
        "year": "2024",
        "status": "Completed.",
        "award": None,
        "detail": (
            "A deep learning system with three CNN models trained using TensorFlow.\n\n"
            "Crop Classifier: Identifies whether the uploaded image is a potato or bell "
            "pepper leaf, then routes it to the correct disease detection model.\n\n"
            "Potato Disease Prediction: Detects and classifies potato leaf diseases "
            "including early blight, late blight, and healthy.\n\n"
            "Bell Pepper Disease Prediction: Identifies diseases in bell pepper leaves "
            "with high classification accuracy.\n\n"
            "The React.js frontend provides an image upload interface and displays the "
            "prediction result, disease description, actionable care tips, and fertiliser "
            "recommendations. The FastAPI backend handles image processing, model routing, "
            "and inference."
        ),
        "tech_stack": (
            "TensorFlow, Python, CNN, Deep Learning, React.js, FastAPI"
        ),
        "search_text": (
            "crop disease detection deep learning CNN convolutional neural network "
            "TensorFlow potato bell pepper leaf disease classification image recognition "
            "plant disease agriculture AI React FastAPI full stack web application "
            "fertiliser recommendation early blight late blight classification "
            "image upload prediction machine learning computer vision agriculture"
        ),
    },

}


# ── Helper: get by name ───────────────────────────────────────────────────────

def get_by_names(names: list[str]) -> str:
    """
    Return formatted detail for a list of project names.

    Names are matched against the PROJECTS keys case-insensitively, ignoring
    surrounding whitespace. A bare string is accepted and treated as a single
    name, and a name requested more than once (in any casing) is rendered
    only once.

    Args:
        names: Project keys to look up, e.g. ["Sofia", "groceria"].

    Returns:
        A prompt-ready Markdown string with one block per distinct requested
        name, in first-seen order, separated by "---" rules. A name that
        matches no project yields a block listing the known project keys.
        Empty or missing input yields an empty string.
    """
    if not names:
        return ""

    # A bare string would otherwise be iterated character by character,
    # producing one "not found" block per letter.
    if isinstance(names, str):
        names = [names]

    name_map = {k.lower(): k for k in PROJECTS}
    known_keys = ", ".join(PROJECTS)  # loop-invariant; used by every miss
    seen: set[str] = set()
    blocks = []

    for name in names:
        cleaned = name.strip()
        lookup = cleaned.lower()

        # Skip repeats so the same project is not pasted into the prompt twice.
        if lookup in seen:
            continue
        seen.add(lookup)

        key = name_map.get(lookup)
        if key is None:
            blocks.append(
                f"### {cleaned}\n"
                f"No detailed info found. The known projects are: "
                f"{known_keys}."
            )
            continue

        p = PROJECTS[key]
        # The award line is optional: it is omitted when "award" is None/empty.
        award_line = f"**Award**: {p['award']}\n" if p["award"] else ""
        blocks.append(
            f"### {p['name']} ({p['year']})\n"
            f"**Status**: {p['status']}\n"
            f"{award_line}"
            f"\n{p['detail']}\n\n"
            f"**Tech stack**: {p['tech_stack']}"
        )

    return "\n\n---\n\n".join(blocks)


# ── Helper: get all texts for FAISS indexing ──────────────────────────────────

def get_all_search_texts() -> list[tuple[str, str]]:
    """
    Collect the text to be embedded for every project.

    Produces one (project_key, search_text) pair per PROJECTS entry, in the
    dict's insertion order. Meant for the FAISS index build at startup;
    per the original note it is called once, from agent.py build_index().
    """
    pairs: list[tuple[str, str]] = []
    for project_key in PROJECTS:
        entry = PROJECTS[project_key]
        pairs.append((project_key, entry["search_text"]))
    return pairs