# sulitha-agent / projects_data.py
# Sulitha's picture
# use gpt-4.1-nano for response validation instead of Gemini
# ea686e1
"""
projects_data.py
================
Full project detail — never sent automatically.
Only fetched when a tool is called by the LLM.
Two tools use this file:
get_project_by_name(names: list[str])
Direct dict lookup by project key.
Used when the user mentions a project by name clearly.
Example: "tell me about Sofia" -> get_project_by_name(["Sofia"])
search_projects(query: str)
Semantic FAISS search over each project's search_text field.
Used when the user describes something without naming a project.
Example: "what games did you build?" -> search_projects("games motion voice")
FAISS index at startup only embeds the 6 search_text strings — tiny and fast.
TO ADD A NEW PROJECT:
1. Copy any existing entry as a template.
2. Fill in all fields, especially search_text (make it rich with synonyms).
3. Redeploy — FAISS rebuilds automatically at startup.
TO UPDATE AN EXISTING PROJECT:
1. Edit the entry below.
2. Redeploy.
"""
# Project catalogue, keyed by the short project name used for direct lookup.
#
# Every entry carries the same fields:
#   name        - display title, used as the heading of the formatted output
#   year        - project year, stored as a string
#   status      - one-line completion / exhibition status
#   award       - award text, or None when the project has no award
#   detail      - long-form description; paragraphs are separated by "\n\n"
#   tech_stack  - comma-separated list of technologies
#   search_text - keyword-rich text that is embedded for semantic search
#
# All text is stored as real UTF-8 (em dashes are "—"), because these strings
# are emitted verbatim into the prompt.
PROJECTS: dict[str, dict] = {
    "Sofia": {
        "name": "Sofia — 3D Intelligent Virtual AI Agent",
        "year": "2025",
        "status": "Completed. Exhibited at ReXtro 2025.",
        "award": None,
        "detail": (
            "Sofia is my most advanced project — a fully multi-agent AI virtual assistant "
            "with a 3D animated character presence available through both a web interface "
            "and an AR application.\n\n"
            "Built on Google ADK as a multi-agent system, Sofia delivers real-time "
            "lip-synced responses generated by Gemini. Microsoft Azure STT handles "
            "speech-to-text input and Azure TTS handles text-to-speech output, making "
            "her fully voice-interactive.\n\n"
            "A dedicated computer vision module continuously analyses the person she is "
            "speaking with — estimating gender, approximate age, and emotional state — to "
            "personalise every response in real time. Live weather API data adds "
            "environmental context.\n\n"
            "Designed as a customer assistant for an ice cream shop: Sofia recommends "
            "products via real-time customer clustering, logs emotion and satisfaction data "
            "to MongoDB throughout each interaction, and surfaces aggregated insights on a "
            "manager dashboard to support service decisions. SQLite handles fast in-agent "
            "state retrieval. A custom model I trained is hosted on Hugging Face. "
            "The web frontend is React and TypeScript, hosted on Railway during ReXtro 2025. "
            "The AR app is built in Unity."
        ),
        "tech_stack": (
            "React, TypeScript, FastAPI, Google ADK, Gemini API, "
            "Microsoft Azure STT, Microsoft Azure TTS, Computer Vision, "
            "MongoDB, SQLite, Unity (AR), Hugging Face, Railway"
        ),
        "search_text": (
            "Sofia 3D AI virtual agent AR augmented reality Unity web interface React TypeScript "
            "Google ADK Gemini multi-agent real-time lip sync computer vision facial analysis "
            "emotion detection gender age estimation personalisation customer assistant ice cream "
            "shop clustering manager dashboard MongoDB SQLite FastAPI Hugging Face Azure STT TTS "
            "voice interaction weather API ReXtro 2025 exhibition animated character agentic AI "
            "LLM virtual assistant customer service"
        ),
    },
    "MotionX": {
        "name": "MotionX — Motion and Voice Controlled Original Game Series",
        "year": "2025",
        "status": "Completed. Exhibited at ReXtro 2025.",
        "award": None,
        "detail": (
            "MotionX is a series of three original games I built, each controlled entirely "
            "through player body movement and/or voice via a real-time computer vision and "
            "speech detection pipeline. No keyboard, no mouse — the player's body is the "
            "controller.\n\n"
            "LumRun: An endless runner set in a cemetery. The player runs in place to control "
            "the character — the faster they run, the faster the game goes.\n\n"
            "Music Bubble Runner: The player runs to intercept musical bubbles mid-air. "
            "Each bubble popped generates a sound, creating a live reactive soundtrack "
            "driven entirely by the player's movement.\n\n"
            "CWL (Charms and Wands League): A Harry Potter-inspired game. The in-game wand "
            "syncs in real time with the player's physical wand hand movements via pose "
            "estimation. A voice detection system identifies the charm spoken aloud — such as "
            "Expelliarmus or Lumos — and triggers the corresponding in-game visual effect.\n\n"
            "All three games were exhibited alongside Sofia at ReXtro 2025, Faculty of "
            "Engineering, University of Ruhuna, December 2025."
        ),
        "tech_stack": (
            "Python, Computer Vision, Pose Estimation, "
            "Speech Detection, Real-time Body Tracking"
        ),
        "search_text": (
            "MotionX motion controlled games voice controlled games body movement gesture control "
            "no keyboard no mouse computer vision pose estimation speech detection voice recognition "
            "LumRun cemetery endless runner Music Bubble Runner music reactive gameplay "
            "CWL Charms Wands League Harry Potter wand spell charm Expelliarmus Lumos "
            "real-time body tracking Python ReXtro 2025 interactive games original game "
            "human computer interaction physical game controller immersive gaming"
        ),
    },
    "Groceria": {
        "name": "Groceria — Multi-Agent Smart Shopping Planning System",
        "year": "2025",
        "status": "Completed. SLAIC AI Challenge 2025 Finalist.",
        "award": "Finalist — SLAIC AI Challenge 2025",
        "detail": (
            "Groceria is a cooperative multi-agent system that generates fully optimised "
            "grocery shopping plans.\n\n"
            "Agents perform real-time web scraping of vendor product pages. Scraped data is "
            "cached in an automatically self-updating database that refreshes only after a "
            "configurable expiry window — always working with fresh prices without unnecessary "
            "re-scraping.\n\n"
            "The planning pipeline reasons jointly over: user food preferences and dietary "
            "requirements, product availability across vendors, delivery costs per vendor, "
            "budget constraints, and applicable credit card-specific discounts. The output is "
            "the most cost-effective, preference-aligned shopping plan from the given list.\n\n"
            "Frontend built in Next.js, backend in FastAPI, agents orchestrated with Google ADK."
        ),
        "tech_stack": (
            "Next.js, FastAPI, Google ADK, Python, "
            "Real-time Web Scraper, Auto-updating Product DB, Multi-Agent Reasoning"
        ),
        "search_text": (
            "Groceria multi-agent grocery shopping planner optimised shopping list web scraping "
            "real-time product prices vendor scraper auto-updating database cache expiry "
            "Google ADK FastAPI Next.js budget optimisation delivery cost reasoning "
            "credit card discount preference filtering cooperative agents agent coordination "
            "SLAIC AI Challenge finalist 2025 food shopping optimiser planning system"
        ),
    },
    "QuickRef": {
        "name": "QuickRef — AI Document Question Answering System",
        "year": "2025",
        "status": "Completed.",
        "award": None,
        "detail": (
            "QuickRef is a full retrieval-augmented generation (RAG) pipeline built from "
            "scratch using LangChain and Python.\n\n"
            "Documents — PDFs via PyPDFLoader, web pages via UnstructuredURLLoader — are "
            "split into chunks using RecursiveCharacterTextSplitter, then embedded with "
            "NVIDIA BGE-M3 and stored in a FAISS vector index.\n\n"
            "User questions are embedded and matched against the index via semantic similarity "
            "search. Top matching chunks are passed to NVIDIA Falcon-7B-Instruct via "
            "RetrievalQAWithSourcesChain, which generates a grounded answer with source "
            "citations included in the response.\n\n"
            "A Streamlit interface provides a clean UI for uploading documents or entering "
            "URLs, submitting questions, and viewing cited answers."
        ),
        "tech_stack": (
            "Python, LangChain, FAISS, NVIDIA BGE-M3, Falcon-7B-Instruct, "
            "Streamlit, PyPDFLoader, UnstructuredURLLoader, "
            "RecursiveCharacterTextSplitter, RetrievalQAWithSourcesChain"
        ),
        "search_text": (
            "QuickRef RAG retrieval augmented generation document question answering "
            "PDF URL web page LangChain FAISS vector search NVIDIA BGE-M3 embeddings "
            "Falcon-7B-Instruct semantic search source citations Streamlit UI "
            "PyPDFLoader UnstructuredURLLoader text chunking Python NLP "
            "knowledge extraction question answering AI document chat"
        ),
    },
    "AnoNote": {
        "name": "AnoNote — Anonymous Messaging Platform with NLP Harm Detection",
        "year": "2024",
        "status": "Completed.",
        "award": None,
        "detail": (
            "AnoNote lets users create a unique shareable link. Anyone can send them an "
            "anonymous message through that link — the sender's identity is never revealed.\n\n"
            "Every incoming message passes through a multilingual harm detection pipeline. "
            "The pipeline uses four ML models built with TF-IDF vectorisation: one for "
            "English, one for Sinhala, one for Singlish, and one English-Singlish language "
            "classifier that routes messages to the correct detection model.\n\n"
            "A custom SpaCy pipeline provides an additional layer for predefined harmful "
            "term detection. All models are deployed on Hugging Face and served via a "
            "FastAPI inference backend.\n\n"
            "The platform is built on the MERN stack (MongoDB, Express.js, React.js, Node.js) "
            "with FastAPI handling all NLP inference and SpaCy pipeline calls."
        ),
        "tech_stack": (
            "React.js, Node.js, Express.js, MongoDB, "
            "FastAPI, TF-IDF, SpaCy, Hugging Face, Python, NLP, Machine Learning"
        ),
        "search_text": (
            "AnoNote anonymous messaging platform NLP harm detection toxic content "
            "multilingual English Sinhala Singlish TF-IDF text classification SpaCy "
            "custom pipeline content moderation Hugging Face models FastAPI inference "
            "MERN stack MongoDB Express React Node.js social media safety "
            "code switching Sri Lankan languages machine learning anonymous notes"
        ),
    },
    "CropDisease": {
        "name": "Crop Disease Detection System",
        "year": "2024",
        "status": "Completed.",
        "award": None,
        "detail": (
            "A deep learning system with three CNN models trained using TensorFlow.\n\n"
            "Crop Classifier: Identifies whether the uploaded image is a potato or bell "
            "pepper leaf, then routes it to the correct disease detection model.\n\n"
            "Potato Disease Prediction: Detects and classifies potato leaf diseases "
            "including early blight, late blight, and healthy.\n\n"
            "Bell Pepper Disease Prediction: Identifies diseases in bell pepper leaves "
            "with high classification accuracy.\n\n"
            "The React.js frontend provides an image upload interface and displays the "
            "prediction result, disease description, actionable care tips, and fertiliser "
            "recommendations. The FastAPI backend handles image processing, model routing, "
            "and inference."
        ),
        "tech_stack": (
            "TensorFlow, Python, CNN, Deep Learning, React.js, FastAPI"
        ),
        "search_text": (
            "crop disease detection deep learning CNN convolutional neural network "
            "TensorFlow potato bell pepper leaf disease classification image recognition "
            "plant disease agriculture AI React FastAPI full stack web application "
            "fertiliser recommendation early blight late blight classification "
            "image upload prediction machine learning computer vision agriculture"
        ),
    },
}
# ── Helper: get by name ──────────────────────────────────────────────────────
def _normalise_project_name(text: str) -> str:
    """Reduce a project name to lowercase letters and digits for lookup."""
    # Dropping whitespace and punctuation lets "Crop Disease", "crop-disease"
    # and "CropDisease" all compare equal.
    return "".join(ch for ch in text.lower() if ch.isalnum())


def get_by_names(names: list[str]) -> str:
    """
    Return formatted detail for a list of project names.

    Each requested name is matched against the PROJECTS keys and against the
    projects' display names ("name" field). Matching ignores case, surrounding
    whitespace and any non-alphanumeric characters, so "sofia",
    " Crop Disease " and "crop-disease" all resolve to a project.

    Args:
        names: Project names to look up, in the order they should appear.

    Returns:
        A prompt-ready Markdown string with one block per requested name, in
        request order, separated by "---" rules. A name that matches nothing
        produces a "not found" block that lists the known project keys. An
        empty list produces an empty string.
    """
    # Keys are registered before display names so that a key always wins if a
    # display name happens to normalise to the same text.
    aliases: dict[str, str] = {}
    for key in PROJECTS:
        aliases.setdefault(_normalise_project_name(key), key)
    for key, project in PROJECTS.items():
        aliases.setdefault(_normalise_project_name(project["name"]), key)

    known_keys = ", ".join(PROJECTS)
    blocks = []
    for name in names:
        key = aliases.get(_normalise_project_name(name))
        if key is None:
            # Echo the name exactly as requested so the caller can see what
            # failed to match.
            blocks.append(
                f"### {name}\n"
                "No detailed info found. The known projects are: "
                f"{known_keys}."
            )
            continue

        p = PROJECTS[key]
        # The award line is omitted entirely when the project has no award.
        award_line = f"**Award**: {p['award']}\n" if p["award"] else ""
        blocks.append(
            f"### {p['name']} ({p['year']})\n"
            f"**Status**: {p['status']}\n"
            f"{award_line}"
            f"\n{p['detail']}\n\n"
            f"**Tech stack**: {p['tech_stack']}"
        )
    return "\n\n---\n\n".join(blocks)
# ── Helper: get all texts for FAISS indexing ────────────────────────────────
def get_all_search_texts() -> list[tuple[str, str]]:
    """
    Collect the text to embed for every project.

    Returns:
        One (project_key, search_text) tuple per PROJECTS entry, in the
        dict's insertion order. Intended for the FAISS index build at
        startup (per the original note: called once, from agent.py
        build_index()).
    """
    pairs: list[tuple[str, str]] = []
    for project_key in PROJECTS:
        entry = PROJECTS[project_key]
        pairs.append((project_key, entry["search_text"]))
    return pairs