Spaces:

Sulitha
/

sulitha-agent

Paused

App Files Files Community

sulitha-agent / projects_data.py

Sulitha

use gpt-4.1-nano for response validation instead of Gemini

ea686e1 about 2 months ago

raw

history blame contribute delete

14.4 kB

	"""
	projects_data.py
	================
	Full project detail — never sent automatically.
	Only fetched when a tool is called by the LLM.

	Two tools use this file:

	get_project_by_name(names: list[str])
	Direct dict lookup by project key.
	Used when the user clearly mentions a project by name.
	Example: "tell me about Sofia" -> get_project_by_name(["Sofia"])

	search_projects(query: str)
	Semantic FAISS search over each project's search_text field.
	Used when the user describes something without naming a project.
	Example: "what games did you build?" -> search_projects("games motion voice")

	The FAISS index built at startup embeds only one search_text string per project (6 at present) — tiny and fast.

	TO ADD A NEW PROJECT:
	1. Copy any existing entry as a template.
	2. Fill in all fields, especially search_text (make it rich with synonyms).
	3. Redeploy — FAISS rebuilds automatically at startup.

	TO UPDATE AN EXISTING PROJECT:
	1. Edit the entry below.
	2. Redeploy.
	"""

	# Project catalogue, keyed by a short project key. The key is what
	# get_by_names() matches (case-insensitively) and what
	# get_all_search_texts() pairs with each entry's search_text.
	#
	# Every entry carries the same fields:
	#   name        - display title, used as the heading by get_by_names()
	#   year        - year string shown next to the title
	#   status      - completion / exhibition status line
	#   award       - award text, or None (get_by_names() omits the Award line
	#                 when this is falsy)
	#   detail      - long-form description; paragraphs are separated by "\n\n"
	#   tech_stack  - comma-separated list of technologies
	#   search_text - keyword-rich text returned by get_all_search_texts() for
	#                 the FAISS index; not included in get_by_names() output
	PROJECTS: dict[str, dict] = {

	# Sofia: multi-agent, voice-interactive assistant with a 3D character (web + AR).
	"Sofia": {
	"name": "Sofia — 3D Intelligent Virtual AI Agent",
	"year": "2025",
	"status": "Completed. Exhibited at ReXtro 2025.",
	"award": None,
	"detail": (
	"Sofia is my most advanced project — a fully multi-agent AI virtual assistant "
	"with a 3D animated character presence available through both a web interface "
	"and an AR application.\n\n"
	"Built on Google ADK as a multi-agent system, Sofia delivers real-time "
	"lip-synced responses generated by Gemini. Microsoft Azure STT handles "
	"speech-to-text input and Azure TTS handles text-to-speech output, making "
	"her fully voice-interactive.\n\n"
	"A dedicated computer vision module continuously analyses the person she is "
	"speaking with — estimating gender, approximate age, and emotional state — to "
	"personalise every response in real time. Live weather API data adds "
	"environmental context.\n\n"
	"Designed as a customer assistant for an ice cream shop: Sofia recommends "
	"products via real-time customer clustering, logs emotion and satisfaction data "
	"to MongoDB throughout each interaction, and surfaces aggregated insights on a "
	"manager dashboard to support service decisions. SQLite handles fast in-agent "
	"state retrieval. A custom model I trained is hosted on Hugging Face. "
	"The web frontend is React and TypeScript, hosted on Railway during ReXtro 2025. "
	"The AR app is built in Unity."
	),
	"tech_stack": (
	"React, TypeScript, FastAPI, Google ADK, Gemini API, "
	"Microsoft Azure STT, Microsoft Azure TTS, Computer Vision, "
	"MongoDB, SQLite, Unity (AR), Hugging Face, Railway"
	),
	"search_text": (
	"Sofia 3D AI virtual agent AR augmented reality Unity web interface React TypeScript "
	"Google ADK Gemini multi-agent real-time lip sync computer vision facial analysis "
	"emotion detection gender age estimation personalisation customer assistant ice cream "
	"shop clustering manager dashboard MongoDB SQLite FastAPI Hugging Face Azure STT TTS "
	"voice interaction weather API ReXtro 2025 exhibition animated character agentic AI "
	"LLM virtual assistant customer service"
	),
	},

	# MotionX: three original games controlled by body movement and/or voice.
	"MotionX": {
	"name": "MotionX — Motion and Voice Controlled Original Game Series",
	"year": "2025",
	"status": "Completed. Exhibited at ReXtro 2025.",
	"award": None,
	"detail": (
	"MotionX is a series of three original games I built, each controlled entirely "
	"through player body movement and/or voice via a real-time computer vision and "
	"speech detection pipeline. No keyboard, no mouse — the player's body is the "
	"controller.\n\n"
	"LumRun: An endless runner set in a cemetery. The player runs in place to control "
	"the character — the faster they run, the faster the game goes.\n\n"
	"Music Bubble Runner: The player runs to intercept musical bubbles mid-air. "
	"Each bubble popped generates a sound, creating a live reactive soundtrack "
	"driven entirely by the player's movement.\n\n"
	"CWL (Charms and Wands League): A Harry Potter-inspired game. The in-game wand "
	"syncs in real time with the player's physical wand hand movements via pose "
	"estimation. A voice detection system identifies the charm spoken aloud — such as "
	"Expelliarmus or Lumos — and triggers the corresponding in-game visual effect.\n\n"
	"All three games were exhibited alongside Sofia at ReXtro 2025, Faculty of "
	"Engineering, University of Ruhuna, December 2025."
	),
	"tech_stack": (
	"Python, Computer Vision, Pose Estimation, "
	"Speech Detection, Real-time Body Tracking"
	),
	"search_text": (
	"MotionX motion controlled games voice controlled games body movement gesture control "
	"no keyboard no mouse computer vision pose estimation speech detection voice recognition "
	"LumRun cemetery endless runner Music Bubble Runner music reactive gameplay "
	"CWL Charms Wands League Harry Potter wand spell charm Expelliarmus Lumos "
	"real-time body tracking Python ReXtro 2025 interactive games original game "
	"human computer interaction physical game controller immersive gaming"
	),
	},

	# Groceria: cooperative multi-agent grocery shopping planner.
	"Groceria": {
	"name": "Groceria — Multi-Agent Smart Shopping Planning System",
	"year": "2025",
	"status": "Completed. SLAIC AI Challenge 2025 Finalist.",
	"award": "Finalist — SLAIC AI Challenge 2025",
	"detail": (
	"Groceria is a cooperative multi-agent system that generates fully optimised "
	"grocery shopping plans.\n\n"
	"Agents perform real-time web scraping of vendor product pages. Scraped data is "
	"cached in an automatically self-updating database that refreshes only after a "
	"configurable expiry window — always working with fresh prices without unnecessary "
	"re-scraping.\n\n"
	"The planning pipeline reasons jointly over: user food preferences and dietary "
	"requirements, product availability across vendors, delivery costs per vendor, "
	"budget constraints, and applicable credit card-specific discounts. The output is "
	"the most cost-effective, preference-aligned shopping plan from the given list.\n\n"
	"Frontend built in Next.js, backend in FastAPI, agents orchestrated with Google ADK."
	),
	"tech_stack": (
	"Next.js, FastAPI, Google ADK, Python, "
	"Real-time Web Scraper, Auto-updating Product DB, Multi-Agent Reasoning"
	),
	"search_text": (
	"Groceria multi-agent grocery shopping planner optimised shopping list web scraping "
	"real-time product prices vendor scraper auto-updating database cache expiry "
	"Google ADK FastAPI Next.js budget optimisation delivery cost reasoning "
	"credit card discount preference filtering cooperative agents agent coordination "
	"SLAIC AI Challenge finalist 2025 food shopping optimiser planning system"
	),
	},

	# QuickRef: LangChain-based RAG pipeline for document question answering.
	"QuickRef": {
	"name": "QuickRef — AI Document Question Answering System",
	"year": "2025",
	"status": "Completed.",
	"award": None,
	"detail": (
	"QuickRef is a full retrieval-augmented generation (RAG) pipeline built from "
	"scratch using LangChain and Python.\n\n"
	"Documents — PDFs via PyPDFLoader, web pages via UnstructuredURLLoader — are "
	"split into chunks using RecursiveCharacterTextSplitter, then embedded with "
	"NVIDIA BGE-M3 and stored in a FAISS vector index.\n\n"
	"User questions are embedded and matched against the index via semantic similarity "
	"search. Top matching chunks are passed to NVIDIA Falcon-7B-Instruct via "
	"RetrievalQAWithSourcesChain, which generates a grounded answer with source "
	"citations included in the response.\n\n"
	"A Streamlit interface provides a clean UI for uploading documents or entering "
	"URLs, submitting questions, and viewing cited answers."
	),
	"tech_stack": (
	"Python, LangChain, FAISS, NVIDIA BGE-M3, Falcon-7B-Instruct, "
	"Streamlit, PyPDFLoader, UnstructuredURLLoader, "
	"RecursiveCharacterTextSplitter, RetrievalQAWithSourcesChain"
	),
	"search_text": (
	"QuickRef RAG retrieval augmented generation document question answering "
	"PDF URL web page LangChain FAISS vector search NVIDIA BGE-M3 embeddings "
	"Falcon-7B-Instruct semantic search source citations Streamlit UI "
	"PyPDFLoader UnstructuredURLLoader text chunking Python NLP "
	"knowledge extraction question answering AI document chat"
	),
	},

	# AnoNote: anonymous messaging with multilingual harm detection.
	"AnoNote": {
	"name": "AnoNote — Anonymous Messaging Platform with NLP Harm Detection",
	"year": "2024",
	"status": "Completed.",
	"award": None,
	"detail": (
	"AnoNote lets users create a unique shareable link. Anyone can send them an "
	"anonymous message through that link — the sender's identity is never revealed.\n\n"
	"Every incoming message passes through a multilingual harm detection pipeline. "
	"The pipeline uses four ML models built with TF-IDF vectorisation: one for "
	"English, one for Sinhala, one for Singlish, and one English-Singlish language "
	"classifier that routes messages to the correct detection model.\n\n"
	"A custom SpaCy pipeline provides an additional layer for predefined harmful "
	"term detection. All models are deployed on Hugging Face and served via a "
	"FastAPI inference backend.\n\n"
	"The platform is built on the MERN stack (MongoDB, Express.js, React.js, Node.js) "
	"with FastAPI handling all NLP inference and SpaCy pipeline calls."
	),
	"tech_stack": (
	"React.js, Node.js, Express.js, MongoDB, "
	"FastAPI, TF-IDF, SpaCy, Hugging Face, Python, NLP, Machine Learning"
	),
	"search_text": (
	"AnoNote anonymous messaging platform NLP harm detection toxic content "
	"multilingual English Sinhala Singlish TF-IDF text classification SpaCy "
	"custom pipeline content moderation Hugging Face models FastAPI inference "
	"MERN stack MongoDB Express React Node.js social media safety "
	"code switching Sri Lankan languages machine learning anonymous notes"
	),
	},

	# CropDisease: CNN-based potato / bell pepper leaf disease detection.
	"CropDisease": {
	"name": "Crop Disease Detection System",
	"year": "2024",
	"status": "Completed.",
	"award": None,
	"detail": (
	"A deep learning system with three CNN models trained using TensorFlow.\n\n"
	"Crop Classifier: Identifies whether the uploaded image is a potato or bell "
	"pepper leaf, then routes it to the correct disease detection model.\n\n"
	"Potato Disease Prediction: Detects and classifies potato leaf diseases "
	"including early blight, late blight, and healthy.\n\n"
	"Bell Pepper Disease Prediction: Identifies diseases in bell pepper leaves "
	"with high classification accuracy.\n\n"
	"The React.js frontend provides an image upload interface and displays the "
	"prediction result, disease description, actionable care tips, and fertiliser "
	"recommendations. The FastAPI backend handles image processing, model routing, "
	"and inference."
	),
	"tech_stack": (
	"TensorFlow, Python, CNN, Deep Learning, React.js, FastAPI"
	),
	"search_text": (
	"crop disease detection deep learning CNN convolutional neural network "
	"TensorFlow potato bell pepper leaf disease classification image recognition "
	"plant disease agriculture AI React FastAPI full stack web application "
	"fertiliser recommendation early blight late blight classification "
	"image upload prediction machine learning computer vision agriculture"
	),
	},

	}


	# ── Helper: get by name ───────────────────────────────────────────────────────

	def get_by_names(names: list[str]) -> str:
	    """
	    Return formatted, prompt-ready detail for the requested projects.

	    Each name is matched against the PROJECTS keys case-insensitively, with
	    surrounding whitespace ignored. A matched project is rendered as a
	    markdown-style block (title, year, status, optional award, detail, tech
	    stack). An unmatched name gets a block listing the known project keys.
	    Blocks are joined with a "---" separator.

	    Args:
	        names: Project keys to look up. A single bare string is accepted and
	            treated as a one-element list. The same project requested more
	            than once (in any letter case) is rendered only once, at its
	            first position.

	    Returns:
	        The joined blocks, or "" when nothing was requested.
	    """
	    # Nothing requested (None, [] or "") -> nothing to render.
	    if not names:
	        return ""

	    # A bare string would otherwise be iterated character by character,
	    # producing one "not found" block per letter.
	    if isinstance(names, str):
	        names = [names]

	    name_map = {k.lower(): k for k in PROJECTS}
	    known_projects = ", ".join(PROJECTS)  # loop-invariant, build once
	    seen: set[str] = set()
	    blocks: list[str] = []

	    for name in names:
	        lookup = name.strip().lower()
	        # Skip repeats so the prompt is not padded with identical blocks.
	        if lookup in seen:
	            continue
	        seen.add(lookup)

	        key = name_map.get(lookup)
	        if key is None:
	            blocks.append(
	                f"### {name}\n"
	                f"No detailed info found. The known projects are: "
	                f"{known_projects}."
	            )
	            continue

	        project = PROJECTS[key]
	        # .get(): an entry added without an "award" field is treated as
	        # "no award" instead of raising KeyError.
	        award = project.get("award")
	        award_line = f"Award: {award}\n" if award else ""
	        blocks.append(
	            f"### {project['name']} ({project['year']})\n"
	            f"Status: {project['status']}\n"
	            f"{award_line}"
	            f"\n{project['detail']}\n\n"
	            f"Tech stack: {project['tech_stack']}"
	        )

	    return "\n\n---\n\n".join(blocks)


	# ── Helper: get all texts for FAISS indexing ──────────────────────────────────

	def get_all_search_texts() -> list[tuple[str, str]]:
	    """
	    Collect the text to embed for every project.

	    Produces one (project_key, search_text) pair per PROJECTS entry, in the
	    dict's own order. Intended as the input for the FAISS index build at
	    startup (the original note says it is called once, from
	    agent.py build_index()).
	    """
	    pairs = []
	    for project_key in PROJECTS:
	        entry = PROJECTS[project_key]
	        pairs.append((project_key, entry["search_text"]))
	    return pairs