Spaces:
Sleeping
Sleeping
Commit ·
bc620e9
0
Parent(s):
Initial commit
Browse files- .gitattributes +2 -0
- .github/workflows/sync_to_hub.yml +19 -0
- .gitignore +24 -0
- Dockerfile +22 -0
- README.md +2 -0
- app.py +405 -0
- backend/database.py +38 -0
- backend/main.py +164 -0
- backend/portfolio.sqlite +0 -0
- backend/rag.py +88 -0
- backend/tools.py +120 -0
- backend/vector_store/chunks_metadata.pkl +0 -0
- backend/vector_store/faiss_index.bin +0 -0
- data/articles.json +44 -0
- data/certifications.json +9 -0
- data/education.json +14 -0
- data/experience.json +16 -0
- data/profile.json +13 -0
- data/projects.json +92 -0
- data/research.json +20 -0
- data/skills.json +37 -0
- data/summaries/about_summary.txt +1 -0
- data/summaries/articles_summary.txt +1 -0
- data/summaries/certifications_summary.txt +1 -0
- data/summaries/projects_summary.txt +1 -0
- data/summaries/research_summary.txt +1 -0
- data/summaries/skills_summary.txt +1 -0
- data/summaries/videos_summary.txt +1 -0
- data/videos.json +16 -0
- requirements.txt +8 -0
- scripts/create_vector_db.py +136 -0
- scripts/setup_db.py +149 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Auto detect text files and perform LF normalization
|
| 2 |
+
* text=auto
|
.github/workflows/sync_to_hub.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face hub
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches: [main]
|
| 5 |
+
workflow_dispatch:
|
| 6 |
+
|
| 7 |
+
jobs:
|
| 8 |
+
sync-to-hub:
|
| 9 |
+
runs-on: ubuntu-latest
|
| 10 |
+
steps:
|
| 11 |
+
- uses: actions/checkout@v3
|
| 12 |
+
with:
|
| 13 |
+
fetch-depth: 0
|
| 14 |
+
lfs: true
|
| 15 |
+
- name: Push to hub
|
| 16 |
+
env:
|
| 17 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 18 |
+
# Added --force to overwrite the default Hugging Face README
|
| 19 |
+
run: git push --force https://tharu280:$HF_TOKEN@huggingface.co/spaces/tharu280/portfolio-rag-api main
|
.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- Security (NEVER COMMIT THESE) ---
|
| 2 |
+
.env
|
| 3 |
+
firebase_credentials.json
|
| 4 |
+
*.pem
|
| 5 |
+
*.key
|
| 6 |
+
|
| 7 |
+
# --- Python & OS Junk ---
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.pyc
|
| 10 |
+
venv/
|
| 11 |
+
.venv/
|
| 12 |
+
env/
|
| 13 |
+
.idea/
|
| 14 |
+
.vscode/
|
| 15 |
+
.DS_Store
|
| 16 |
+
|
| 17 |
+
# --- Hosting / Build ---
|
| 18 |
+
# We generally ignore build artifacts, but for your specific
|
| 19 |
+
# "Local Build -> Cloud Run" strategy, we DO want to commit
|
| 20 |
+
# your database and vector store so Render can see them.
|
| 21 |
+
#
|
| 22 |
+
# DO NOT UNCOMMENT THESE LINES unless you switch to an external DB:
|
| 23 |
+
# backend/portfolio.sqlite
|
| 24 |
+
# backend/vector_store/
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 1. Use the specific Python version matching your local environment
|
| 2 |
+
FROM python:3.10.16-slim
|
| 3 |
+
|
| 4 |
+
# 2. Set the working directory inside the container
|
| 5 |
+
WORKDIR /code
|
| 6 |
+
|
| 7 |
+
# 3. Copy requirements first (for better caching)
|
| 8 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 9 |
+
|
| 10 |
+
# 4. Install dependencies
|
| 11 |
+
# We use --no-cache-dir to keep the image small
|
| 12 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 13 |
+
|
| 14 |
+
# 5. Copy the rest of your application code
|
| 15 |
+
COPY . /code
|
| 16 |
+
|
| 17 |
+
# 6. CRITICAL: Give write permissions to the backend folder
|
| 18 |
+
# This allows SQLite to create lock files and update the DB
|
| 19 |
+
RUN chmod -R 777 /code/backend
|
| 20 |
+
|
| 21 |
+
# 7. Start the app on port 7860 (Hugging Face default)
|
| 22 |
+
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# portfolio rag
|
| 2 |
+
|
app.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import requests
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# --- Configuration ---
|
| 8 |
+
API_URL = "http://127.0.0.1:8000/chat"
|
| 9 |
+
IMAGES_DIR = "images"
|
| 10 |
+
|
| 11 |
+
# --- Page Setup ---
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="Tharushika | AI Portfolio",
|
| 14 |
+
page_icon="👋",
|
| 15 |
+
layout="centered",
|
| 16 |
+
initial_sidebar_state="collapsed"
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
st.markdown("""
|
| 21 |
+
<style>
|
| 22 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
|
| 23 |
+
|
| 24 |
+
html, body, .stApp {
|
| 25 |
+
background-color: #ffffff !important;
|
| 26 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
|
| 27 |
+
color: #1d1d1f !important;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
p, .stMarkdown, .stText {
|
| 31 |
+
color: #333333;
|
| 32 |
+
line-height: 1.6;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
h1, h2, h3, h4, h5, h6 {
|
| 36 |
+
font-weight: 600 !important;
|
| 37 |
+
letter-spacing: -0.02em !important;
|
| 38 |
+
color: #1d1d1f !important;
|
| 39 |
+
}
|
| 40 |
+
h1 { font-size: 2.8rem !important; margin-bottom: 0.5rem !important;}
|
| 41 |
+
h2 { font-size: 2.2rem !important; margin-top: 0.5rem !important;}
|
| 42 |
+
h3 { font-size: 1.5rem !important; margin-top: 2rem !important; margin-bottom: 1rem !important;}
|
| 43 |
+
h4 { font-size: 1.2rem !important; margin-top: 1rem !important;}
|
| 44 |
+
|
| 45 |
+
.centered-title h1, .centered-title h2 {
|
| 46 |
+
text-align: center;
|
| 47 |
+
width: 100%;
|
| 48 |
+
}
|
| 49 |
+
.centered-title h2 {
|
| 50 |
+
color: #6e6e73 !important;
|
| 51 |
+
font-weight: 500 !important;
|
| 52 |
+
font-size: 1.8rem !important;
|
| 53 |
+
}
|
| 54 |
+
.centered-profile-pic {
|
| 55 |
+
display: flex;
|
| 56 |
+
justify-content: center;
|
| 57 |
+
margin-top: 2rem;
|
| 58 |
+
margin-bottom: 2rem;
|
| 59 |
+
}
|
| 60 |
+
.centered-profile-pic img {
|
| 61 |
+
border-radius: 20px;
|
| 62 |
+
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
|
| 63 |
+
border: 1px solid rgba(0,0,0,0.05);
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
.stChatMessage {
|
| 67 |
+
background-color: transparent !important;
|
| 68 |
+
border: none !important;
|
| 69 |
+
padding: 0.8rem 0 !important;
|
| 70 |
+
}
|
| 71 |
+
.stChatMessage [data-testid="stChatMessageContent"] {
|
| 72 |
+
background-color: #f5f5f7 !important;
|
| 73 |
+
border-radius: 18px !important;
|
| 74 |
+
padding: 0.8rem 1.2rem !important;
|
| 75 |
+
box-shadow: 0 1px 2px rgba(0,0,0,0.05);
|
| 76 |
+
color: #1d1d1f !important;
|
| 77 |
+
font-size: 1.0rem !important;
|
| 78 |
+
line-height: 1.5 !important;
|
| 79 |
+
}
|
| 80 |
+
.stChatMessage[data-testid="user-message"] [data-testid="stChatMessageContent"] {
|
| 81 |
+
background-color: #0071e3 !important;
|
| 82 |
+
color: white !important;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
div[data-testid="stVerticalBlock"] > [style*="flex-direction: column;"] > [data-testid="stVerticalBlock"] {
|
| 86 |
+
background-color: #ffffff;
|
| 87 |
+
border: 1px solid rgba(0,0,0,0.08);
|
| 88 |
+
border-radius: 16px !important;
|
| 89 |
+
padding: 20px !important;
|
| 90 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.03);
|
| 91 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease;
|
| 92 |
+
margin-bottom: 15px;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
div[data-testid="stVerticalBlock"] > [style*="flex-direction: column;"] > [data-testid="stVerticalBlock"]:hover {
|
| 96 |
+
transform: translateY(-2px);
|
| 97 |
+
box-shadow: 0 10px 25px rgba(0,0,0,0.08);
|
| 98 |
+
border-color: rgba(0,0,0,0.15);
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.stButton > button {
|
| 102 |
+
border-radius: 12px !important;
|
| 103 |
+
font-weight: 500 !important;
|
| 104 |
+
border: 1px solid #d2d2d7 !important;
|
| 105 |
+
background-color: #ffffff !important;
|
| 106 |
+
color: #1d1d1f !important;
|
| 107 |
+
padding: 0.6rem 1rem !important;
|
| 108 |
+
transition: all 0.2s !important;
|
| 109 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 110 |
+
width: 100%;
|
| 111 |
+
}
|
| 112 |
+
.stButton > button:hover {
|
| 113 |
+
background-color: #f5f5f7 !important;
|
| 114 |
+
border-color: #c0c0c5 !important;
|
| 115 |
+
transform: translateY(-1px);
|
| 116 |
+
box-shadow: 0 2px 6px rgba(0,0,0,0.1);
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.stTextInput input {
|
| 120 |
+
border-radius: 12px !important;
|
| 121 |
+
border: 1px solid #d2d2d7 !important;
|
| 122 |
+
padding: 12px 15px !important;
|
| 123 |
+
font-size: 1rem !important;
|
| 124 |
+
background-color: rgba(255,255,255,0.8) !important;
|
| 125 |
+
backdrop-filter: blur(10px);
|
| 126 |
+
color: #1d1d1f !important;
|
| 127 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 128 |
+
}
|
| 129 |
+
.stTextInput input:focus {
|
| 130 |
+
border-color: #0071e3 !important;
|
| 131 |
+
box-shadow: 0 0 0 4px rgba(0,113,227,0.15) !important;
|
| 132 |
+
}
|
| 133 |
+
div.stChatInputContainer {
|
| 134 |
+
padding-top: 15px;
|
| 135 |
+
background-color: #ffffff;
|
| 136 |
+
padding-bottom: 1rem;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
a { color: #0071e3 !important; text-decoration: none !important; }
|
| 140 |
+
a:hover { text-decoration: underline !important; }
|
| 141 |
+
|
| 142 |
+
#MainMenu {visibility: hidden;}
|
| 143 |
+
footer {visibility: hidden;}
|
| 144 |
+
header {visibility: hidden;}
|
| 145 |
+
|
| 146 |
+
.quick-action-button .stButton > button {
|
| 147 |
+
border-radius: 999px !important;
|
| 148 |
+
padding: 0.8rem 1.5rem !important;
|
| 149 |
+
width: auto;
|
| 150 |
+
}
|
| 151 |
+
.quick-action-button {
|
| 152 |
+
display: flex;
|
| 153 |
+
justify-content: center;
|
| 154 |
+
margin-top: 2rem;
|
| 155 |
+
gap: 15px;
|
| 156 |
+
flex-wrap: wrap;
|
| 157 |
+
}
|
| 158 |
+
</style>
|
| 159 |
+
""", unsafe_allow_html=True)
|
| 160 |
+
|
| 161 |
+
# --- Helper Functions ---
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def render_projects(data):
    """Render the featured-projects section as a two-column grid of cards.

    Each card shows the project image (or a gradient placeholder when the
    file is missing), the title, an upper-cased type caption, and an
    expandable details panel with description, tech stack and links.
    """
    st.markdown("### Featured Projects")
    if not data:
        st.info("No projects data received.")
        return
    columns = st.columns(2)
    for idx, project in enumerate(data):
        # Alternate cards between the two columns.
        with columns[idx % 2]:
            with st.container(border=True):
                image_path = project.get("image_path", "")
                if image_path and os.path.exists(image_path):
                    st.image(image_path, use_container_width=True)
                else:
                    # Gradient placeholder keeps card heights consistent.
                    st.markdown(f"""<div style='height:140px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 12px; display:flex; align-items:center; justify-content:center; color:#666;'>No Image</div>""", unsafe_allow_html=True)

                st.markdown(f"#### {project.get('title', 'Untitled')}")
                st.caption(project.get('type', 'Project').upper())

                with st.expander("View Details"):
                    st.write(project.get('description', ''))
                    st.markdown(
                        f"**Tech Stack:** {project.get('technologies', '')}")
                    link_parts = []
                    if project.get('github_url'):
                        link_parts.append(f"[GitHub]({project.get('github_url')})")
                    if project.get('demo_url'):
                        link_parts.append(f"[Live Demo]({project.get('demo_url')})")
                    if link_parts:
                        st.markdown(" • ".join(link_parts))
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def render_skills(data):
    """Render skill categories as bordered cards of pill-shaped badges.

    *data* is expected to map category name -> iterable of skill strings
    (as produced by the backend skills tool).
    """
    st.markdown("### Skills & Expertise")
    if not data:
        st.info("No skills data received.")
        return
    for category_name, skill_names in data.items():
        with st.container(border=True):
            st.markdown(f"**{category_name}**")
            # Build all badge spans in one string so they flow inline.
            badge_html = "".join(
                f"<span style='background:#f5f5f7; padding:4px 10px; border-radius:12px; margin:0 5px 5px 0; display:inline-block; font-size:0.85rem;'>{s}</span>"
                for s in skill_names
            )
            st.markdown(badge_html, unsafe_allow_html=True)
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def render_articles(data):
    """Render a vertical list of article cards with title, blurb and link."""
    st.markdown("### Articles")
    if not data:
        st.info("No articles found.")
        return
    for article in data:
        with st.container(border=True):
            st.markdown(f"**{article.get('title', 'Untitled')}**")
            description_html = (
                f"<p style='color:#666; font-size:0.9rem;'>{article.get('description', '')}</p>"
            )
            st.markdown(description_html, unsafe_allow_html=True)
            if article.get('url'):
                st.markdown(f"[Read Article ›]({article['url']})")
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def render_videos(data):
    """Render video tutorials as a two-column grid of thumbnail cards."""
    st.markdown("### Video Tutorials")
    if not data:
        st.info("No videos found.")
        return
    columns = st.columns(2)
    for idx, video in enumerate(data):
        with columns[idx % 2]:
            with st.container(border=True):
                thumbnail = video.get('thumbnail_url', "")
                # Only local thumbnail files are rendered; remote URLs are
                # skipped because the path is checked against the filesystem.
                if thumbnail and os.path.exists(thumbnail):
                    st.image(thumbnail, use_container_width=True)
                st.markdown(f"**{video.get('title', 'Untitled')}**")
                description_html = (
                    f"<p style='color:#666; font-size:0.9rem;'>{video.get('description', '')}</p>"
                )
                st.markdown(description_html, unsafe_allow_html=True)
                if video.get('url'):
                    st.markdown(f"[Watch on YouTube ›]({video['url']})")
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def render_research(data):
    """Render research publications as a vertical list of cards."""
    st.markdown("### Research")
    if not data:
        st.info("No research found.")
        return
    for publication in data:
        with st.container(border=True):
            st.markdown(f"**{publication.get('title', 'Untitled')}**")
            description_html = (
                f"<p style='color:#666; font-size:0.9rem;'>{publication.get('description', '')}</p>"
            )
            st.markdown(description_html, unsafe_allow_html=True)
            if publication.get('url'):
                st.markdown(f"[View Publication ›]({publication['url']})")
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def render_certifications(data):
    """Render certifications as a simple medal-icon list.

    *data* is a flat list of certification name strings (see the
    certifications branch of the backend database helper).
    """
    st.markdown("### Certifications")
    if not data:
        st.info("No certifications found.")
        return
    for cert_name in data:
        st.markdown(f"""
        <div style='display:flex; align-items:center; margin-bottom:10px;'>
            <span style='font-size:1.2rem; margin-right:10px;'>🎖️</span>
            <span style='font-size:1rem; font-weight:500;'>{cert_name}</span>
        </div>
        """, unsafe_allow_html=True)
|
| 267 |
+
|
| 268 |
+
# --- NEW: Resume Renderer ---
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def render_resume(data):
    """Render the resume card: preview image on the left, download on the right.

    Expects *data* to carry optional ``preview_image``, ``title``,
    ``description`` and ``file_path`` keys; missing files degrade to a
    placeholder (preview) or an error banner (PDF).
    """
    st.markdown("### 📄 Resume / CV")

    left_col, right_col = st.columns([1, 2])

    with left_col:
        preview_path = data.get("preview_image", "")
        if preview_path and os.path.exists(preview_path):
            st.image(preview_path, caption="Preview", use_container_width=True)
        else:
            # Grey placeholder box when no preview image exists on disk.
            st.markdown("""
            <div style="height: 200px; background-color: #f5f5f7; border-radius: 12px; display: flex; align-items: center; justify-content: center;">
                <span style="font-size: 3rem;">📄</span>
            </div>
            """, unsafe_allow_html=True)

    with right_col:
        st.markdown(f"#### {data.get('title', 'Resume')}")
        st.write(data.get('description', ''))

        pdf_path = data.get("file_path", "")
        if pdf_path and os.path.exists(pdf_path):
            # Read the whole PDF up front; download_button needs raw bytes.
            with open(pdf_path, "rb") as fh:
                pdf_bytes = fh.read()

            st.download_button(
                label="📥 Download Resume (PDF)",
                data=pdf_bytes,
                file_name="Tharushika_Abedheera_Resume.pdf",
                mime="application/pdf",
            )
        else:
            st.error("Resume file not found.")
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def render_content(data):
    """Render the combined content section as Articles/Videos/Research tabs."""
    st.markdown("### Content & Research")
    if not data:
        return
    articles_tab, videos_tab, research_tab = st.tabs(
        ["Articles", "Videos", "Research"])
    with articles_tab:
        render_articles(data.get('articles', []))
    with videos_tab:
        render_videos(data.get('videos', []))
    with research_tab:
        render_research(data.get('research', []))
|
| 317 |
+
|
| 318 |
+
# --- Centralized Chat Logic Function ---
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def process_chat_message(prompt):
    """Send *prompt* to the backend /chat endpoint and store the exchange.

    On a 200 response the full exchange (user query, AI text, optional
    tool signal and payload) is written to ``st.session_state.last_exchange``
    so the conversation area can render it; any other status or a network
    failure shows an error banner instead. Always ends with ``st.rerun()``
    so the page re-renders with the updated state.
    """
    with st.spinner("Processing..."):
        try:
            # Fix: without a timeout an unreachable backend (or a stalled
            # LLM call) would hang the spinner forever. 60s comfortably
            # covers the Gemini round-trips the backend makes.
            response = requests.post(API_URL, json={"message": prompt}, timeout=60)
            if response.status_code == 200:
                api_data = response.json()
                st.session_state.last_exchange = {
                    "user_query": prompt,
                    "ai_response": api_data.get("response", ""),
                    "tool_code": api_data.get("tool_code"),
                    "tool_data": api_data.get("tool_data")
                }
            else:
                st.error(f"Backend Error: {response.status_code}")
        except Exception as e:
            # Covers connection errors, timeouts, and bad JSON alike.
            st.error(f"Connection Failed: {e}")
    st.rerun()
|
| 338 |
+
|
| 339 |
+
# --- Main Layout ---
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
if "last_exchange" not in st.session_state:
|
| 343 |
+
st.session_state.last_exchange = None
|
| 344 |
+
|
| 345 |
+
# --- Top Section: Profile and Introduction ---
|
| 346 |
+
if not st.session_state.last_exchange:
|
| 347 |
+
st.markdown("<div class='centered-profile-pic'>", unsafe_allow_html=True)
|
| 348 |
+
profile_pic_path = "images/profile.png"
|
| 349 |
+
if os.path.exists(profile_pic_path):
|
| 350 |
+
st.image(profile_pic_path, width=160)
|
| 351 |
+
else:
|
| 352 |
+
st.markdown(f"""<div style='height:160px; width:160px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 20px; display:flex; align-items:center; justify-content:center; color:#666; font-size:0.8rem; margin: 0 auto;'>Add profile.png</div>""", unsafe_allow_html=True)
|
| 353 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 354 |
+
|
| 355 |
+
st.markdown("<div class='centered-title'>", unsafe_allow_html=True)
|
| 356 |
+
st.markdown("<h1>Hey, I'm Tharushika 👋</h1>", unsafe_allow_html=True)
|
| 357 |
+
st.markdown("<h2>Machine Learning Engineer</h2>", unsafe_allow_html=True)
|
| 358 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 359 |
+
|
| 360 |
+
st.markdown("<div class='quick-action-button'>", unsafe_allow_html=True)
|
| 361 |
+
if st.button("Me"):
|
| 362 |
+
process_chat_message("Tell me about yourself")
|
| 363 |
+
if st.button("Projects"):
|
| 364 |
+
process_chat_message("Show me your projects")
|
| 365 |
+
if st.button("Skills"):
|
| 366 |
+
process_chat_message("What are your skills?")
|
| 367 |
+
if st.button("Contact"):
|
| 368 |
+
process_chat_message("How can I contact you?")
|
| 369 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 370 |
+
|
| 371 |
+
if prompt := st.chat_input("Ask me anything..."):
|
| 372 |
+
process_chat_message(prompt)
|
| 373 |
+
|
| 374 |
+
# --- Conversation Area ---
|
| 375 |
+
if st.session_state.last_exchange:
|
| 376 |
+
exchange = st.session_state.last_exchange
|
| 377 |
+
|
| 378 |
+
with st.chat_message("user"):
|
| 379 |
+
st.write(exchange["user_query"])
|
| 380 |
+
|
| 381 |
+
with st.chat_message("assistant"):
|
| 382 |
+
st.write(exchange["ai_response"])
|
| 383 |
+
|
| 384 |
+
tool_code = exchange.get("tool_code")
|
| 385 |
+
tool_data = exchange.get("tool_data")
|
| 386 |
+
|
| 387 |
+
if tool_code == "show_projects":
|
| 388 |
+
render_projects(tool_data)
|
| 389 |
+
elif tool_code == "show_skills":
|
| 390 |
+
render_skills(tool_data)
|
| 391 |
+
elif tool_code == "show_content":
|
| 392 |
+
render_content(tool_data)
|
| 393 |
+
elif tool_code == "show_videos":
|
| 394 |
+
render_videos(tool_data)
|
| 395 |
+
elif tool_code == "show_articles":
|
| 396 |
+
render_articles(tool_data)
|
| 397 |
+
elif tool_code == "show_research":
|
| 398 |
+
render_research(tool_data)
|
| 399 |
+
elif tool_code == "show_certifications":
|
| 400 |
+
render_certifications(tool_data)
|
| 401 |
+
elif tool_code == "show_resume":
|
| 402 |
+
render_resume(tool_data) # <--- RESUME HANDLER ADDED
|
| 403 |
+
|
| 404 |
+
if prompt := st.chat_input("Ask for more details..."):
|
| 405 |
+
process_chat_message(prompt)
|
backend/database.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from typing import List, Dict, Any, Union
|
| 5 |
+
|
| 6 |
+
DB_PATH = os.path.join(os.path.dirname(
|
| 7 |
+
os.path.abspath(__file__)), "portfolio.sqlite")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def query_sqlite(table_name: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Fetch all rows from a SQLite table and format known tables.

    Args:
        table_name: Name of the table to read. Must be a plain Python
            identifier; anything else is rejected. (Table names cannot be
            bound as SQL parameters, so validating the identifier is the
            defense against SQL injection through the f-string below.)

    Returns:
        - ``skills``: dict mapping category -> list of skill strings
          (the ``skill_list`` column stores a JSON-encoded array).
        - ``certifications``: flat list of certification names.
        - any other table: list of row dicts.
        On any error, prints a diagnostic and returns an empty list.
    """
    # Fix: the original interpolated table_name into SQL unvalidated.
    if not table_name.isidentifier():
        print(f"Database Error: invalid table name {table_name!r}")
        return []
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            # Safe: table_name was validated as an identifier above.
            rows = [dict(row) for row in conn.execute(f"SELECT * FROM {table_name}")]
        finally:
            # Fix: original leaked the connection if execute() raised.
            conn.close()

        if table_name == "skills":
            formatted_skills = {}
            for row in rows:
                try:
                    # Parse the JSON string back to a list
                    formatted_skills[row['category']] = json.loads(row['skill_list'])
                except (KeyError, TypeError, ValueError):
                    # Fix: narrowed from a bare except; malformed or
                    # missing skill_list degrades to an empty category.
                    formatted_skills[row['category']] = []
            return formatted_skills

        if table_name == "certifications":
            return [row['name'] for row in rows]

        return rows
    except Exception as e:
        print(f"Database Error: {e}")
        return []
|
backend/main.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
from fastapi import FastAPI, HTTPException
|
| 5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
from typing import List, Dict, Any, Union, Optional
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
# Import local modules using relative imports
|
| 11 |
+
# This requires running the app as 'uvicorn backend.main:app ...'
|
| 12 |
+
from . import rag
|
| 13 |
+
from . import tools
|
| 14 |
+
|
| 15 |
+
# 1. Load Environment Variables
|
| 16 |
+
# 'load_dotenv()' works for local dev. On Render/Cloud, it uses system env vars.
|
| 17 |
+
load_dotenv()
|
| 18 |
+
|
| 19 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
| 20 |
+
|
| 21 |
+
if not GEMINI_API_KEY:
|
| 22 |
+
print("⚠️ WARNING: GEMINI_API_KEY not found. Check your .env or Cloud Dashboard.")
|
| 23 |
+
# In production, we might want to crash if no key is found
|
| 24 |
+
# sys.exit(1)
|
| 25 |
+
|
| 26 |
+
genai.configure(api_key=GEMINI_API_KEY)
|
| 27 |
+
|
| 28 |
+
# Use the fast, free-tier friendly model
|
| 29 |
+
MODEL_NAME = 'gemini-1.5-flash'
|
| 30 |
+
|
| 31 |
+
# --- API Models ---
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ChatRequest(BaseModel):
    """Request body for POST /chat: a single user message string."""
    message: str
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class ChatResponse(BaseModel):
    """Response body for POST /chat.

    ``tool_code``/``tool_data`` are populated only when Gemini triggered a
    tool (Path A in chat_endpoint); plain RAG answers leave them None.
    """
    # Natural-language answer (or short intro when a tool fired).
    response: str
    # Frontend render signal, e.g. "show_projects"; None for plain chat.
    tool_code: Optional[str] = None
    # Structured payload for the frontend renderer matching tool_code.
    tool_data: Optional[Union[Dict[str, Any], List[Any]]] = None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# --- FastAPI App Setup ---
|
| 45 |
+
app = FastAPI(
|
| 46 |
+
title="Tharushika's AI Portfolio API",
|
| 47 |
+
description="Backend for AI Portfolio using Gemini & RAG",
|
| 48 |
+
version="1.0.0"
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
# --- CORS MIDDLEWARE (Critical for React) ---
|
| 52 |
+
app.add_middleware(
|
| 53 |
+
CORSMiddleware,
|
| 54 |
+
# In production, replace ["*"] with your actual frontend URL (e.g., ["https://my-portfolio.vercel.app"])
|
| 55 |
+
allow_origins=["*"],
|
| 56 |
+
allow_credentials=True,
|
| 57 |
+
allow_methods=["*"], # Allows all methods (GET, POST, etc.)
|
| 58 |
+
allow_headers=["*"], # Allows all headers
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@app.on_event("startup")
async def startup():
    """Initialize the RAG system when the server boots up."""
    # Loads the FAISS index and chunk metadata into rag's module globals
    # once, so per-request /chat handlers don't pay the load cost.
    rag.initialize_rag()
|
| 66 |
+
|
| 67 |
+
# --- Endpoints ---
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@app.get("/")
def health_check():
    """Liveness probe: confirms the API process is up and serving."""
    payload = {"status": "ok", "message": "Portfolio API is live"}
    return payload
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Route a user message to either a UI tool or a pure RAG answer.

    Flow: ask Gemini (with the tool declarations attached) whether the
    question maps to a declared tool. If it emits a function call, execute
    the matching Python function locally and return its data plus a short
    generated intro (Path A). Otherwise answer from retrieved vector-store
    context only (Path B). Raises HTTP 500 on any unhandled error.
    """
    user_msg = request.message

    # 1. Initialize Gemini with our Toolkit
    model = genai.GenerativeModel(MODEL_NAME, tools=tools.ALL_TOOLS_LIST)

    try:
        # 2. Ask Gemini the question
        # We don't provide RAG context yet; we let the model decide if it needs a Tool first.
        # Automatic function calling is off so WE stay the router.
        chat = model.start_chat(enable_automatic_function_calling=False)
        response = chat.send_message(user_msg)

        # 3. Check for Function Calls (The "Router")
        # Take the first function_call part, if any.
        function_call = None
        if response.parts:
            for part in response.parts:
                if part.function_call:
                    function_call = part.function_call
                    break

        # --- PATH A: Tool Triggered (Structured Data) ---
        if function_call:
            tool_name = function_call.name
            print(f"🛠️ Tool Triggered: {tool_name}")

            # NOTE(review): an unknown tool name falls through to Path B
            # (plain RAG) rather than returning an error.
            if tool_name in tools.TOOL_FUNCTIONS:
                # A. Execute the Python function (e.g., query SQL)
                data = tools.TOOL_FUNCTIONS[tool_name]()

                # B. Get the frontend signal code (e.g., "show_projects")
                code = tools.TOOL_CODE_MAP.get(tool_name)

                # C. Generate a polite intro message using RAG context
                # We retrieve a bit of context so the intro sounds personal
                context_chunks = rag.retrieve_context(user_msg, k=2)
                context_text = "\n".join(context_chunks)

                intro_prompt = f"""
                The user asked: '{user_msg}'.
                You just triggered the tool '{tool_name}' to show them visual data.
                Write a very short (1 sentence), enthusiastic intro for this data.
                Use this context if relevant: {context_text}
                """

                intro_model = genai.GenerativeModel(MODEL_NAME)
                intro_resp = intro_model.generate_content(intro_prompt)
                intro_text = intro_resp.text.strip()

                return ChatResponse(
                    response=intro_text,
                    tool_code=code,
                    tool_data=data
                )

        # --- PATH B: Pure RAG Chat (Unstructured Context) ---
        print("🧠 RAG Path Triggered")

        # 1. Retrieve relevant text chunks from our Vector DB
        context_chunks = rag.retrieve_context(user_msg, k=4)
        context_text = "\n\n".join(context_chunks)

        # 2. Construct the Prompt
        rag_prompt = f"""
        You are an AI assistant for Tharushika Abedheera's portfolio.
        Your goal is to answer the user's question professionally and confidently, acting as Tharushika.

        STRICT RULES:
        - Use ONLY the context provided below.
        - If the answer isn't in the context, say "I don't have that specific info right now, but feel free to ask about my projects or skills!"
        - Keep answers concise (under 3-4 sentences) unless asked for detail.

        CONTEXT FROM KNOWLEDGE BASE:
        {context_text}

        USER QUESTION:
        {user_msg}
        """

        # 3. Generate Answer
        rag_model = genai.GenerativeModel(MODEL_NAME)
        text_response = rag_model.generate_content(rag_prompt).text

        return ChatResponse(response=text_response, tool_code=None, tool_data=None)

    except Exception as e:
        print(f"❌ Error handling chat: {e}")
        # In production, avoid sending raw error details to the client for security
        raise HTTPException(status_code=500, detail="Internal Server Error")
|
backend/portfolio.sqlite
ADDED
|
Binary file (65.5 kB). View file
|
|
|
backend/rag.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import pickle
import faiss
import numpy as np
import google.generativeai as genai
import traceback
from dotenv import load_dotenv

# --- 1. Force Load API Key ---
# Load variables from a local .env file so the key is available even when the
# process was not started with it exported in the shell.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    # Warn instead of raising so the rest of the app can still start;
    # retrieval will simply fail later if the key is truly missing.
    print("⚠️ WARNING: GEMINI_API_KEY not found in rag.py environment.")
else:
    genai.configure(api_key=GEMINI_API_KEY)

# Paths — resolved relative to this file so the module works regardless of the
# working directory the server is launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
VECTOR_STORE_DIR = os.path.join(BASE_DIR, "vector_store")
INDEX_PATH = os.path.join(VECTOR_STORE_DIR, "faiss_index.bin")  # serialized FAISS index
METADATA_PATH = os.path.join(VECTOR_STORE_DIR, "chunks_metadata.pkl")  # pickled chunk texts

# API Config
# NOTE: must match the embedding model used when the index was built —
# retrieve_context() checks the dimension at query time and bails on mismatch.
EMBEDDING_MODEL = "models/text-embedding-004"

# Global Components — populated by initialize_rag() at startup.
faiss_index = None  # faiss index object once loaded; None until initialization succeeds
chunks = []         # list of text chunks whose positions align with index rows
def initialize_rag():
    """Load the FAISS index and chunk metadata from disk into module globals.

    Intended to run once at startup. Never raises: when the vector-store
    files are missing or unreadable it logs the problem and returns, leaving
    `faiss_index` as None so retrieve_context() can degrade gracefully.
    """
    global faiss_index, chunks

    print("--- RAG INITIALIZATION ---")
    files_present = os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH)
    if not files_present:
        print(f"CRITICAL: Index files not found at {VECTOR_STORE_DIR}")
        return

    try:
        faiss_index = faiss.read_index(INDEX_PATH)
        with open(METADATA_PATH, "rb") as fh:
            metadata = pickle.load(fh)
        chunks = metadata['chunks']
        print(f"✅ RAG Loaded. {len(chunks)} chunks indexed.")
    except Exception as err:
        print(f"❌ Error loading RAG files: {err}")
def retrieve_context(query: str, k: int = 2):
    """Return up to `k` text chunks semantically closest to `query`.

    Embeds the query via the Gemini embeddings API, searches the FAISS index,
    and maps the resulting row ids back to their chunk texts.

    Args:
        query: natural-language question to search for.
        k: maximum number of chunks to return.

    Returns:
        A list of chunk strings (possibly empty). Never raises: any failure
        is logged and an empty list is returned so the caller can fall back.
    """
    # FIX: compare against None explicitly. The original `if not faiss_index:`
    # relied on truthiness of a FAISS (SWIG) index object, which is not a
    # documented/reliable emptiness signal.
    if faiss_index is None:
        print("⚠️ RAG Retrieval Skipped: Index not loaded.")
        return []

    try:
        # 1. Get embedding from API
        result = genai.embed_content(
            model=EMBEDDING_MODEL,
            content=query,
            task_type="retrieval_query"
        )

        # 2. Convert to Numpy — FAISS expects a float32 matrix of shape (1, d)
        query_vec = np.array([result['embedding']], dtype="float32")

        # 3. Check Dimensions (Debug Step). A mismatch means the index was
        # built with a different embedding model than EMBEDDING_MODEL.
        if faiss_index.d != query_vec.shape[1]:
            print(
                f"❌ DIMENSION MISMATCH: Index expects {faiss_index.d}, but Query is {query_vec.shape[1]}")
            print(
                "SOLUTION: Delete backend/vector_store and run create_vector_db.py again.")
            return []

        # 4. Search FAISS — never request more neighbours than chunks exist
        # (FAISS would pad with -1 anyway; this keeps the call tidy).
        top_k = min(k, len(chunks)) if chunks else k
        distances, indices = faiss_index.search(query_vec, top_k)

        # FAISS pads missing results with -1; also guard against any stale id
        # that falls outside the metadata list.
        return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

    except Exception as e:
        print(f"❌ RAG ERROR: {e}")
        traceback.print_exc()  # full stack trace to the server log
        return []
backend/tools.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import google.generativeai as genai
from .database import query_sqlite

# --- Python Implementation of Tools ---
# Thin wrappers around the SQLite layer: each returns the full contents of
# one portfolio table.


def get_projects():
    """Return every row of the 'projects' table."""
    return query_sqlite("projects")


def get_skills():
    """Return every row of the 'skills' table."""
    return query_sqlite("skills")


def get_certifications():
    """Return every row of the 'certifications' table."""
    return query_sqlite("certifications")


def get_articles():
    """Return every row of the 'articles' table."""
    return query_sqlite("articles")


def get_videos():
    """Return every row of the 'videos' table."""
    return query_sqlite("videos")


def get_research():
    """Return every row of the 'research' table."""
    return query_sqlite("research")
def get_content():
    """Return all published content (articles, videos, research) keyed by section name."""
    sections = ("articles", "videos", "research")
    return {name: query_sqlite(name) for name in sections}
def get_resume():
    """Returns metadata for the resume file (paths and display text for the frontend)."""
    return dict(
        file_path="images/resume.pdf",
        preview_image="images/resume_preview.png",
        title="Tharushika Abedheera - CV",
        description="Machine Learning Engineer & AI Specialist",
    )
# Mapping for execution: Gemini function-call name -> local Python callable.
# main.py looks up the model's requested function here and invokes it.
TOOL_FUNCTIONS = {
    "get_projects": get_projects,
    "get_skills": get_skills,
    "get_certifications": get_certifications,
    "get_articles": get_articles,
    "get_videos": get_videos,
    "get_research": get_research,
    "get_content": get_content,
    "get_resume": get_resume
}

# Mapping for Frontend Action Codes: tool name -> UI action string the
# frontend uses to decide which component to render for the tool_data payload.
TOOL_CODE_MAP = {
    "get_projects": "show_projects",
    "get_skills": "show_skills",
    "get_certifications": "show_certifications",
    "get_articles": "show_articles",
    "get_videos": "show_videos",
    "get_research": "show_research",
    "get_content": "show_content",
    "get_resume": "show_resume"
}
# --- Gemini Schema Definitions ---
# Every tool in this module is parameterless, so the FunctionDeclaration
# boilerplate is identical for all of them. Build them through one small
# factory instead of repeating the schema eight times (DRY; also makes it
# trivial to add the next tool).


def _declare_no_arg_tool(name, description):
    """Build a FunctionDeclaration for a tool that takes no parameters."""
    return genai.protos.FunctionDeclaration(
        name=name,
        description=description,
        parameters=genai.protos.Schema(
            type=genai.protos.Type.OBJECT, properties={}, required=[])
    )


projects_tool = _declare_no_arg_tool(
    "get_projects", "Retrieve the full list of Tharushika's projects.")

skills_tool = _declare_no_arg_tool(
    "get_skills", "Retrieve technical skills.")

certifications_tool = _declare_no_arg_tool(
    "get_certifications", "Retrieve certifications.")

articles_tool = _declare_no_arg_tool(
    "get_articles", "Retrieve articles.")

videos_tool = _declare_no_arg_tool(
    "get_videos", "Retrieve videos.")

research_tool = _declare_no_arg_tool(
    "get_research", "Retrieve research.")

content_tool = _declare_no_arg_tool(
    "get_content", "Retrieve ALL content (articles, videos, research).")

resume_tool = _declare_no_arg_tool(
    "get_resume",
    "Retrieve Tharushika's official resume/CV. Use this when the user asks to see, download, or get a copy of the resume.")

# List to pass to the model
ALL_TOOLS_LIST = [
    projects_tool, skills_tool, certifications_tool,
    articles_tool, videos_tool, research_tool, content_tool,
    resume_tool,
]
|
backend/vector_store/chunks_metadata.pkl
ADDED
|
Binary file (6.95 kB). View file
|
|
|
backend/vector_store/faiss_index.bin
ADDED
|
Binary file (61.5 kB). View file
|
|
|
data/articles.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"title": "From PDF to API: How I Built a Hybrid RAG Insights Engine for Laptop Specs",
|
| 4 |
+
"description": "A deep dive into how I created a dynamic and static data-based RAG system for a business.",
|
| 5 |
+
"url": "https://www.linkedin.com/pulse/from-pdf-api-how-i-built-hybrid-rag-insights-engine-laptop-abedheera-hwwoc",
|
| 6 |
+
"type": "article"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"title": "How to Choose the Best Machine Learning Algorithm for Sentiment Analysis",
|
| 10 |
+
"description": "A guide on how to choose the best machine learning algorithm for sentiment analysis.",
|
| 11 |
+
"url": "https://www.linkedin.com/pulse/how-choose-best-machine-learning-algorithm-sentiment-abedheera-mvctc",
|
| 12 |
+
"type": "article"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"title": "Parallel RAG with LangChain: Three Vector DBs, One Personality.",
|
| 16 |
+
"description": "Lessons learned building an 'Uncle Iroh' mental healthcare assistant. Covers diverse knowledge sourcing, chunking strategies, and parallel retrievers.",
|
| 17 |
+
"url": "https://www.linkedin.com/pulse/parallel-rag-langchain-three-vector-dbs-one-tharushika-abedheera-tognc",
|
| 18 |
+
"type": "article"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"title": "A Gentle Introduction to LSTMs (Long Short-Term Memory Networks)",
|
| 22 |
+
"description": "Explore what LSTMs are, why they were created, and how they help machines understand sequences.",
|
| 23 |
+
"url": "https://www.linkedin.com/pulse/gentle-introduction-lstms-long-short-term-memory-tharushika-abedheera-irvzc",
|
| 24 |
+
"type": "article"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"title": "How Does L1 (Lasso) and L2 (Ridge) Regularization Work?",
|
| 28 |
+
"description": "Explore L1 (Lasso) and L2 (Ridge) regularization techniques to improve model performance by addressing overfitting.",
|
| 29 |
+
"url": "https://www.linkedin.com/pulse/how-does-l1-lasso-l2-ridge-regularization-work-tharushika-abedheera-upyvc",
|
| 30 |
+
"type": "article"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"title": "ROUGE Score: A Key Metric for Evaluating Text Summarisation Models",
|
| 34 |
+
"description": "Introduction to how ROUGE scores provide valuable insights into how well a model has performed in generating summaries.",
|
| 35 |
+
"url": "https://www.linkedin.com/pulse/rouge-score-key-metric-evaluating-text-summarisation-models-qirwc",
|
| 36 |
+
"type": "article"
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"title": "SQuAD Metrics: Evaluating Question-Answering Models Effectively",
|
| 40 |
+
"description": "Introduction to the SQuAD metric, an essential tool for evaluating question-answering models in NLP.",
|
| 41 |
+
"url": "https://www.linkedin.com/pulse/how-does-l1-lasso-l2-ridge-regularization-work-tharushika-abedheera-upyvc",
|
| 42 |
+
"type": "article"
|
| 43 |
+
}
|
| 44 |
+
]
|
data/certifications.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{ "name": "Azure Certified AI Engineer Associate (in progress)" },
|
| 3 |
+
{ "name": "Astronomer Certification DAG Authoring for Apache Airflow 3" },
|
| 4 |
+
{ "name": "LinkedIn- Vector Databases for Caching and Retrieval Augmented Generation (RAG)" },
|
| 5 |
+
{ "name": "LinkedIn- Building AI Agents with AutoGen" },
|
| 6 |
+
{ "name": "LinkedIn- Building NLP Apps with Hugging Face Transformers" },
|
| 7 |
+
{ "name": "LinkedIn- Agentic AI for Developers: Concepts and Application for Enterprises" },
|
| 8 |
+
{ "name": "LinkedIn- Getting Started with Hugging Face Transformers ." }
|
| 9 |
+
]
|
data/education.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "uni_plymouth",
|
| 4 |
+
"degree": "Bachelor of Science (Honours) in Software Engineering",
|
| 5 |
+
"institution": "University of Plymouth",
|
| 6 |
+
"details": "Focused on software engineering principles and AI applications."
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"id": "badulla_college",
|
| 10 |
+
"degree": "GCE Advanced level",
|
| 11 |
+
"institution": "Badulla central collage",
|
| 12 |
+
"details": "Includes an 'A' pass for English Language."
|
| 13 |
+
}
|
| 14 |
+
]
|
data/experience.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "nextgen_intern",
|
| 4 |
+
"role": "Data Science and Machine Learning Engineer intern",
|
| 5 |
+
"company": "NextGen CodeX",
|
| 6 |
+
"duration": "2025/10/15 - Present",
|
| 7 |
+
"description": "Internship focused on advanced Data Science and ML applications."
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"id": "intelligen_freelance",
|
| 11 |
+
"role": "Freelance Machine Learning Engineer",
|
| 12 |
+
"company": "Intelli-Gen AI",
|
| 13 |
+
"duration": "2024/02/10 - Present",
|
| 14 |
+
"description": "Freelance work developing custom ML solutions and AI integrations."
|
| 15 |
+
}
|
| 16 |
+
]
|
data/profile.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"contact": {
|
| 3 |
+
"name": "A.A.D. Tharushika Abedheera",
|
| 4 |
+
"role": "Machine Learning Engineer / AI Engineer",
|
| 5 |
+
"email": "tharushika280@gmail.com",
|
| 6 |
+
"phone": "07655753551",
|
| 7 |
+
"location": "Badulla, Sri Lanka",
|
| 8 |
+
"linkedin": "https://www.linkedin.com/in/tharushika-abedheera-3396311a4/",
|
| 9 |
+
"github": "https://github.com/tharu280",
|
| 10 |
+
"medium": "https://medium.com/@tharushika280"
|
| 11 |
+
},
|
| 12 |
+
"summary": "Machine Learning Engineer with a research-driven approach, specializing in NLP, Generative AI, and AI agent frameworks. Passionate about transforming AI/ML research into real-world applications, with hands-on experience and expertise in Machine learning, Deep learning, large language models (LLMs), retrieval-augmented generation (RAG), and autonomous AI systems. Adept at designing, deploying, and optimizing AI-driven systems to enhance scalability and innovation."
|
| 13 |
+
}
|
data/projects.json
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "rag-book-recommender",
|
| 4 |
+
"title": "RAG-based Advanced Book Recommendation System",
|
| 5 |
+
"type": "AI/ML",
|
| 6 |
+
"description": "Built a personalized book recommender using LangChain, HuggingFace Sentence Transformers, and ChromaDB, with zero-shot classification to auto-fill missing categories and enrich recommendations. Deployed the system using FastAPI, Docker, and Nginx on AWS Elastic Beanstalk for scalable, low-latency access.",
|
| 7 |
+
"technologies": ["Python", "Numpy", "Pandas", "HuggingFace Transformers", "LangChain", "ChromaDB", "FastAPI", "Docker", "Nginx", "AWS Elastic Beanstalk"],
|
| 8 |
+
"github_url": "https://github.com/tharu280",
|
| 9 |
+
"demo_url": null,
|
| 10 |
+
"image_path": "images/project_book.png"
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"id": "eyecon",
|
| 14 |
+
"title": "EyeCon – Real-Time Blink Communication System",
|
| 15 |
+
"type": "AI/ML",
|
| 16 |
+
"description": "Developed the first system enabling fully paralysed users to communicate without any wearable devices, using only eye blinks detected via webcam. Implemented Eye Aspect Ratio (EAR)–based blink detection to differentiate short (next candidate) and long blinks (select candidate), achieving accurate, real-time interpretation of user intent. Integrated a Gemini LLM for context-aware candidate word suggestions, dynamically updating options as the sentence forms. Enabled automatic summarisation of composed sentences, providing users with coherent text output and facilitating streamlined communication.",
|
| 17 |
+
"technologies": ["Python", "OpenCV", "MediaPipe", "Kivy", "Custom Morse Decoder", "Gemini LLM", "Contextual Candidate Selection", "FastAPI"],
|
| 18 |
+
"github_url": "https://github.com/tharu280",
|
| 19 |
+
"demo_url": null,
|
| 20 |
+
"image_path": "images/project_eyecon.png"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"id": "youtube-analytics",
|
| 24 |
+
"title": "End-to-End YouTube Comment Analytics Pipeline",
|
| 25 |
+
"type": "Data Engineering",
|
| 26 |
+
"description": "Built an AI-powered pipeline to extract, classify, and visualize YouTube comments. Achieved 95.46% accuracy by fine-tuning BERT; also developed a BiLSTM classifier (90.75%); hosted the best model on HuggingFace Hub. Integrated Azure services (SQL Serverless, Data Factory, Power BI) for automated data flow, storage, and interactive visualisations.",
|
| 27 |
+
"technologies": ["Python", "TensorFlow", "Keras", "TF-IDF", "BiLSTM", "BERT", "HuggingFace", "FastAPI", "Docker", "YouTube API", "Azure SQL Serverless", "Azure Data Factory", "Power BI", "Azure Data Studio"],
|
| 28 |
+
"github_url": "https://github.com/tharu280",
|
| 29 |
+
"demo_url": null,
|
| 30 |
+
"image_path": "images/project_youtube.png"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"id": "mental-health-chatbot",
|
| 34 |
+
"title": "Comic Character-based RAG Mental Healthcare Assistant Chatbot",
|
| 35 |
+
"type": "AI/ML",
|
| 36 |
+
"description": "Built a RAG-based chatbot modelled after “Uncle Iroh” to provide mental health support using multi-source wisdom and personalized tone. Implemented 3 FAISS stores, with RunnableParallel & Sequential chains -cutting latency from 3.9s to 1.5s accurate and combined context retrieval. Optimised with Redis memory, LangSmith monitoring, and FastAPI; voice cloning via ElevenLabs in progress.",
|
| 37 |
+
"technologies": ["Python", "LangChain", "HuggingFace Transformers", "Gemini 1.5 Flash", "FAISS", "Redis", "LangSmith", "ElevenLabs", "FastAPI", "Pydantic"],
|
| 38 |
+
"github_url": "https://github.com/tharu280",
|
| 39 |
+
"demo_url": null,
|
| 40 |
+
"image_path": "images/project_chatbot.png"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"id": "cafe-chatbot",
|
| 44 |
+
"title": "AI-Powered RAG Chatbot for Cafe Business",
|
| 45 |
+
"type": "Cloud AI",
|
| 46 |
+
"description": "Built a Retrieval-Augmented Generation (RAG) chatbot using Amazon Bedrock, OpenSearch Serverless, and S3 to answer customer queries based on internal café documents. Preprocessed and embedded data for semantic search; integrated foundation models with prompt engineering for contextual responses. Designed a scalable, secure deployment with proper IAM configuration and AWS-native services to ensure real-world readiness.",
|
| 47 |
+
"technologies": ["AWS Bedrock", "AWS OpenSearch Serverless", "AWS S3", "Llama 3 70B", "Titan Text G1 – Lite"],
|
| 48 |
+
"github_url": "https://github.com/tharu280",
|
| 49 |
+
"demo_url": null,
|
| 50 |
+
"image_path": "images/project_cafe.png"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"id": "ai-coding-agent",
|
| 54 |
+
"title": "AI Coding Agent – V (Open Source Project)",
|
| 55 |
+
"type": "Agentic AI",
|
| 56 |
+
"description": "Designed and developed an AI coding agent demonstrating Agentic AI design patterns including Tool Use and Reflection Loop. Integrated Google Gemini’s function calling to perform grounded code operations such as reading, writing, and executing Python files. Implemented a 20-iteration reflection loop where the agent plans, acts, critiques, and improves results until completion.",
|
| 57 |
+
"technologies": ["Python", "Google Gemini API", "Gemini SDK", "Pydantic", "Agentic design patterns"],
|
| 58 |
+
"github_url": "https://github.com/tharu280",
|
| 59 |
+
"demo_url": null,
|
| 60 |
+
"image_path": "images/project_agent.png"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"id": "laptop-intel-engine",
|
| 64 |
+
"title": "Cross-Marketplace Laptop & Review Intelligence Engine",
|
| 65 |
+
"type": "AI/ML",
|
| 66 |
+
"description": "Developed an insights engine to analyse and compare laptops by integrating two distinct data sources: canonical static technical specifications from PDFs(PSREF) and mutable, real time data (price, availability, reviews) from product pages.. Engineered a hybrid Retrieval-Augmented Generation (RAG) pipeline that fuses context from two data stores: a FAISS vector index (for semantic search on PDF specs) and a SQLite database (for structured SQL queries on dynamic market data).",
|
| 67 |
+
"technologies": ["Python", "FastAPI", "Streamlit", "Google Gemini API", "FAISS", "Sentence Transformers", "SQLite", "Pandas", "Pydantic"],
|
| 68 |
+
"github_url": "https://github.com/tharu280",
|
| 69 |
+
"demo_url": null,
|
| 70 |
+
"image_path": "images/project_laptop.png"
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"id": "food-ordering-backend",
|
| 74 |
+
"title": "Event-Driven Backend for Food Ordering with Real-Time Fraud Detection",
|
| 75 |
+
"type": "Backend/ML",
|
| 76 |
+
"description": "Built a scalable event-driven backend using Python, Kafka, and Docker for a food ordering app with real-time processing. Modelled core events like order_placed, order_confirmed, and fraud_alert as Kafka topics across decoupled microservices. Integrated a fraud detection model into the transaction service to detect fraud in real-time and publish alerts via Kafka.",
|
| 77 |
+
"technologies": ["Python", "Apache Kafka", "kafka-python", "scikit-learn", "Docker", "Pydantic", "MLflow", "FastAPI"],
|
| 78 |
+
"github_url": "https://github.com/tharu280",
|
| 79 |
+
"demo_url": null,
|
| 80 |
+
"image_path": "images/project_food.png"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"id": "tour-planner-agent",
|
| 84 |
+
"title": "Tour Planner AI Agent",
|
| 85 |
+
"type": "Agentic AI",
|
| 86 |
+
"description": "Designed a hybrid AI system using LangGraph to orchestrate a multi-step agentic workflow for dynamic itinerary generation. Engineered a rule-based data ingestion pipeline to chain disparate APIs for gathering geocoding, route, and a broad set of location data. Implemented an “LLM as a Judge\" pattern that semantically analyses the user's qualitative query to rank and filter the raw data, delivering context-aware, personalized recommendations.",
|
| 87 |
+
"technologies": ["Python", "LangGraph", "FastAPI", "Google Gemini API", "Pydantic", "Geoapify", "OpenRouteService", "Nominatim"],
|
| 88 |
+
"github_url": "https://github.com/tharu280",
|
| 89 |
+
"demo_url": null,
|
| 90 |
+
"image_path": "images/project_tour.png"
|
| 91 |
+
}
|
| 92 |
+
]
|
data/research.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"title": "AI Voice Assistant for Healthcare (ICIET 2023)",
|
| 4 |
+
"description": "Published as a conference paper at ICIET 2023. Developed an AI voice assistant using PyTorch and NLTK to handle general and healthcare-related queries.",
|
| 5 |
+
"url": "https://www.researchgate.net/publication/376522984_A_Deep_Learning_Approach_to_Utilize_AI_Voice_Assistants_In_Healthcare",
|
| 6 |
+
"type": "paper"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"title": "EL_sense: AI Solution for Human-Elephant Conflict (In Progress)",
|
| 10 |
+
"description": "Proposed a bioacoustics-based system for detecting low-frequency (20Hz) elephant rumbles using live audio streaming via Apache Kafka.",
|
| 11 |
+
"url": null,
|
| 12 |
+
"type": "research"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"title": "EyeCon – Real-Time Blink Communication System",
|
| 16 |
+
"description": "Developed the first system enabling fully paralysed users to communicate without any wearable devices, using only eye blinks.",
|
| 17 |
+
"url": null,
|
| 18 |
+
"type": "paper"
|
| 19 |
+
}
|
| 20 |
+
]
|
data/skills.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "programming",
|
| 4 |
+
"category": "Programming",
|
| 5 |
+
"list": ["Python", "JavaScript", "HTML", "CSS", "C", "SQL"]
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"id": "ml_dl",
|
| 9 |
+
"category": "Machine Learning & Deep Learning",
|
| 10 |
+
"list": ["Scikit-learn", "TensorFlow", "Keras", "PyTorch", "Hugging Face", "Transformers", "XGBoost", "Decision Trees", "Random Forest", "SVM", "Naïve Bayes", "KNN", "K-Means Clustering", "PCA", "Model Evaluation", "Hyper-parameter Tuning", "Pandas", "NumPy", "LSTMs", "Transformers", "LLMs", "NLTK", "CNNs", "OpenCV", "MediaPipe", "Vision Transformers (ViTs)", "Yolo"]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"id": "vector_db",
|
| 14 |
+
"category": "Vector Search & Databases",
|
| 15 |
+
"list": ["FAISS", "ANNOY", "Milvus", "ChromaDB", "LlamaIndex", "Pinecone", "Weaviate"]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"id": "databases",
|
| 19 |
+
"category": "Databases",
|
| 20 |
+
"list": ["MySQL", "MongoDB", "Firebase", "Amazon S3", "PostgreSQL"]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"id": "cloud_devops",
|
| 24 |
+
"category": "Cloud & CI/CD",
|
| 25 |
+
"list": ["AWS", "Azure", "GCP", "Docker", "GitHub Actions", "Apache Kafka", "MLflow", "LangSmith", "CI/CD pipelines", "AWS EC2"]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"id": "agentic_ai",
|
| 29 |
+
"category": "Agentic AI",
|
| 30 |
+
"list": ["AutoGen", "LlamaIndex", "Langchain", "LangGraph"]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"id": "frameworks",
|
| 34 |
+
"category": "Frameworks",
|
| 35 |
+
"list": ["FastAPI", "Flask", "Streamlit", "Tkinter", "Kivy", "React.js"]
|
| 36 |
+
}
|
| 37 |
+
]
|
data/summaries/about_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"My journey into AI is fueled by a desire to solve real-world problems with tangible impact. I’m deeply passionate about Machine Learning and Artificial Intelligence and have spent the past two years learning and building hands on projects completely self driven and grounded in real world applications.I'm not just interested in model accuracy; I'm passionate about engineering systems that are robust, scalable, and genuinely useful, especially in areas like accessibility and practical RAG applications. I thrive on the entire AI lifecycle, from developing the initial mathematical concept to containerizing the final FastAPI endpoint for production. The transition from pure research to a deployed product is where I find the most exciting challenges.I’m confident in my ability to learn quickly, adapt to fast paced environments, and contribute meaningfully to your team. Every drop of my knowledge in the domain of AI ML is learned solely through free resources such as youtube on my own as i am absolutely passionate about this field."
|
data/summaries/articles_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“Above are some of my published articles, where I regularly write about machine learning, deep learning, NLP, RAG systems, evaluation metrics, and practical engineering lessons from real-world AI projects. My writing focuses on breaking down complex concepts, sharing hands-on insights, and documenting the architectures, techniques, and optimizations I use in my own systems. These featured articles represent only part of my work — I continue to publish more guides, explanations, and deep dives to help others understand and build effective AI/ML solutions.”
|
data/summaries/certifications_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“Above are some of my professional certifications, reflecting my ongoing commitment to mastering cloud platforms, AI engineering, workflow orchestration, RAG systems, and agentic AI frameworks. I consistently pursue industry-recognized credentials to stay aligned with modern best practices and deepen my expertise. These listed certifications are only part of my continuous learning journey, and I regularly work toward new ones as the field evolves.”
|
data/summaries/projects_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“Above are some of my most notable projects. I’ve worked across a wide range of AI and engineering areas — including machine learning, deep learning, RAG systems, agentic AI, computer vision, and scalable backend pipelines. My work spans everything from recommendation systems, analytics pipelines, intelligent assistants, and real-time agent frameworks to cloud-native, production-ready AI deployments. I focus heavily on building systems that are not only accurate and reliable, but also cost-efficient, resource-optimized, and scalable. These showcased projects represent only part of my portfolio, and I continue to work on many more innovative solutions in the AI/ML space.”
|
data/summaries/research_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“Above are some of my key research contributions. My work spans applied AI, computer vision, bioacoustics, and assistive technologies — including conference-published studies, novel communication systems, and ongoing experimental projects. I focus on solving real-world problems using deep learning, signal processing, and intelligent systems, with an emphasis on accessibility, healthcare, and environmental applications. These represent only part of my research journey, and I continue to explore new areas and publish work as I advance further in the field.”
|
data/summaries/skills_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“The skills listed above represent my core technical strengths across software engineering, machine learning, deep learning, vector search, cloud infrastructure, and agentic AI. I work extensively with modern ML frameworks, LLM ecosystems, scalable backend tools, and cloud-native architectures. My expertise spans end-to-end AI systems — from model training and evaluation to retrieval pipelines, agent frameworks, and production deployment. I continuously expand my skill set as new tools, frameworks, and AI patterns emerge, so this list reflects only a snapshot of the technologies I actively work with.”
|
data/summaries/videos_summary.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
“Above are some of my video tutorials, where I break down complex AI concepts into simple, easy-to-understand explanations. I create educational content focused on RAG systems, agentic AI patterns, embeddings, and practical machine learning ideas — all presented in a clear, beginner-friendly way. These videos represent only a part of what I create, and I continue to produce more content to help others learn modern AI concepts without the jargon.”
|
data/videos.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "01",
|
| 4 |
+
"title": "RAG and Embeddings Explained in Plain English",
|
| 5 |
+
"description": "Explaining RAG and embeddings in plain English. No jargon. No math. Just the core concepts, broken down so anyone can understand.",
|
| 6 |
+
"thumbnail_url": "images/video_thumbnails/01.png",
|
| 7 |
+
"url": "https://www.linkedin.com/posts/tharushika-abedheera-3396311a4_explaining-rag-and-embeddings-in-plain-english-activity-7379239387819274240-__h6"
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"id": "02",
|
| 11 |
+
"title": "Reflection Design Pattern in AI Agents Explained Simply!",
|
| 12 |
+
"description": "In this short tutorial, I walk through how reflection works in AI Agents. You’ll learn how this pattern forms the foundation for self-improving AI systems.",
|
| 13 |
+
"thumbnail_url": "images/video_thumbnails/02.png",
|
| 14 |
+
"url": "https://www.linkedin.com/posts/tharushika-abedheera-3396311a4_ai-machinelearning-agents-activity-7390137862027005952-iI2u"
|
| 15 |
+
}
|
| 16 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
google-generativeai
|
| 4 |
+
faiss-cpu
|
| 5 |
+
numpy
|
| 6 |
+
python-dotenv
|
| 7 |
+
requests
|
| 8 |
+
pydantic
|
scripts/create_vector_db.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Build the FAISS vector index for the portfolio RAG backend using Gemini embeddings."""

# Standard library
import json
import os
import pickle
import sys

# Third-party
import faiss
import google.generativeai as genai
import numpy as np
from dotenv import load_dotenv

# Load env to get API Key
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    print("❌ Error: GEMINI_API_KEY not found in .env")
    # sys.exit() is the canonical way to abort a script; the bare exit()
    # builtin is injected by the `site` module and is not guaranteed to
    # exist in every runtime (e.g. with `python -S` or frozen builds).
    sys.exit(1)

genai.configure(api_key=GEMINI_API_KEY)

# --- Configuration ---
DATA_DIR = "data"                                  # source JSON/profile data
SUMMARIES_DIR = os.path.join(DATA_DIR, "summaries")  # hand-written summary texts
OUTPUT_DIR = os.path.join("backend", "vector_store")  # where index + metadata land
# Google's latest embedding model
EMBEDDING_MODEL = "models/text-embedding-004"

os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def load_json(filename):
    """Load a JSON file from DATA_DIR.

    Returns the parsed document, or an empty list when the file is absent
    (callers iterate the result, so [] is a safe "no data" default).
    """
    path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(path):
        return []
    # Use a context manager so the file handle is always closed; the
    # original `json.load(open(path))` leaked the handle until GC.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def load_summary_text(filename):
    """Return the stripped contents of a summary file, or "" when it is missing."""
    summary_path = os.path.join(SUMMARIES_DIR, filename)
    if not os.path.exists(summary_path):
        return ""
    with open(summary_path, 'r', encoding='utf-8') as fh:
        return fh.read().strip()
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def get_embedding(text):
    """Embed *text* via the Gemini API and return the raw embedding vector.

    Uses task_type="retrieval_document" so the vectors are optimized for
    being searched against (as opposed to query-side embeddings).
    """
    response = genai.embed_content(
        model=EMBEDDING_MODEL,
        content=text,
        task_type="retrieval_document",
    )
    return response['embedding']
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def main():
    """Chunk the portfolio data, embed every chunk with Gemini, and persist
    a FAISS L2 index plus a pickle of the chunk texts and their metadata."""
    print("🔄 Creating Cloud-Based Vector Index...")
    chunks = []    # raw text chunks, aligned index-for-index with `metadata`
    metadata = []  # one {"source": tag} dict per chunk

    def add_chunk(text, source):
        # Skip empty/near-empty strings so junk never reaches the embedder.
        if text and len(text) > 5:
            chunks.append(text)
            metadata.append({"source": source})

    # --- Load Data (Same logic as before) ---
    # 1. Profile: contact details and the professional summary become chunks.
    profile = load_json("profile.json")
    if isinstance(profile, dict):
        contact = profile.get("contact", {})
        c_text = f"Contact Details: Name: {contact.get('name')}. Email: {contact.get('email')}. LinkedIn: {contact.get('linkedin')}."
        add_chunk(c_text, "profile_contact")
        if profile.get("summary"):
            add_chunk(
                f"Professional Summary: {profile.get('summary')}", "profile_summary")

    # 2. Experience: one chunk per role.
    experience = load_json("experience.json")
    for exp in experience:
        text = f"Experience: {exp.get('role')} at {exp.get('company')} ({exp.get('duration')}). {exp.get('description')}"
        add_chunk(text, "experience_entry")

    # 3. Education: one chunk per degree.
    education = load_json("education.json")
    for edu in education:
        text = f"Education: {edu.get('degree')} from {edu.get('institution')}. {edu.get('details')}"
        add_chunk(text, "education_entry")

    # 4. Skills: one chunk per category, skills joined into a single line.
    skills = load_json("skills.json")
    for s in skills:
        text = f"Skills in {s.get('category')}: {', '.join(s.get('list', []))}"
        add_chunk(text, "skills_list")

    # 5. Summaries: curated texts whose source tags ("ui_trigger_*") let the
    # retrieval layer recognize section-level intents.
    summary_files = {
        "about_summary.txt": "profile_about_me",
        "projects_summary.txt": "ui_trigger_projects",
        "articles_summary.txt": "ui_trigger_articles",
        "videos_summary.txt": "ui_trigger_videos",
        "research_summary.txt": "ui_trigger_research",
        "skills_summary.txt": "ui_trigger_skills",
        "certifications_summary.txt": "ui_trigger_certifications"
    }

    for filename, tag in summary_files.items():
        text = load_summary_text(filename)
        if text:
            add_chunk(text, tag)

    # --- Generate Embeddings ---
    if not chunks:
        print("❌ Error: No chunks created.")
        return

    print(f"🧠 Encoding {len(chunks)} chunks via Gemini API...")

    # Batch processing is better, but simple loop works for small portfolios
    embeddings = []
    for i, chunk in enumerate(chunks):
        if i % 5 == 0:
            print(f"  Processing chunk {i}/{len(chunks)}...")
        emb = get_embedding(chunk)
        embeddings.append(emb)

    # FAISS requires a contiguous float32 matrix (n_chunks x dim).
    embeddings_np = np.array(embeddings).astype("float32")

    # Create FAISS index (exact L2 search; fine at portfolio scale)
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)

    # Persist the index and the parallel chunk/metadata lists; the backend
    # maps FAISS result ids back to text through these lists.
    faiss.write_index(index, os.path.join(OUTPUT_DIR, "faiss_index.bin"))
    with open(os.path.join(OUTPUT_DIR, "chunks_metadata.pkl"), "wb") as f:
        pickle.dump({"chunks": chunks, "metadata": metadata}, f)

    print(f"🎉 Cloud Indexing Complete! Saved to {OUTPUT_DIR}")
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# Run the index build only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
scripts/setup_db.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Create and populate the SQLite database backing the portfolio API."""

import os
import json
import sqlite3


# --- Configuration ---
# Ensures the DB is created inside the 'backend' folder
DATABASE_FILE = os.path.join("backend", "portfolio.sqlite")
# Directory holding the modular JSON source files.
DATA_DIR = "data"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def create_connection():
    """Open the portfolio SQLite database, creating its parent folder if needed."""
    db_dir = os.path.dirname(DATABASE_FILE)
    os.makedirs(db_dir, exist_ok=True)
    return sqlite3.connect(DATABASE_FILE)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def setup_database():
    """Create all portfolio tables and (re)load them from the JSON files in DATA_DIR.

    Idempotent: tables use CREATE TABLE IF NOT EXISTS and rows are written
    with INSERT OR REPLACE, so rerunning refreshes the data in place.
    """
    conn = create_connection()
    cursor = conn.cursor()

    print("🛠️ Building Modular Database...")

    # --- 1. Create Tables ---

    # Projects
    cursor.execute('''CREATE TABLE IF NOT EXISTS projects (
        id TEXT PRIMARY KEY,
        title TEXT,
        type TEXT,
        description TEXT,
        technologies TEXT,
        github_url TEXT,
        demo_url TEXT,
        image_path TEXT
    )''')

    # Articles
    cursor.execute('''CREATE TABLE IF NOT EXISTS articles (
        title TEXT, description TEXT, url TEXT, type TEXT
    )''')

    # Videos
    cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
        id TEXT, title TEXT, description TEXT, thumbnail_url TEXT, url TEXT
    )''')

    # Research
    cursor.execute('''CREATE TABLE IF NOT EXISTS research (
        title TEXT, description TEXT, url TEXT, type TEXT
    )''')

    # Skills (modular JSON with 3 columns; 'skill_list' stores a JSON array string)
    cursor.execute('''CREATE TABLE IF NOT EXISTS skills (
        id TEXT PRIMARY KEY,
        category TEXT,
        skill_list TEXT
    )''')

    # Experience
    cursor.execute('''CREATE TABLE IF NOT EXISTS experience (
        id TEXT PRIMARY KEY,
        role TEXT,
        company TEXT,
        duration TEXT,
        description TEXT
    )''')

    # Education
    cursor.execute('''CREATE TABLE IF NOT EXISTS education (
        id TEXT PRIMARY KEY,
        degree TEXT,
        institution TEXT,
        details TEXT
    )''')

    # Certifications
    cursor.execute('''CREATE TABLE IF NOT EXISTS certifications (
        name TEXT PRIMARY KEY
    )''')

    # --- 2. Generic Insert Function ---
    def insert_from_json(filename, table, columns):
        """Load DATA_DIR/filename and upsert each item into `table` using `columns`."""
        filepath = os.path.join(DATA_DIR, filename)
        if not os.path.exists(filepath):
            # FIX: the original f-string had no placeholder, so the warning
            # never said WHICH file was missing.
            print(f" ⚠️ Warning: {filename} not found. Skipping.")
            return

        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Placeholder string depends only on the column count — build it once.
        placeholders = ",".join(["?"] * len(columns))

        count = 0
        for item in data:
            # SQLite cannot store Python lists; serialize them to JSON strings.

            # 1. Handle 'technologies' list (for projects)
            if 'technologies' in item and isinstance(item['technologies'], list):
                item['technologies'] = json.dumps(item['technologies'])

            # 2. Map the JSON key 'list' to the DB column 'skill_list' (for skills)
            if 'list' in item and isinstance(item['list'], list):
                item['skill_list'] = json.dumps(item['list'])

            # .get() yields None for missing fields, which SQLite stores as NULL.
            values = [item.get(col) for col in columns]

            # Table names come from hard-coded calls below (not user input);
            # row values are bound through parameterized placeholders.
            cursor.execute(
                f"INSERT OR REPLACE INTO {table} VALUES ({placeholders})", values)
            count += 1
        print(f" ✅ Loaded {count} items into '{table}'")

    # --- 3. Run Inserts ---
    print("📥 Inserting modular data...")

    # Projects
    insert_from_json("projects.json", "projects", [
        "id", "title", "type", "description", "technologies", "github_url", "demo_url", "image_path"])

    # Standard Content
    insert_from_json("articles.json", "articles", [
        "title", "description", "url", "type"])
    insert_from_json("videos.json", "videos", [
        "id", "title", "description", "thumbnail_url", "url"])
    insert_from_json("research.json", "research", [
        "title", "description", "url", "type"])

    # New Modular Files
    # Note: 'skill_list' maps to the 'list' key in the json via the fix above
    insert_from_json("skills.json", "skills", ["id", "category", "skill_list"])
    insert_from_json("experience.json", "experience", [
        "id", "role", "company", "duration", "description"])
    insert_from_json("education.json", "education", [
        "id", "degree", "institution", "details"])
    insert_from_json("certifications.json", "certifications", ["name"])

    conn.commit()
    conn.close()
    print(f"🎉 Database updated at {DATABASE_FILE}")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# Run the database build only when executed as a script, not on import.
if __name__ == "__main__":
    setup_database()
|