tharu280 commited on
Commit
bc620e9
·
0 Parent(s):

Initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
.github/workflows/sync_to_hub.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ workflow_dispatch:
6
+
7
+ jobs:
8
+ sync-to-hub:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v3
12
+ with:
13
+ fetch-depth: 0
14
+ lfs: true
15
+ - name: Push to hub
16
+ env:
17
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
18
+ # Added --force to overwrite the default Hugging Face README
19
+ run: git push --force https://tharu280:$HF_TOKEN@huggingface.co/spaces/tharu280/portfolio-rag-api main
.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Security (NEVER COMMIT THESE) ---
2
+ .env
3
+ firebase_credentials.json
4
+ *.pem
5
+ *.key
6
+
7
+ # --- Python & OS Junk ---
8
+ __pycache__/
9
+ *.pyc
10
+ venv/
11
+ .venv/
12
+ env/
13
+ .idea/
14
+ .vscode/
15
+ .DS_Store
16
+
17
+ # --- Hosting / Build ---
18
+ # We generally ignore build artifacts, but for your specific
19
+ # "Local Build -> Cloud Run" strategy, we DO want to commit
20
+ # your database and vector store so Render can see them.
21
+ #
22
+ # DO NOT UNCOMMENT THESE LINES unless you switch to an external DB:
23
+ # backend/portfolio.sqlite
24
+ # backend/vector_store/
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Use the specific Python version matching your local environment
2
+ FROM python:3.10.16-slim
3
+
4
+ # 2. Set the working directory inside the container
5
+ WORKDIR /code
6
+
7
+ # 3. Copy requirements first (for better caching)
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ # 4. Install dependencies
11
+ # We use --no-cache-dir to keep the image small
12
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
13
+
14
+ # 5. Copy the rest of your application code
15
+ COPY . /code
16
+
17
+ # 6. CRITICAL: Give write permissions to the backend folder
18
+ # This allows SQLite to create lock files and update the DB
19
+ RUN chmod -R 777 /code/backend
20
+
21
+ # 7. Start the app on port 7860 (Hugging Face default)
22
+ CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # portfolio rag
2
+
app.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ import streamlit as st
5
+
6
+
7
+ # --- Configuration ---
8
+ API_URL = "http://127.0.0.1:8000/chat"
9
+ IMAGES_DIR = "images"
10
+
11
+ # --- Page Setup ---
12
+ st.set_page_config(
13
+ page_title="Tharushika | AI Portfolio",
14
+ page_icon="👋",
15
+ layout="centered",
16
+ initial_sidebar_state="collapsed"
17
+ )
18
+
19
+
20
+ st.markdown("""
21
+ <style>
22
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
23
+
24
+ html, body, .stApp {
25
+ background-color: #ffffff !important;
26
+ font-family: -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
27
+ color: #1d1d1f !important;
28
+ }
29
+
30
+ p, .stMarkdown, .stText {
31
+ color: #333333;
32
+ line-height: 1.6;
33
+ }
34
+
35
+ h1, h2, h3, h4, h5, h6 {
36
+ font-weight: 600 !important;
37
+ letter-spacing: -0.02em !important;
38
+ color: #1d1d1f !important;
39
+ }
40
+ h1 { font-size: 2.8rem !important; margin-bottom: 0.5rem !important;}
41
+ h2 { font-size: 2.2rem !important; margin-top: 0.5rem !important;}
42
+ h3 { font-size: 1.5rem !important; margin-top: 2rem !important; margin-bottom: 1rem !important;}
43
+ h4 { font-size: 1.2rem !important; margin-top: 1rem !important;}
44
+
45
+ .centered-title h1, .centered-title h2 {
46
+ text-align: center;
47
+ width: 100%;
48
+ }
49
+ .centered-title h2 {
50
+ color: #6e6e73 !important;
51
+ font-weight: 500 !important;
52
+ font-size: 1.8rem !important;
53
+ }
54
+ .centered-profile-pic {
55
+ display: flex;
56
+ justify-content: center;
57
+ margin-top: 2rem;
58
+ margin-bottom: 2rem;
59
+ }
60
+ .centered-profile-pic img {
61
+ border-radius: 20px;
62
+ box-shadow: 0 4px 20px rgba(0,0,0,0.1);
63
+ border: 1px solid rgba(0,0,0,0.05);
64
+ }
65
+
66
+ .stChatMessage {
67
+ background-color: transparent !important;
68
+ border: none !important;
69
+ padding: 0.8rem 0 !important;
70
+ }
71
+ .stChatMessage [data-testid="stChatMessageContent"] {
72
+ background-color: #f5f5f7 !important;
73
+ border-radius: 18px !important;
74
+ padding: 0.8rem 1.2rem !important;
75
+ box-shadow: 0 1px 2px rgba(0,0,0,0.05);
76
+ color: #1d1d1f !important;
77
+ font-size: 1.0rem !important;
78
+ line-height: 1.5 !important;
79
+ }
80
+ .stChatMessage[data-testid="user-message"] [data-testid="stChatMessageContent"] {
81
+ background-color: #0071e3 !important;
82
+ color: white !important;
83
+ }
84
+
85
+ div[data-testid="stVerticalBlock"] > [style*="flex-direction: column;"] > [data-testid="stVerticalBlock"] {
86
+ background-color: #ffffff;
87
+ border: 1px solid rgba(0,0,0,0.08);
88
+ border-radius: 16px !important;
89
+ padding: 20px !important;
90
+ box-shadow: 0 4px 12px rgba(0,0,0,0.03);
91
+ transition: transform 0.2s ease, box-shadow 0.2s ease;
92
+ margin-bottom: 15px;
93
+ }
94
+
95
+ div[data-testid="stVerticalBlock"] > [style*="flex-direction: column;"] > [data-testid="stVerticalBlock"]:hover {
96
+ transform: translateY(-2px);
97
+ box-shadow: 0 10px 25px rgba(0,0,0,0.08);
98
+ border-color: rgba(0,0,0,0.15);
99
+ }
100
+
101
+ .stButton > button {
102
+ border-radius: 12px !important;
103
+ font-weight: 500 !important;
104
+ border: 1px solid #d2d2d7 !important;
105
+ background-color: #ffffff !important;
106
+ color: #1d1d1f !important;
107
+ padding: 0.6rem 1rem !important;
108
+ transition: all 0.2s !important;
109
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
110
+ width: 100%;
111
+ }
112
+ .stButton > button:hover {
113
+ background-color: #f5f5f7 !important;
114
+ border-color: #c0c0c5 !important;
115
+ transform: translateY(-1px);
116
+ box-shadow: 0 2px 6px rgba(0,0,0,0.1);
117
+ }
118
+
119
+ .stTextInput input {
120
+ border-radius: 12px !important;
121
+ border: 1px solid #d2d2d7 !important;
122
+ padding: 12px 15px !important;
123
+ font-size: 1rem !important;
124
+ background-color: rgba(255,255,255,0.8) !important;
125
+ backdrop-filter: blur(10px);
126
+ color: #1d1d1f !important;
127
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
128
+ }
129
+ .stTextInput input:focus {
130
+ border-color: #0071e3 !important;
131
+ box-shadow: 0 0 0 4px rgba(0,113,227,0.15) !important;
132
+ }
133
+ div.stChatInputContainer {
134
+ padding-top: 15px;
135
+ background-color: #ffffff;
136
+ padding-bottom: 1rem;
137
+ }
138
+
139
+ a { color: #0071e3 !important; text-decoration: none !important; }
140
+ a:hover { text-decoration: underline !important; }
141
+
142
+ #MainMenu {visibility: hidden;}
143
+ footer {visibility: hidden;}
144
+ header {visibility: hidden;}
145
+
146
+ .quick-action-button .stButton > button {
147
+ border-radius: 999px !important;
148
+ padding: 0.8rem 1.5rem !important;
149
+ width: auto;
150
+ }
151
+ .quick-action-button {
152
+ display: flex;
153
+ justify-content: center;
154
+ margin-top: 2rem;
155
+ gap: 15px;
156
+ flex-wrap: wrap;
157
+ }
158
+ </style>
159
+ """, unsafe_allow_html=True)
160
+
161
+ # --- Helper Functions ---
162
+
163
+
164
+ def render_projects(data):
165
+ st.markdown("### Featured Projects")
166
+ if not data:
167
+ st.info("No projects data received.")
168
+ return
169
+ cols = st.columns(2)
170
+ for i, proj in enumerate(data):
171
+ with cols[i % 2]:
172
+ with st.container(border=True):
173
+ img_path = proj.get("image_path", "")
174
+ if img_path and os.path.exists(img_path):
175
+ st.image(img_path, use_container_width=True)
176
+ else:
177
+ st.markdown(f"""<div style='height:140px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 12px; display:flex; align-items:center; justify-content:center; color:#666;'>No Image</div>""", unsafe_allow_html=True)
178
+
179
+ st.markdown(f"#### {proj.get('title', 'Untitled')}")
180
+ st.caption(proj.get('type', 'Project').upper())
181
+
182
+ with st.expander("View Details"):
183
+ st.write(proj.get('description', ''))
184
+ st.markdown(
185
+ f"**Tech Stack:** {proj.get('technologies', '')}")
186
+ links = []
187
+ if proj.get('github_url'):
188
+ links.append(f"[GitHub]({proj.get('github_url')})")
189
+ if proj.get('demo_url'):
190
+ links.append(f"[Live Demo]({proj.get('demo_url')})")
191
+ if links:
192
+ st.markdown(" &nbsp;•&nbsp; ".join(links))
193
+
194
+
195
+ def render_skills(data):
196
+ st.markdown("### Skills & Expertise")
197
+ if not data:
198
+ st.info("No skills data received.")
199
+ return
200
+ for category, skills in data.items():
201
+ with st.container(border=True):
202
+ st.markdown(f"**{category}**")
203
+ badges = "".join(
204
+ [f"<span style='background:#f5f5f7; padding:4px 10px; border-radius:12px; margin:0 5px 5px 0; display:inline-block; font-size:0.85rem;'>{s}</span>" for s in skills])
205
+ st.markdown(badges, unsafe_allow_html=True)
206
+
207
+
208
+ def render_articles(data):
209
+ st.markdown("### Articles")
210
+ if not data:
211
+ st.info("No articles found.")
212
+ return
213
+ for item in data:
214
+ with st.container(border=True):
215
+ st.markdown(f"**{item.get('title', 'Untitled')}**")
216
+ st.markdown(
217
+ f"<p style='color:#666; font-size:0.9rem;'>{item.get('description', '')}</p>", unsafe_allow_html=True)
218
+ if item.get('url'):
219
+ st.markdown(f"[Read Article ›]({item['url']})")
220
+
221
+
222
+ def render_videos(data):
223
+ st.markdown("### Video Tutorials")
224
+ if not data:
225
+ st.info("No videos found.")
226
+ return
227
+ cols = st.columns(2)
228
+ for i, item in enumerate(data):
229
+ with cols[i % 2]:
230
+ with st.container(border=True):
231
+ thumb = item.get('thumbnail_url', "")
232
+ if thumb and os.path.exists(thumb):
233
+ st.image(thumb, use_container_width=True)
234
+ st.markdown(f"**{item.get('title', 'Untitled')}**")
235
+ st.markdown(
236
+ f"<p style='color:#666; font-size:0.9rem;'>{item.get('description', '')}</p>", unsafe_allow_html=True)
237
+ if item.get('url'):
238
+ st.markdown(f"[Watch on YouTube ›]({item['url']})")
239
+
240
+
241
+ def render_research(data):
242
+ st.markdown("### Research")
243
+ if not data:
244
+ st.info("No research found.")
245
+ return
246
+ for item in data:
247
+ with st.container(border=True):
248
+ st.markdown(f"**{item.get('title', 'Untitled')}**")
249
+ st.markdown(
250
+ f"<p style='color:#666; font-size:0.9rem;'>{item.get('description', '')}</p>", unsafe_allow_html=True)
251
+ if item.get('url'):
252
+ st.markdown(f"[View Publication ›]({item['url']})")
253
+
254
+
255
+ def render_certifications(data):
256
+ st.markdown("### Certifications")
257
+ if not data:
258
+ st.info("No certifications found.")
259
+ return
260
+ for item in data:
261
+ st.markdown(f"""
262
+ <div style='display:flex; align-items:center; margin-bottom:10px;'>
263
+ <span style='font-size:1.2rem; margin-right:10px;'>🎖️</span>
264
+ <span style='font-size:1rem; font-weight:500;'>{item}</span>
265
+ </div>
266
+ """, unsafe_allow_html=True)
267
+
268
+ # --- NEW: Resume Renderer ---
269
+
270
+
271
def render_resume(data):
    """Render the resume card: preview image, blurb, and a PDF download.

    *data* is the dict produced by the backend's ``get_resume`` tool
    (keys: preview_image, file_path, title, description).
    """
    st.markdown("### 📄 Resume / CV")

    preview_col, details_col = st.columns([1, 2])

    with preview_col:
        preview_path = data.get("preview_image", "")
        has_preview = bool(preview_path) and os.path.exists(preview_path)
        if has_preview:
            st.image(preview_path, caption="Preview", use_container_width=True)
        else:
            # Placeholder tile when no preview image ships with the app.
            st.markdown("""
            <div style="height: 200px; background-color: #f5f5f7; border-radius: 12px; display: flex; align-items: center; justify-content: center;">
                <span style="font-size: 3rem;">📄</span>
            </div>
            """, unsafe_allow_html=True)

    with details_col:
        st.markdown(f"#### {data.get('title', 'Resume')}")
        st.write(data.get('description', ''))

        pdf_path = data.get("file_path", "")
        if pdf_path and os.path.exists(pdf_path):
            with open(pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()

            st.download_button(
                label="📥 Download Resume (PDF)",
                data=pdf_bytes,
                file_name="Tharushika_Abedheera_Resume.pdf",
                mime="application/pdf",
            )
        else:
            st.error("Resume file not found.")
304
+
305
+
306
+ def render_content(data):
307
+ st.markdown("### Content & Research")
308
+ if not data:
309
+ return
310
+ tab1, tab2, tab3 = st.tabs(["Articles", "Videos", "Research"])
311
+ with tab1:
312
+ render_articles(data.get('articles', []))
313
+ with tab2:
314
+ render_videos(data.get('videos', []))
315
+ with tab3:
316
+ render_research(data.get('research', []))
317
+
318
+ # --- Centralized Chat Logic Function ---
319
+
320
+
321
def process_chat_message(prompt):
    """Send *prompt* to the backend /chat endpoint and store the exchange.

    On HTTP 200 the parsed reply is saved in
    ``st.session_state.last_exchange`` (user query, AI text, optional tool
    code/data); otherwise a Streamlit error box is shown. Always ends with
    ``st.rerun()`` so the conversation area re-renders with the new state.
    """
    with st.spinner("Processing..."):
        try:
            # A timeout keeps the UI from hanging forever when the backend
            # is unreachable (the original call could block indefinitely).
            response = requests.post(API_URL, json={"message": prompt}, timeout=60)
            if response.status_code == 200:
                api_data = response.json()
                st.session_state.last_exchange = {
                    "user_query": prompt,
                    "ai_response": api_data.get("response", ""),
                    "tool_code": api_data.get("tool_code"),
                    "tool_data": api_data.get("tool_data"),
                }
            else:
                st.error(f"Backend Error: {response.status_code}")
        except Exception as e:
            st.error(f"Connection Failed: {e}")
        st.rerun()
338
+
339
+ # --- Main Layout ---
340
+
341
+
342
+ if "last_exchange" not in st.session_state:
343
+ st.session_state.last_exchange = None
344
+
345
+ # --- Top Section: Profile and Introduction ---
346
+ if not st.session_state.last_exchange:
347
+ st.markdown("<div class='centered-profile-pic'>", unsafe_allow_html=True)
348
+ profile_pic_path = "images/profile.png"
349
+ if os.path.exists(profile_pic_path):
350
+ st.image(profile_pic_path, width=160)
351
+ else:
352
+ st.markdown(f"""<div style='height:160px; width:160px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 20px; display:flex; align-items:center; justify-content:center; color:#666; font-size:0.8rem; margin: 0 auto;'>Add profile.png</div>""", unsafe_allow_html=True)
353
+ st.markdown("</div>", unsafe_allow_html=True)
354
+
355
+ st.markdown("<div class='centered-title'>", unsafe_allow_html=True)
356
+ st.markdown("<h1>Hey, I'm Tharushika 👋</h1>", unsafe_allow_html=True)
357
+ st.markdown("<h2>Machine Learning Engineer</h2>", unsafe_allow_html=True)
358
+ st.markdown("</div>", unsafe_allow_html=True)
359
+
360
+ st.markdown("<div class='quick-action-button'>", unsafe_allow_html=True)
361
+ if st.button("Me"):
362
+ process_chat_message("Tell me about yourself")
363
+ if st.button("Projects"):
364
+ process_chat_message("Show me your projects")
365
+ if st.button("Skills"):
366
+ process_chat_message("What are your skills?")
367
+ if st.button("Contact"):
368
+ process_chat_message("How can I contact you?")
369
+ st.markdown("</div>", unsafe_allow_html=True)
370
+
371
+ if prompt := st.chat_input("Ask me anything..."):
372
+ process_chat_message(prompt)
373
+
374
+ # --- Conversation Area ---
375
+ if st.session_state.last_exchange:
376
+ exchange = st.session_state.last_exchange
377
+
378
+ with st.chat_message("user"):
379
+ st.write(exchange["user_query"])
380
+
381
+ with st.chat_message("assistant"):
382
+ st.write(exchange["ai_response"])
383
+
384
+ tool_code = exchange.get("tool_code")
385
+ tool_data = exchange.get("tool_data")
386
+
387
+ if tool_code == "show_projects":
388
+ render_projects(tool_data)
389
+ elif tool_code == "show_skills":
390
+ render_skills(tool_data)
391
+ elif tool_code == "show_content":
392
+ render_content(tool_data)
393
+ elif tool_code == "show_videos":
394
+ render_videos(tool_data)
395
+ elif tool_code == "show_articles":
396
+ render_articles(tool_data)
397
+ elif tool_code == "show_research":
398
+ render_research(tool_data)
399
+ elif tool_code == "show_certifications":
400
+ render_certifications(tool_data)
401
+ elif tool_code == "show_resume":
402
+ render_resume(tool_data) # <--- RESUME HANDLER ADDED
403
+
404
+ if prompt := st.chat_input("Ask for more details..."):
405
+ process_chat_message(prompt)
backend/database.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import json
3
+ import os
4
+ from typing import List, Dict, Any, Union
5
+
6
DB_PATH = os.path.join(os.path.dirname(
    os.path.abspath(__file__)), "portfolio.sqlite")


def query_sqlite(table_name: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Fetch all rows from *table_name* and shape them for the API.

    Returns:
        - ``"skills"``         -> dict mapping category -> list of skill names
        - ``"certifications"`` -> list of certification name strings
        - any other table      -> list of row dicts
        On any database error (or an invalid table name) an empty list is
        returned so the chat endpoint degrades gracefully instead of crashing.
    """
    # table_name is interpolated into the SQL text below (SQLite cannot
    # parameterize identifiers), so reject anything that is not a plain
    # identifier to rule out SQL injection.
    if not table_name.isidentifier():
        print(f"Database Error: invalid table name {table_name!r}")
        return []

    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(f"SELECT * FROM {table_name}")
            rows = [dict(row) for row in cursor.fetchall()]
        finally:
            # Close even when the SELECT raises (the original leaked the
            # connection on error because close() was inside the happy path).
            conn.close()

        if table_name == "skills":
            formatted_skills = {}
            for row in rows:
                try:
                    # skill_list is stored as a JSON-encoded array string.
                    formatted_skills[row['category']] = json.loads(row['skill_list'])
                except (TypeError, ValueError):
                    # Malformed/missing JSON: keep the category with no skills
                    # rather than failing the whole request.
                    formatted_skills[row['category']] = []
            return formatted_skills

        if table_name == "certifications":
            return [row['name'] for row in rows]

        return rows
    except Exception as e:
        # Best-effort data layer: log and return an empty result instead of
        # propagating DB errors into the chat flow.
        print(f"Database Error: {e}")
        return []
backend/main.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import google.generativeai as genai
4
+ from fastapi import FastAPI, HTTPException
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from pydantic import BaseModel
7
+ from typing import List, Dict, Any, Union, Optional
8
+ from dotenv import load_dotenv
9
+
10
+ # Import local modules using relative imports
11
+ # This requires running the app as 'uvicorn backend.main:app ...'
12
+ from . import rag
13
+ from . import tools
14
+
15
+ # 1. Load Environment Variables
16
+ # 'load_dotenv()' works for local dev. On Render/Cloud, it uses system env vars.
17
+ load_dotenv()
18
+
19
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
20
+
21
+ if not GEMINI_API_KEY:
22
+ print("⚠️ WARNING: GEMINI_API_KEY not found. Check your .env or Cloud Dashboard.")
23
+ # In production, we might want to crash if no key is found
24
+ # sys.exit(1)
25
+
26
+ genai.configure(api_key=GEMINI_API_KEY)
27
+
28
+ # Use the fast, free-tier friendly model
29
+ MODEL_NAME = 'gemini-1.5-flash'
30
+
31
+ # --- API Models ---
32
+
33
+
34
+ class ChatRequest(BaseModel):
35
+ message: str
36
+
37
+
38
+ class ChatResponse(BaseModel):
39
+ response: str
40
+ tool_code: Optional[str] = None
41
+ tool_data: Optional[Union[Dict[str, Any], List[Any]]] = None
42
+
43
+
44
+ # --- FastAPI App Setup ---
45
+ app = FastAPI(
46
+ title="Tharushika's AI Portfolio API",
47
+ description="Backend for AI Portfolio using Gemini & RAG",
48
+ version="1.0.0"
49
+ )
50
+
51
+ # --- CORS MIDDLEWARE (Critical for React) ---
52
+ app.add_middleware(
53
+ CORSMiddleware,
54
+ # In production, replace ["*"] with your actual frontend URL (e.g., ["https://my-portfolio.vercel.app"])
55
+ allow_origins=["*"],
56
+ allow_credentials=True,
57
+ allow_methods=["*"], # Allows all methods (GET, POST, etc.)
58
+ allow_headers=["*"], # Allows all headers
59
+ )
60
+
61
+
62
+ @app.on_event("startup")
63
+ async def startup():
64
+ """Initialize the RAG system when the server boots up."""
65
+ rag.initialize_rag()
66
+
67
+ # --- Endpoints ---
68
+
69
+
70
@app.get("/")
def health_check():
    """Liveness probe: confirms the API process is up and serving."""
    status_payload = {"status": "ok", "message": "Portfolio API is live"}
    return status_payload
74
+
75
+
76
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Main chat entry point.

    First lets Gemini (armed with the tool declarations) decide whether a
    structured tool fits the question. If a tool fires, its data is returned
    together with a short RAG-flavoured intro sentence; otherwise the
    question is answered purely from retrieved RAG context.

    Raises:
        HTTPException(500) on any unexpected failure (details are logged
        server-side only).
    """
    user_msg = request.message

    # Initialize Gemini with our toolkit of function declarations.
    model = genai.GenerativeModel(MODEL_NAME, tools=tools.ALL_TOOLS_LIST)

    try:
        # Ask Gemini first without RAG context: the model itself routes
        # between "call a tool" and "just answer".
        chat = model.start_chat(enable_automatic_function_calling=False)
        reply = chat.send_message(user_msg)

        # The "router": scan the reply parts for the first function call.
        fn_call = None
        for part in (reply.parts or []):
            if part.function_call:
                fn_call = part.function_call
                break

        # --- PATH A: Tool Triggered (Structured Data) ---
        if fn_call:
            tool_name = fn_call.name
            print(f"🛠️ Tool Triggered: {tool_name}")

            if tool_name in tools.TOOL_FUNCTIONS:
                # Execute the mapped Python function (e.g. a SQL query).
                data = tools.TOOL_FUNCTIONS[tool_name]()

                # Frontend signal code (e.g. "show_projects").
                code = tools.TOOL_CODE_MAP.get(tool_name)

                # A little RAG context makes the intro sound personal.
                context_text = "\n".join(rag.retrieve_context(user_msg, k=2))

                intro_prompt = f"""
                The user asked: '{user_msg}'.
                You just triggered the tool '{tool_name}' to show them visual data.
                Write a very short (1 sentence), enthusiastic intro for this data.
                Use this context if relevant: {context_text}
                """

                intro_model = genai.GenerativeModel(MODEL_NAME)
                intro_text = intro_model.generate_content(intro_prompt).text.strip()

                return ChatResponse(
                    response=intro_text,
                    tool_code=code,
                    tool_data=data,
                )

        # --- PATH B: Pure RAG Chat (Unstructured Context) ---
        print("🧠 RAG Path Triggered")

        # Retrieve relevant text chunks from the vector store.
        context_text = "\n\n".join(rag.retrieve_context(user_msg, k=4))

        rag_prompt = f"""
        You are an AI assistant for Tharushika Abedheera's portfolio.
        Your goal is to answer the user's question professionally and confidently, acting as Tharushika.

        STRICT RULES:
        - Use ONLY the context provided below.
        - If the answer isn't in the context, say "I don't have that specific info right now, but feel free to ask about my projects or skills!"
        - Keep answers concise (under 3-4 sentences) unless asked for detail.

        CONTEXT FROM KNOWLEDGE BASE:
        {context_text}

        USER QUESTION:
        {user_msg}
        """

        rag_model = genai.GenerativeModel(MODEL_NAME)
        answer_text = rag_model.generate_content(rag_prompt).text

        return ChatResponse(response=answer_text, tool_code=None, tool_data=None)

    except Exception as e:
        print(f"❌ Error handling chat: {e}")
        # Don't leak raw error details to the client.
        raise HTTPException(status_code=500, detail="Internal Server Error")
backend/portfolio.sqlite ADDED
Binary file (65.5 kB). View file
 
backend/rag.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import faiss
4
+ import numpy as np
5
+ import google.generativeai as genai
6
+ import traceback
7
+ from dotenv import load_dotenv
8
+
9
+ # --- 1. Force Load API Key ---
10
+ load_dotenv()
11
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
+
13
+ if not GEMINI_API_KEY:
14
+ print("⚠️ WARNING: GEMINI_API_KEY not found in rag.py environment.")
15
+ else:
16
+ genai.configure(api_key=GEMINI_API_KEY)
17
+
18
+ # Paths
19
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
20
+ VECTOR_STORE_DIR = os.path.join(BASE_DIR, "vector_store")
21
+ INDEX_PATH = os.path.join(VECTOR_STORE_DIR, "faiss_index.bin")
22
+ METADATA_PATH = os.path.join(VECTOR_STORE_DIR, "chunks_metadata.pkl")
23
+
24
+ # API Config
25
+ EMBEDDING_MODEL = "models/text-embedding-004"
26
+
27
+ # Global Components
28
+ faiss_index = None
29
+ chunks = []
30
+
31
+
32
def initialize_rag():
    """Load the FAISS index and chunk metadata into the module globals.

    Called once at server startup. Logs and returns silently when the
    vector-store files are missing or unreadable, leaving retrieval disabled.
    """
    global faiss_index, chunks

    print("--- RAG INITIALIZATION ---")
    files_present = os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH)
    if not files_present:
        print(f"CRITICAL: Index files not found at {VECTOR_STORE_DIR}")
        return

    try:
        faiss_index = faiss.read_index(INDEX_PATH)
        with open(METADATA_PATH, "rb") as f:
            data = pickle.load(f)
        chunks = data['chunks']
        print(f"✅ RAG Loaded. {len(chunks)} chunks indexed.")
    except Exception as e:
        print(f"❌ Error loading RAG files: {e}")
48
+
49
+
50
def retrieve_context(query: str, k: int = 2):
    """Return up to *k* text chunks relevant to *query*.

    Embeds the query with the Gemini embedding API, searches the global
    FAISS index, and maps hit indices back to their chunk texts. Returns
    ``[]`` when the index is not loaded or any step fails (errors are
    logged, never raised).
    """
    # Explicit None check: the original used `if not faiss_index`, which
    # relies on the truthiness of a FAISS index object rather than testing
    # "was initialize_rag able to load it".
    if faiss_index is None:
        print("⚠️ RAG Retrieval Skipped: Index not loaded.")
        return []

    try:
        # 1. Embed the query via the Gemini API.
        result = genai.embed_content(
            model=EMBEDDING_MODEL,
            content=query,
            task_type="retrieval_query"
        )

        # 2. FAISS expects a 2-D float32 array of shape (1, dim).
        query_vec = np.array([result['embedding']]).astype("float32")

        # 3. Guard against an index built with a different embedding model.
        if faiss_index.d != query_vec.shape[1]:
            print(
                f"❌ DIMENSION MISMATCH: Index expects {faiss_index.d}, but Query is {query_vec.shape[1]}")
            print(
                "SOLUTION: Delete backend/vector_store and run create_vector_db.py again.")
            return []

        # 4. Nearest-neighbour search; -1 marks empty result slots.
        distances, indices = faiss_index.search(query_vec, k)

        retrieved_text = [
            chunks[i] for i in indices[0] if i != -1 and i < len(chunks)
        ]
        return retrieved_text

    except Exception as e:
        print(f"❌ RAG ERROR: {e}")
        traceback.print_exc()  # Full stack trace to the server log.
        return []
backend/tools.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ from .database import query_sqlite
3
+
4
+ # --- Python Implementation of Tools ---
5
+
6
+
7
+ def get_projects(): return query_sqlite("projects")
8
+ def get_skills(): return query_sqlite("skills")
9
+ def get_certifications(): return query_sqlite("certifications")
10
+ def get_articles(): return query_sqlite("articles")
11
+ def get_videos(): return query_sqlite("videos")
12
+ def get_research(): return query_sqlite("research")
13
+
14
+
15
+ def get_content():
16
+ return {
17
+ "articles": query_sqlite("articles"),
18
+ "videos": query_sqlite("videos"),
19
+ "research": query_sqlite("research")
20
+ }
21
+
22
+
23
def get_resume():
    """Return static metadata describing the downloadable resume/CV.

    No database lookup is involved: the resume is a fixed asset shipped
    with the app, so its paths and labels are hard-coded here.
    """
    resume_info = {
        "file_path": "images/resume.pdf",
        "preview_image": "images/resume_preview.png",
        "title": "Tharushika Abedheera - CV",
        "description": "Machine Learning Engineer & AI Specialist",
    }
    return resume_info
31
+
32
+
33
+ # Mapping for execution
34
+ TOOL_FUNCTIONS = {
35
+ "get_projects": get_projects,
36
+ "get_skills": get_skills,
37
+ "get_certifications": get_certifications,
38
+ "get_articles": get_articles,
39
+ "get_videos": get_videos,
40
+ "get_research": get_research,
41
+ "get_content": get_content,
42
+ "get_resume": get_resume # <--- Added
43
+ }
44
+
45
+ # Mapping for Frontend Action Codes
46
+ TOOL_CODE_MAP = {
47
+ "get_projects": "show_projects",
48
+ "get_skills": "show_skills",
49
+ "get_certifications": "show_certifications",
50
+ "get_articles": "show_articles",
51
+ "get_videos": "show_videos",
52
+ "get_research": "show_research",
53
+ "get_content": "show_content",
54
+ "get_resume": "show_resume" # <--- Added
55
+ }
56
+
57
+ # --- Gemini Schema Definitions ---
58
+
59
+ projects_tool = genai.protos.FunctionDeclaration(
60
+ name="get_projects",
61
+ description="Retrieve the full list of Tharushika's projects.",
62
+ parameters=genai.protos.Schema(
63
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
64
+ )
65
+
66
+ skills_tool = genai.protos.FunctionDeclaration(
67
+ name="get_skills",
68
+ description="Retrieve technical skills.",
69
+ parameters=genai.protos.Schema(
70
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
71
+ )
72
+
73
+ certifications_tool = genai.protos.FunctionDeclaration(
74
+ name="get_certifications",
75
+ description="Retrieve certifications.",
76
+ parameters=genai.protos.Schema(
77
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
78
+ )
79
+
80
+ articles_tool = genai.protos.FunctionDeclaration(
81
+ name="get_articles",
82
+ description="Retrieve articles.",
83
+ parameters=genai.protos.Schema(
84
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
85
+ )
86
+
87
+ videos_tool = genai.protos.FunctionDeclaration(
88
+ name="get_videos",
89
+ description="Retrieve videos.",
90
+ parameters=genai.protos.Schema(
91
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
92
+ )
93
+
94
+ research_tool = genai.protos.FunctionDeclaration(
95
+ name="get_research",
96
+ description="Retrieve research.",
97
+ parameters=genai.protos.Schema(
98
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
99
+ )
100
+
101
+ content_tool = genai.protos.FunctionDeclaration(
102
+ name="get_content",
103
+ description="Retrieve ALL content (articles, videos, research).",
104
+ parameters=genai.protos.Schema(
105
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
106
+ )
107
+
108
+ resume_tool = genai.protos.FunctionDeclaration(
109
+ name="get_resume",
110
+ description="Retrieve Tharushika's official resume/CV. Use this when the user asks to see, download, or get a copy of the resume.",
111
+ parameters=genai.protos.Schema(
112
+ type=genai.protos.Type.OBJECT, properties={}, required=[])
113
+ )
114
+
115
+ # List to pass to the model
116
+ ALL_TOOLS_LIST = [
117
+ projects_tool, skills_tool, certifications_tool,
118
+ articles_tool, videos_tool, research_tool, content_tool,
119
+ resume_tool # <--- Added
120
+ ]
backend/vector_store/chunks_metadata.pkl ADDED
Binary file (6.95 kB). View file
 
backend/vector_store/faiss_index.bin ADDED
Binary file (61.5 kB). View file
 
data/articles.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "title": "From PDF to API: How I Built a Hybrid RAG Insights Engine for Laptop Specs",
4
+ "description": "A deep dive into how I created a dynamic and static data-based RAG system for a business.",
5
+ "url": "https://www.linkedin.com/pulse/from-pdf-api-how-i-built-hybrid-rag-insights-engine-laptop-abedheera-hwwoc",
6
+ "type": "article"
7
+ },
8
+ {
9
+ "title": "How to Choose the Best Machine Learning Algorithm for Sentiment Analysis",
10
+ "description": "A guide on how to choose the best machine learning algorithm for sentiment analysis.",
11
+ "url": "https://www.linkedin.com/pulse/how-choose-best-machine-learning-algorithm-sentiment-abedheera-mvctc",
12
+ "type": "article"
13
+ },
14
+ {
15
+ "title": "Parallel RAG with LangChain: Three Vector DBs, One Personality.",
16
+ "description": "Lessons learned building an 'Uncle Iroh' mental healthcare assistant. Covers diverse knowledge sourcing, chunking strategies, and parallel retrievers.",
17
+ "url": "https://www.linkedin.com/pulse/parallel-rag-langchain-three-vector-dbs-one-tharushika-abedheera-tognc",
18
+ "type": "article"
19
+ },
20
+ {
21
+ "title": "A Gentle Introduction to LSTMs (Long Short-Term Memory Networks)",
22
+ "description": "Explore what LSTMs are, why they were created, and how they help machines understand sequences.",
23
+ "url": "https://www.linkedin.com/pulse/gentle-introduction-lstms-long-short-term-memory-tharushika-abedheera-irvzc",
24
+ "type": "article"
25
+ },
26
+ {
27
+ "title": "How Does L1 (Lasso) and L2 (Ridge) Regularization Work?",
28
+ "description": "Explore L1 (Lasso) and L2 (Ridge) regularization techniques to improve model performance by addressing overfitting.",
29
+ "url": "https://www.linkedin.com/pulse/how-does-l1-lasso-l2-ridge-regularization-work-tharushika-abedheera-upyvc",
30
+ "type": "article"
31
+ },
32
+ {
33
+ "title": "ROUGE Score: A Key Metric for Evaluating Text Summarisation Models",
34
+ "description": "Introduction to how ROUGE scores provide valuable insights into how well a model has performed in generating summaries.",
35
+ "url": "https://www.linkedin.com/pulse/rouge-score-key-metric-evaluating-text-summarisation-models-qirwc",
36
+ "type": "article"
37
+ },
38
+ {
39
+ "title": "SQuAD Metrics: Evaluating Question-Answering Models Effectively",
40
+ "description": "Introduction to the SQuAD metric, an essential tool for evaluating question-answering models in NLP.",
41
+ "url": "https://www.linkedin.com/pulse/how-does-l1-lasso-l2-ridge-regularization-work-tharushika-abedheera-upyvc",
42
+ "type": "article"
43
+ }
44
+ ]
data/certifications.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ { "name": "Azure Certified AI Engineer Associate (in progress)" },
3
+ { "name": "Astronomer Certification DAG Authoring for Apache Airflow 3" },
4
+ { "name": "LinkedIn- Vector Databases for Caching and Retrieval Augmented Generation (RAG)" },
5
+ { "name": "LinkedIn- Building AI Agents with AutoGen" },
6
+ { "name": "LinkedIn- Building NLP Apps with Hugging Face Transformers" },
7
+ { "name": "LinkedIn- Agentic AI for Developers: Concepts and Application for Enterprises" },
8
+ { "name": "LinkedIn- Getting Started with Hugging Face Transformers ." }
9
+ ]
data/education.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "uni_plymouth",
4
+ "degree": "Bachelor of Science (Honours) in Software Engineering",
5
+ "institution": "University of Plymouth",
6
+ "details": "Focused on software engineering principles and AI applications."
7
+ },
8
+ {
9
+ "id": "badulla_college",
10
+ "degree": "GCE Advanced level",
11
+ "institution": "Badulla central collage",
12
+ "details": "Includes an 'A' pass for English Language."
13
+ }
14
+ ]
data/experience.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "nextgen_intern",
4
+ "role": "Data Science and Machine Learning Engineer intern",
5
+ "company": "NextGen CodeX",
6
+ "duration": "2025/10/15 - Present",
7
+ "description": "Internship focused on advanced Data Science and ML applications."
8
+ },
9
+ {
10
+ "id": "intelligen_freelance",
11
+ "role": "Freelance Machine Learning Engineer",
12
+ "company": "Intelli-Gen AI",
13
+ "duration": "2024/02/10 - Present",
14
+ "description": "Freelance work developing custom ML solutions and AI integrations."
15
+ }
16
+ ]
data/profile.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "contact": {
3
+ "name": "A.A.D. Tharushika Abedheera",
4
+ "role": "Machine Learning Engineer / AI Engineer",
5
+ "email": "tharushika280@gmail.com",
6
+ "phone": "07655753551",
7
+ "location": "Badulla, Sri Lanka",
8
+ "linkedin": "https://www.linkedin.com/in/tharushika-abedheera-3396311a4/",
9
+ "github": "https://github.com/tharu280",
10
+ "medium": "https://medium.com/@tharushika280"
11
+ },
12
+ "summary": "Machine Learning Engineer with a research-driven approach, specializing in NLP, Generative AI, and AI agent frameworks. Passionate about transforming AI/ML research into real-world applications, with hands-on experience and expertise in Machine learning, Deep learning, large language models (LLMs), retrieval-augmented generation (RAG), and autonomous AI systems. Adept at designing, deploying, and optimizing AI-driven systems to enhance scalability and innovation."
13
+ }
data/projects.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "rag-book-recommender",
4
+ "title": "RAG-based Advanced Book Recommendation System",
5
+ "type": "AI/ML",
6
+ "description": "Built a personalized book recommender using LangChain, HuggingFace Sentence Transformers, and ChromaDB, with zero-shot classification to auto-fill missing categories and enrich recommendations. Deployed the system using FastAPI, Docker, and Nginx on AWS Elastic Beanstalk for scalable, low-latency access.",
7
+ "technologies": ["Python", "Numpy", "Pandas", "HuggingFace Transformers", "LangChain", "ChromaDB", "FastAPI", "Docker", "Nginx", "AWS Elastic Beanstalk"],
8
+ "github_url": "https://github.com/tharu280",
9
+ "demo_url": null,
10
+ "image_path": "images/project_book.png"
11
+ },
12
+ {
13
+ "id": "eyecon",
14
+ "title": "EyeCon – Real-Time Blink Communication System",
15
+ "type": "AI/ML",
16
+ "description": "Developed the first system enabling fully paralysed users to communicate without any wearable devices, using only eye blinks detected via webcam. Implemented Eye Aspect Ratio (EAR)–based blink detection to differentiate short (next candidate) and long blinks (select candidate), achieving accurate, real-time interpretation of user intent. Integrated a Gemini LLM for context-aware candidate word suggestions, dynamically updating options as the sentence forms. Enabled automatic summarisation of composed sentences, providing users with coherent text output and facilitating streamlined communication.",
17
+ "technologies": ["Python", "OpenCV", "MediaPipe", "Kivy", "Custom Morse Decoder", "Gemini LLM", "Contextual Candidate Selection", "FastAPI"],
18
+ "github_url": "https://github.com/tharu280",
19
+ "demo_url": null,
20
+ "image_path": "images/project_eyecon.png"
21
+ },
22
+ {
23
+ "id": "youtube-analytics",
24
+ "title": "End-to-End YouTube Comment Analytics Pipeline",
25
+ "type": "Data Engineering",
26
+ "description": "Built an AI-powered pipeline to extract, classify, and visualize YouTube comments. Achieved 95.46% accuracy by fine-tuning BERT; also developed a BiLSTM classifier (90.75%); hosted the best model on HuggingFace Hub. Integrated Azure services (SQL Serverless, Data Factory, Power BI) for automated data flow, storage, and interactive visualisations.",
27
+ "technologies": ["Python", "TensorFlow", "Keras", "TF-IDF", "BiLSTM", "BERT", "HuggingFace", "FastAPI", "Docker", "YouTube API", "Azure SQL Serverless", "Azure Data Factory", "Power BI", "Azure Data Studio"],
28
+ "github_url": "https://github.com/tharu280",
29
+ "demo_url": null,
30
+ "image_path": "images/project_youtube.png"
31
+ },
32
+ {
33
+ "id": "mental-health-chatbot",
34
+ "title": "Comic Character-based RAG Mental Healthcare Assistant Chatbot",
35
+ "type": "AI/ML",
36
+ "description": "Built a RAG-based chatbot modelled after “Uncle Iroh” to provide mental health support using multi-source wisdom and personalized tone. Implemented 3 FAISS stores, with RunnableParallel & Sequential chains -cutting latency from 3.9s to 1.5s accurate and combined context retrieval. Optimised with Redis memory, LangSmith monitoring, and FastAPI; voice cloning via ElevenLabs in progress.",
37
+ "technologies": ["Python", "LangChain", "HuggingFace Transformers", "Gemini 1.5 Flash", "FAISS", "Redis", "LangSmith", "ElevenLabs", "FastAPI", "Pydantic"],
38
+ "github_url": "https://github.com/tharu280",
39
+ "demo_url": null,
40
+ "image_path": "images/project_chatbot.png"
41
+ },
42
+ {
43
+ "id": "cafe-chatbot",
44
+ "title": "AI-Powered RAG Chatbot for Cafe Business",
45
+ "type": "Cloud AI",
46
+ "description": "Built a Retrieval-Augmented Generation (RAG) chatbot using Amazon Bedrock, OpenSearch Serverless, and S3 to answer customer queries based on internal café documents. Preprocessed and embedded data for semantic search; integrated foundation models with prompt engineering for contextual responses. Designed a scalable, secure deployment with proper IAM configuration and AWS-native services to ensure real-world readiness.",
47
+ "technologies": ["AWS Bedrock", "AWS OpenSearch Serverless", "AWS S3", "Llama 3 70B", "Titan Text G1 – Lite"],
48
+ "github_url": "https://github.com/tharu280",
49
+ "demo_url": null,
50
+ "image_path": "images/project_cafe.png"
51
+ },
52
+ {
53
+ "id": "ai-coding-agent",
54
+ "title": "AI Coding Agent – V (Open Source Project)",
55
+ "type": "Agentic AI",
56
+ "description": "Designed and developed an AI coding agent demonstrating Agentic AI design patterns including Tool Use and Reflection Loop. Integrated Google Gemini’s function calling to perform grounded code operations such as reading, writing, and executing Python files. Implemented a 20-iteration reflection loop where the agent plans, acts, critiques, and improves results until completion.",
57
+ "technologies": ["Python", "Google Gemini API", "Gemini SDK", "Pydantic", "Agentic design patterns"],
58
+ "github_url": "https://github.com/tharu280",
59
+ "demo_url": null,
60
+ "image_path": "images/project_agent.png"
61
+ },
62
+ {
63
+ "id": "laptop-intel-engine",
64
+ "title": "Cross-Marketplace Laptop & Review Intelligence Engine",
65
+ "type": "AI/ML",
66
+ "description": "Developed an insights engine to analyse and compare laptops by integrating two distinct data sources: canonical static technical specifications from PDFs(PSREF) and mutable, real time data (price, availability, reviews) from product pages.. Engineered a hybrid Retrieval-Augmented Generation (RAG) pipeline that fuses context from two data stores: a FAISS vector index (for semantic search on PDF specs) and a SQLite database (for structured SQL queries on dynamic market data).",
67
+ "technologies": ["Python", "FastAPI", "Streamlit", "Google Gemini API", "FAISS", "Sentence Transformers", "SQLite", "Pandas", "Pydantic"],
68
+ "github_url": "https://github.com/tharu280",
69
+ "demo_url": null,
70
+ "image_path": "images/project_laptop.png"
71
+ },
72
+ {
73
+ "id": "food-ordering-backend",
74
+ "title": "Event-Driven Backend for Food Ordering with Real-Time Fraud Detection",
75
+ "type": "Backend/ML",
76
+ "description": "Built a scalable event-driven backend using Python, Kafka, and Docker for a food ordering app with real-time processing. Modelled core events like order_placed, order_confirmed, and fraud_alert as Kafka topics across decoupled microservices. Integrated a fraud detection model into the transaction service to detect fraud in real-time and publish alerts via Kafka.",
77
+ "technologies": ["Python", "Apache Kafka", "kafka-python", "scikit-learn", "Docker", "Pydantic", "MLflow", "FastAPI"],
78
+ "github_url": "https://github.com/tharu280",
79
+ "demo_url": null,
80
+ "image_path": "images/project_food.png"
81
+ },
82
+ {
83
+ "id": "tour-planner-agent",
84
+ "title": "Tour Planner AI Agent",
85
+ "type": "Agentic AI",
86
+ "description": "Designed a hybrid AI system using LangGraph to orchestrate a multi-step agentic workflow for dynamic itinerary generation. Engineered a rule-based data ingestion pipeline to chain disparate APIs for gathering geocoding, route, and a broad set of location data. Implemented an “LLM as a Judge\" pattern that semantically analyses the user's qualitative query to rank and filter the raw data, delivering context-aware, personalized recommendations.",
87
+ "technologies": ["Python", "LangGraph", "FastAPI", "Google Gemini API", "Pydantic", "Geoapify", "OpenRouteService", "Nominatim"],
88
+ "github_url": "https://github.com/tharu280",
89
+ "demo_url": null,
90
+ "image_path": "images/project_tour.png"
91
+ }
92
+ ]
data/research.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "title": "AI Voice Assistant for Healthcare (ICIET 2023)",
4
+ "description": "Published as a conference paper at ICIET 2023. Developed an AI voice assistant using PyTorch and NLTK to handle general and healthcare-related queries.",
5
+ "url": "https://www.researchgate.net/publication/376522984_A_Deep_Learning_Approach_to_Utilize_AI_Voice_Assistants_In_Healthcare",
6
+ "type": "paper"
7
+ },
8
+ {
9
+ "title": "EL_sense: AI Solution for Human-Elephant Conflict (In Progress)",
10
+ "description": "Proposed a bioacoustics-based system for detecting low-frequency (20Hz) elephant rumbles using live audio streaming via Apache Kafka.",
11
+ "url": null,
12
+ "type": "research"
13
+ },
14
+ {
15
+ "title": "EyeCon – Real-Time Blink Communication System",
16
+ "description": "Developed the first system enabling fully paralysed users to communicate without any wearable devices, using only eye blinks.",
17
+ "url": null,
18
+ "type": "paper"
19
+ }
20
+ ]
data/skills.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "programming",
4
+ "category": "Programming",
5
+ "list": ["Python", "JavaScript", "HTML", "CSS", "C", "SQL"]
6
+ },
7
+ {
8
+ "id": "ml_dl",
9
+ "category": "Machine Learning & Deep Learning",
10
+ "list": ["Scikit-learn", "TensorFlow", "Keras", "PyTorch", "Hugging Face", "Transformers", "XGBoost", "Decision Trees", "Random Forest", "SVM", "Naïve Bayes", "KNN", "K-Means Clustering", "PCA", "Model Evaluation", "Hyper-parameter Tuning", "Pandas", "NumPy", "LSTMs", "Transformers", "LLMs", "NLTK", "CNNs", "OpenCV", "MediaPipe", "Vision Transformers (ViTs)", "Yolo"]
11
+ },
12
+ {
13
+ "id": "vector_db",
14
+ "category": "Vector Search & Databases",
15
+ "list": ["FAISS", "ANNOY", "Milvus", "ChromaDB", "LlamaIndex", "Pinecone", "Weaviate"]
16
+ },
17
+ {
18
+ "id": "databases",
19
+ "category": "Databases",
20
+ "list": ["MySQL", "MongoDB", "Firebase", "Amazon S3", "PostgreSQL"]
21
+ },
22
+ {
23
+ "id": "cloud_devops",
24
+ "category": "Cloud & CI/CD",
25
+ "list": ["AWS", "Azure", "GCP", "Docker", "GitHub Actions", "Apache Kafka", "MLflow", "LangSmith", "CI/CD pipelines", "AWS EC2"]
26
+ },
27
+ {
28
+ "id": "agentic_ai",
29
+ "category": "Agentic AI",
30
+ "list": ["AutoGen", "LlamaIndex", "Langchain", "LangGraph"]
31
+ },
32
+ {
33
+ "id": "frameworks",
34
+ "category": "Frameworks",
35
+ "list": ["FastAPI", "Flask", "Streamlit", "Tkinter", "Kivy", "React.js"]
36
+ }
37
+ ]
data/summaries/about_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ "My journey into AI is fueled by a desire to solve real-world problems with tangible impact. I’m deeply passionate about Machine Learning and Artificial Intelligence and have spent the past two years learning and building hands on projects completely self driven and grounded in real world applications.I'm not just interested in model accuracy; I'm passionate about engineering systems that are robust, scalable, and genuinely useful, especially in areas like accessibility and practical RAG applications. I thrive on the entire AI lifecycle, from developing the initial mathematical concept to containerizing the final FastAPI endpoint for production. The transition from pure research to a deployed product is where I find the most exciting challenges.I’m confident in my ability to learn quickly, adapt to fast paced environments, and contribute meaningfully to your team. Every drop of my knowledge in the domain of AI ML is learned solely through free resources such as youtube on my own as i am absolutely passionate about this field."
data/summaries/articles_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “Above are some of my published articles, where I regularly write about machine learning, deep learning, NLP, RAG systems, evaluation metrics, and practical engineering lessons from real-world AI projects. My writing focuses on breaking down complex concepts, sharing hands-on insights, and documenting the architectures, techniques, and optimizations I use in my own systems. These featured articles represent only part of my work — I continue to publish more guides, explanations, and deep dives to help others understand and build effective AI/ML solutions.”
data/summaries/certifications_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “Above are some of my professional certifications, reflecting my ongoing commitment to mastering cloud platforms, AI engineering, workflow orchestration, RAG systems, and agentic AI frameworks. I consistently pursue industry-recognized credentials to stay aligned with modern best practices and deepen my expertise. These listed certifications are only part of my continuous learning journey, and I regularly work toward new ones as the field evolves.”
data/summaries/projects_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “Above are some of my most notable projects. I’ve worked across a wide range of AI and engineering areas — including machine learning, deep learning, RAG systems, agentic AI, computer vision, and scalable backend pipelines. My work spans everything from recommendation systems, analytics pipelines, intelligent assistants, and real-time agent frameworks to cloud-native, production-ready AI deployments. I focus heavily on building systems that are not only accurate and reliable, but also cost-efficient, resource-optimized, and scalable. These showcased projects represent only part of my portfolio, and I continue to work on many more innovative solutions in the AI/ML space.”
data/summaries/research_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “Above are some of my key research contributions. My work spans applied AI, computer vision, bioacoustics, and assistive technologies — including conference-published studies, novel communication systems, and ongoing experimental projects. I focus on solving real-world problems using deep learning, signal processing, and intelligent systems, with an emphasis on accessibility, healthcare, and environmental applications. These represent only part of my research journey, and I continue to explore new areas and publish work as I advance further in the field.”
data/summaries/skills_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “The skills listed above represent my core technical strengths across software engineering, machine learning, deep learning, vector search, cloud infrastructure, and agentic AI. I work extensively with modern ML frameworks, LLM ecosystems, scalable backend tools, and cloud-native architectures. My expertise spans end-to-end AI systems — from model training and evaluation to retrieval pipelines, agent frameworks, and production deployment. I continuously expand my skill set as new tools, frameworks, and AI patterns emerge, so this list reflects only a snapshot of the technologies I actively work with.”
data/summaries/videos_summary.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ “Above are some of my video tutorials, where I break down complex AI concepts into simple, easy-to-understand explanations. I create educational content focused on RAG systems, agentic AI patterns, embeddings, and practical machine learning ideas — all presented in a clear, beginner-friendly way. These videos represent only a part of what I create, and I continue to produce more content to help others learn modern AI concepts without the jargon.”
data/videos.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "01",
4
+ "title": "RAG and Embeddings Explained in Plain English",
5
+ "description": "Explaining RAG and embeddings in plain English. No jargon. No math. Just the core concepts, broken down so anyone can understand.",
6
+ "thumbnail_url": "images/video_thumbnails/01.png",
7
+ "url": "https://www.linkedin.com/posts/tharushika-abedheera-3396311a4_explaining-rag-and-embeddings-in-plain-english-activity-7379239387819274240-__h6"
8
+ },
9
+ {
10
+ "id": "02",
11
+ "title": "Reflection Design Pattern in AI Agents Explained Simply!",
12
+ "description": "In this short tutorial, I walk through how reflection works in AI Agents. You’ll learn how this pattern forms the foundation for self-improving AI systems.",
13
+ "thumbnail_url": "images/video_thumbnails/02.png",
14
+ "url": "https://www.linkedin.com/posts/tharushika-abedheera-3396311a4_ai-machinelearning-agents-activity-7390137862027005952-iI2u"
15
+ }
16
+ ]
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ google-generativeai
4
+ faiss-cpu
5
+ numpy
6
+ python-dotenv
7
+ requests
8
+ pydantic
scripts/create_vector_db.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ import faiss
3
+ import pickle
4
+ import json
5
+ import os
6
+ import numpy as np
7
+ from dotenv import load_dotenv
8
+
9
+ # Load env to get API Key
10
+ load_dotenv()
11
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
+
13
+ if not GEMINI_API_KEY:
14
+ print("❌ Error: GEMINI_API_KEY not found in .env")
15
+ exit(1)
16
+
17
+ genai.configure(api_key=GEMINI_API_KEY)
18
+
19
# --- Configuration ---
DATA_DIR = "data"
SUMMARIES_DIR = os.path.join(DATA_DIR, "summaries")
OUTPUT_DIR = os.path.join("backend", "vector_store")
# Google's latest embedding model
EMBEDDING_MODEL = "models/text-embedding-004"

os.makedirs(OUTPUT_DIR, exist_ok=True)


def load_json(filename):
    """Load a JSON file from DATA_DIR.

    Returns the parsed content, or an empty list when the file does not
    exist so callers can iterate without extra existence checks.
    """
    path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(path):
        return []
    # Context manager closes the handle deterministically; the original
    # `json.load(open(path, ...))` leaked the file object.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
32
+
33
+
34
def load_summary_text(filename):
    """Return the stripped text of a summary file, or "" if it is missing."""
    summary_path = os.path.join(SUMMARIES_DIR, filename)
    if not os.path.exists(summary_path):
        return ""
    with open(summary_path, 'r', encoding='utf-8') as handle:
        return handle.read().strip()
40
+
41
+
42
def get_embedding(text):
    """Wraps Gemini API to get embeddings.

    Embeds *text* as a retrieval document and returns the raw vector
    (a list of floats) from the API response.
    """
    response = genai.embed_content(
        model=EMBEDDING_MODEL,
        content=text,
        task_type="retrieval_document",
    )
    return response['embedding']
50
+
51
+
52
def main():
    """Build the cloud-embedded FAISS index for the portfolio RAG backend.

    Flattens the JSON/data files under DATA_DIR into short text chunks,
    embeds each chunk via the Gemini API, then writes a FAISS index plus
    the chunk texts/metadata into OUTPUT_DIR.
    """
    print("🔄 Creating Cloud-Based Vector Index...")
    chunks = []
    metadata = []

    def add_chunk(text, source):
        # Guard against empty/near-empty strings so junk never enters the index.
        if text and len(text) > 5:
            chunks.append(text)
            metadata.append({"source": source})

    # --- Load Data (Same logic as before) ---
    # 1. Profile
    profile = load_json("profile.json")
    if isinstance(profile, dict):
        contact = profile.get("contact", {})
        c_text = f"Contact Details: Name: {contact.get('name')}. Email: {contact.get('email')}. LinkedIn: {contact.get('linkedin')}."
        add_chunk(c_text, "profile_contact")
        if profile.get("summary"):
            add_chunk(
                f"Professional Summary: {profile.get('summary')}", "profile_summary")

    # 2. Experience
    experience = load_json("experience.json")
    for exp in experience:
        text = f"Experience: {exp.get('role')} at {exp.get('company')} ({exp.get('duration')}). {exp.get('description')}"
        add_chunk(text, "experience_entry")

    # 3. Education
    education = load_json("education.json")
    for edu in education:
        text = f"Education: {edu.get('degree')} from {edu.get('institution')}. {edu.get('details')}"
        add_chunk(text, "education_entry")

    # 4. Skills
    skills = load_json("skills.json")
    for s in skills:
        text = f"Skills in {s.get('category')}: {', '.join(s.get('list', []))}"
        add_chunk(text, "skills_list")

    # 5. Summaries
    # Each summary file becomes one chunk whose source tag lets the backend
    # trigger the matching UI section when that chunk is retrieved.
    summary_files = {
        "about_summary.txt": "profile_about_me",
        "projects_summary.txt": "ui_trigger_projects",
        "articles_summary.txt": "ui_trigger_articles",
        "videos_summary.txt": "ui_trigger_videos",
        "research_summary.txt": "ui_trigger_research",
        "skills_summary.txt": "ui_trigger_skills",
        "certifications_summary.txt": "ui_trigger_certifications"
    }

    for filename, tag in summary_files.items():
        text = load_summary_text(filename)
        if text:
            add_chunk(text, tag)

    # --- Generate Embeddings ---
    if not chunks:
        print("❌ Error: No chunks created.")
        return

    print(f"🧠 Encoding {len(chunks)} chunks via Gemini API...")

    # Batch processing is better, but simple loop works for small portfolios
    embeddings = []
    for i, chunk in enumerate(chunks):
        if i % 5 == 0:
            print(f" Processing chunk {i}/{len(chunks)}...")
        emb = get_embedding(chunk)
        embeddings.append(emb)

    # FAISS requires a contiguous float32 matrix.
    embeddings_np = np.array(embeddings).astype("float32")

    # Create FAISS index
    # IndexFlatL2 = exact (brute-force) L2 search; fine at this corpus size.
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)

    faiss.write_index(index, os.path.join(OUTPUT_DIR, "faiss_index.bin"))
    # Persist the chunk texts alongside the index so search results can be
    # mapped back from vector IDs to their original text and source tag.
    with open(os.path.join(OUTPUT_DIR, "chunks_metadata.pkl"), "wb") as f:
        pickle.dump({"chunks": chunks, "metadata": metadata}, f)

    print(f"🎉 Cloud Indexing Complete! Saved to {OUTPUT_DIR}")


if __name__ == "__main__":
    main()
scripts/setup_db.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import sqlite3
4
+
5
+
6
# --- Configuration ---
# Ensures the DB is created inside the 'backend' folder
DATABASE_FILE = os.path.join("backend", "portfolio.sqlite")
DATA_DIR = "data"


def create_connection():
    """Open the portfolio SQLite DB, creating its parent folder if needed."""
    # Ensure the backend directory exists
    os.makedirs(os.path.dirname(DATABASE_FILE), exist_ok=True)
    return sqlite3.connect(DATABASE_FILE)


def setup_database():
    """Create every portfolio table and reload it from the JSON files in DATA_DIR.

    Idempotent: tables use CREATE TABLE IF NOT EXISTS and rows are written
    with INSERT OR REPLACE, so re-running simply refreshes the data.
    Missing JSON files are skipped with a warning rather than failing.
    """
    conn = create_connection()
    cursor = conn.cursor()

    print("🛠️ Building Modular Database...")

    # --- 1. Create Tables ---

    # Projects
    cursor.execute('''CREATE TABLE IF NOT EXISTS projects (
        id TEXT PRIMARY KEY,
        title TEXT,
        type TEXT,
        description TEXT,
        technologies TEXT,
        github_url TEXT,
        demo_url TEXT,
        image_path TEXT
    )''')

    # Articles
    cursor.execute('''CREATE TABLE IF NOT EXISTS articles (
        title TEXT, description TEXT, url TEXT, type TEXT
    )''')

    # Videos
    cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
        id TEXT, title TEXT, description TEXT, thumbnail_url TEXT, url TEXT
    )''')

    # Research
    cursor.execute('''CREATE TABLE IF NOT EXISTS research (
        title TEXT, description TEXT, url TEXT, type TEXT
    )''')

    # Skills (Updated for modular JSON with 3 columns)
    cursor.execute('''CREATE TABLE IF NOT EXISTS skills (
        id TEXT PRIMARY KEY,
        category TEXT,
        skill_list TEXT
    )''')

    # Experience (New Table)
    cursor.execute('''CREATE TABLE IF NOT EXISTS experience (
        id TEXT PRIMARY KEY,
        role TEXT,
        company TEXT,
        duration TEXT,
        description TEXT
    )''')

    # Education (New Table)
    cursor.execute('''CREATE TABLE IF NOT EXISTS education (
        id TEXT PRIMARY KEY,
        degree TEXT,
        institution TEXT,
        details TEXT
    )''')

    # Certifications
    cursor.execute('''CREATE TABLE IF NOT EXISTS certifications (
        name TEXT PRIMARY KEY
    )''')

    # --- 2. Generic Insert Function ---
    def insert_from_json(filename, table, columns):
        """Bulk-load one JSON file into *table*, serialising list fields."""
        filepath = os.path.join(DATA_DIR, filename)
        if not os.path.exists(filepath):
            # FIX: the warning previously printed a fixed "(unknown)"
            # placeholder; report the actual missing file name instead.
            print(f" ⚠️ Warning: {filename} not found. Skipping.")
            return

        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        count = 0
        for item in data:
            # SQLite cannot store Python lists; serialise them as JSON text.

            # 1. Handle 'technologies' list (for projects)
            if 'technologies' in item and isinstance(item['technologies'], list):
                item['technologies'] = json.dumps(item['technologies'])

            # 2. Handle 'list' -> 'skill_list' mapping (for skills):
            # the JSON key 'list' becomes the DB column 'skill_list'.
            if 'list' in item and isinstance(item['list'], list):
                item['skill_list'] = json.dumps(item['list'])

            # Collect values in column order; .get() tolerates missing fields
            # by inserting NULL.
            values = [item.get(col) for col in columns]

            placeholders = ",".join(["?"] * len(columns))
            # Table/column names are internal constants, values are bound
            # via placeholders, so this is safe from injection.
            cursor.execute(
                f"INSERT OR REPLACE INTO {table} VALUES ({placeholders})", values)
            count += 1
        print(f" ✅ Loaded {count} items into '{table}'")

    # --- 3. Run Inserts ---
    print("📥 Inserting modular data...")

    # Projects
    insert_from_json("projects.json", "projects", [
        "id", "title", "type", "description", "technologies", "github_url", "demo_url", "image_path"])

    # Standard Content
    insert_from_json("articles.json", "articles", [
        "title", "description", "url", "type"])
    insert_from_json("videos.json", "videos", [
        "id", "title", "description", "thumbnail_url", "url"])
    insert_from_json("research.json", "research", [
        "title", "description", "url", "type"])

    # New Modular Files
    # Note: 'skill_list' maps to the 'list' key in the json via the fix above
    insert_from_json("skills.json", "skills", ["id", "category", "skill_list"])
    insert_from_json("experience.json", "experience", [
        "id", "role", "company", "duration", "description"])
    insert_from_json("education.json", "education", [
        "id", "degree", "institution", "details"])
    insert_from_json("certifications.json", "certifications", ["name"])

    conn.commit()
    conn.close()
    print(f"🎉 Database updated at {DATABASE_FILE}")


if __name__ == "__main__":
    setup_database()