Subhadip007 committed on
Commit
99cac84
·
1 Parent(s): 5c095ca

feat: ResearchPilot v2 — Next.js frontend, 358k embeddings pipeline, LaTeX math rendering, dual-GPU Kaggle scaling

Browse files
.gitignore CHANGED
@@ -1,14 +1,46 @@
 
1
  venv/
2
- .env
3
  __pycache__/
4
  *.pyc
 
 
 
 
 
 
 
 
 
 
 
5
  data/raw/
6
  data/processed/
 
7
  data/embeddings/
 
 
 
 
 
8
  *.log
 
 
9
  .DS_Store
10
- data/chunks/
11
- data/processed/
12
- data/raw/
13
- data/embeddings/
14
- data/qdrant_db/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Python ──
2
  venv/
 
3
  __pycache__/
4
  *.pyc
5
+ *.pyo
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+
10
+ # ── Environment & Secrets ──
11
+ .env
12
+ .env.*
13
+ !.env.example
14
+
15
+ # ── Data (too large for GitHub) ──
16
  data/raw/
17
  data/processed/
18
+ data/chunks/
19
  data/embeddings/
20
+ data/qdrant_db/
21
+ existing_chunks.zip
22
+
23
+ # ── Logs ──
24
+ logs/
25
  *.log
26
+
27
+ # ── OS ──
28
  .DS_Store
29
+ Thumbs.db
30
+ desktop.ini
31
+
32
+ # ── IDE ──
33
+ .vscode/
34
+ .idea/
35
+
36
+ # ── Frontend (also covered by frontend-next/.gitignore) ──
37
+ frontend-next/node_modules/
38
+ frontend-next/.next/
39
+
40
+ # ── Old frontend ──
41
+ frontend/
42
+
43
+ # ── Misc ──
44
+ *.zip
45
+ output.txt
46
+ Kaggle_Guide.md
config/settings.py CHANGED
@@ -41,12 +41,12 @@ for directory in [RAW_DIR, PROCESSED_DIR, CHUNKS_DIR, EMBEDDINGS_DIR, LOGS_DIR]:
41
  # ------------------------------------------
42
  # DATA INGESTION SETTINGS
43
  # ------------------------------------------
44
- ARXIV_CATEGORIES = ["cs.LG", "cs.AI"] # Machine Learning + AI
45
- MAX_PAPERS_PER_FETCH = 100 # Papers per API call
46
- TOTAL_PAPERS_TARGET = 100 # Total papers to collect
47
- ARXIV_API_DELAY_SECONDS = 3.0 # ArXiv rate limit: be respectful
48
- PDF_DOWNLOAD_TIMEOUT = 30 # Seconds before giving up on a PDF
49
- MAX_DOWNLOAD_RETRIES = 3 # Retry failed downloads N times
50
 
51
  # ------------------------------------------
52
  # DOCUMENT PROCESSING SETTINGS
@@ -57,7 +57,7 @@ MAX_TEXT_LENGTH = 500_000 # Skip papers larger than 500k chars (corrupted)
57
  # ------------------------------------------
58
  # CHUNKING SETTINGS
59
  # ------------------------------------------
60
- CHUNK_SIZE = 512 # Charaters per chunk
61
  CHUNK_OVERLAP = 50 # Overlap between consecutive chunks
62
  MIN_CHUNK_SIZE = 100 # Discard chunks smaller than this
63
 
@@ -82,7 +82,7 @@ TOP_K_RERANK = 5 # Keep top 5 after reranking
82
  GROQ_API_KEY = os.getenv('GROQ_API_KEY') # Loaded from .env
83
  LLM_MODEL_NAME = 'llama-3.3-70b-versatile' # Groq model ID
84
  LLM_TEMPERATURE = 0.1 # Low = More factual/consistent
85
- LLM_MAX_TOKENS = 1024 # Max response tokens
86
 
87
  # ------------------------------------------
88
  # API SETTINGS
 
41
  # ------------------------------------------
42
  # DATA INGESTION SETTINGS
43
  # ------------------------------------------
44
+ ARXIV_CATEGORIES = ["cs.LG", "cs.AI", "stat.ML"] # Machine Learning + AI + Statistical ML
45
+ MAX_PAPERS_PER_FETCH = 100 # Papers per API call
46
+ TOTAL_PAPERS_TARGET = 700 # Total papers to collect
47
+ ARXIV_API_DELAY_SECONDS = 3.0 # ArXiv rate limit: be respectful
48
+ PDF_DOWNLOAD_TIMEOUT = 30 # Seconds before giving up on a PDF
49
+ MAX_DOWNLOAD_RETRIES = 3 # Retry failed downloads N times
50
 
51
  # ------------------------------------------
52
  # DOCUMENT PROCESSING SETTINGS
 
57
  # ------------------------------------------
58
  # CHUNKING SETTINGS
59
  # ------------------------------------------
60
+ CHUNK_SIZE = 512 # Characters per chunk
61
  CHUNK_OVERLAP = 50 # Overlap between consecutive chunks
62
  MIN_CHUNK_SIZE = 100 # Discard chunks smaller than this
63
 
 
82
  GROQ_API_KEY = os.getenv('GROQ_API_KEY') # Loaded from .env
83
  LLM_MODEL_NAME = 'llama-3.3-70b-versatile' # Groq model ID
84
  LLM_TEMPERATURE = 0.1 # Low = More factual/consistent
85
+ LLM_MAX_TOKENS = 2048 # Max response tokens
86
 
87
  # ------------------------------------------
88
  # API SETTINGS
frontend-next/.gitignore ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+ /.pnp
6
+ .pnp.*
7
+ .yarn/*
8
+ !.yarn/patches
9
+ !.yarn/plugins
10
+ !.yarn/releases
11
+ !.yarn/versions
12
+
13
+ # testing
14
+ /coverage
15
+
16
+ # next.js
17
+ /.next/
18
+ /out/
19
+
20
+ # production
21
+ /build
22
+
23
+ # misc
24
+ .DS_Store
25
+ *.pem
26
+
27
+ # debug
28
+ npm-debug.log*
29
+ yarn-debug.log*
30
+ yarn-error.log*
31
+ .pnpm-debug.log*
32
+
33
+ # env files (can opt-in for committing if needed)
34
+ .env*
35
+
36
+ # vercel
37
+ .vercel
38
+
39
+ # typescript
40
+ *.tsbuildinfo
41
+ next-env.d.ts
frontend-next/AGENTS.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ <!-- BEGIN:nextjs-agent-rules -->
2
+ # This is NOT the Next.js you know
3
+
4
+ This version has breaking changes — APIs, conventions, and file structure may all differ from your training data. Read the relevant guide in `node_modules/next/dist/docs/` before writing any code. Heed deprecation notices.
5
+ <!-- END:nextjs-agent-rules -->
frontend-next/CLAUDE.md ADDED
@@ -0,0 +1 @@
 
 
1
+ @AGENTS.md
frontend-next/README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
2
+
3
+ ## Getting Started
4
+
5
+ First, run the development server:
6
+
7
+ ```bash
8
+ npm run dev
9
+ # or
10
+ yarn dev
11
+ # or
12
+ pnpm dev
13
+ # or
14
+ bun dev
15
+ ```
16
+
17
+ Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
18
+
19
+ You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
20
+
21
+ This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
22
+
23
+ ## Learn More
24
+
25
+ To learn more about Next.js, take a look at the following resources:
26
+
27
+ - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
28
+ - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
29
+
30
+ You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
31
+
32
+ ## Deploy on Vercel
33
+
34
+ The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
35
+
36
+ Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
frontend-next/app/favicon.ico ADDED
frontend-next/app/globals.css ADDED
@@ -0,0 +1,928 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url("https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;700&family=Dancing+Script:wght@600;700&display=swap");
2
+
3
+ :root {
4
+ --bg: #030508;
5
+ --panel-bg: rgba(10, 14, 23, 0.6);
6
+ --border: rgba(255, 255, 255, 0.08);
7
+ --text-main: #f8fafc;
8
+ --text-muted: #94a3b8;
9
+ --accent: #00f0ff;
10
+ --accent-2: #8a2be2;
11
+ --success: #10b981;
12
+ --danger: #ef4444;
13
+ }
14
+
15
+ * {
16
+ box-sizing: border-box;
17
+ margin: 0;
18
+ padding: 0;
19
+ }
20
+
21
+ body {
22
+ background-color: var(--bg);
23
+ color: var(--text-main);
24
+ font-family: "Outfit", system-ui, sans-serif;
25
+ overflow-x: hidden;
26
+ -webkit-font-smoothing: antialiased;
27
+ }
28
+
29
+ button {
30
+ all: unset;
31
+ cursor: pointer;
32
+ display: inline-flex;
33
+ box-sizing: border-box;
34
+ }
35
+ input,
36
+ textarea,
37
+ select {
38
+ all: unset;
39
+ box-sizing: border-box;
40
+ }
41
+ a {
42
+ text-decoration: none;
43
+ color: inherit;
44
+ }
45
+
46
+ /* ── The Ambient Grid ── */
47
+ .luminous-grid {
48
+ position: fixed;
49
+ inset: 0;
50
+ background:
51
+ linear-gradient(rgba(0, 240, 255, 0.04) 1px, transparent 1px),
52
+ linear-gradient(90deg, rgba(0, 240, 255, 0.04) 1px, transparent 1px);
53
+ background-size: 50px 50px;
54
+ mask-image: radial-gradient(circle at center, black 10%, transparent 80%);
55
+ -webkit-mask-image: radial-gradient(
56
+ circle at center,
57
+ black 10%,
58
+ transparent 80%
59
+ );
60
+ z-index: -1;
61
+ pointer-events: none;
62
+ animation: bg-pan 40s linear infinite;
63
+ }
64
+
65
+ @keyframes bg-pan {
66
+ from {
67
+ background-position: 0 0;
68
+ }
69
+ to {
70
+ background-position: 50px 50px;
71
+ }
72
+ }
73
+
74
+ /* ── Glow Orbs ── */
75
+ .orb {
76
+ position: fixed;
77
+ border-radius: 50%;
78
+ filter: blur(150px);
79
+ z-index: -2;
80
+ pointer-events: none;
81
+ opacity: 0.3;
82
+ }
83
+ .orb-cyan {
84
+ width: 50vw;
85
+ height: 50vw;
86
+ background: var(--accent);
87
+ top: -20%;
88
+ right: -10%;
89
+ animation: pulse-orb 12s alternate infinite;
90
+ }
91
+ .orb-purple {
92
+ width: 40vw;
93
+ height: 40vw;
94
+ background: var(--accent-2);
95
+ bottom: -10%;
96
+ left: -20%;
97
+ animation: pulse-orb 15s alternate-reverse infinite;
98
+ }
99
+ @keyframes pulse-orb {
100
+ 0% {
101
+ transform: scale(0.9);
102
+ opacity: 0.2;
103
+ }
104
+ 100% {
105
+ transform: scale(1.1);
106
+ opacity: 0.5;
107
+ }
108
+ }
109
+
110
+ /* ── Glass Panels ── */
111
+ .cyber-panel {
112
+ background: var(--panel-bg);
113
+ backdrop-filter: blur(24px);
114
+ -webkit-backdrop-filter: blur(24px);
115
+ border: 1px solid var(--border);
116
+ border-radius: 24px;
117
+ box-shadow: 0 10px 40px -10px rgba(0, 0, 0, 0.8);
118
+ transition: all 0.3s ease;
119
+ }
120
+
121
+ /* ── Typography ── */
122
+ .hero-title {
123
+ font-size: min(4.5rem, 10vw);
124
+ font-weight: 800;
125
+ line-height: 1.1;
126
+ letter-spacing: -0.03em;
127
+ margin-bottom: 24px;
128
+ text-align: center;
129
+ }
130
+ .hero-subtitle {
131
+ font-size: 1.1rem;
132
+ color: var(--text-muted);
133
+ text-align: center;
134
+ max-width: 600px;
135
+ margin: 0 auto;
136
+ font-weight: 300;
137
+ line-height: 1.6;
138
+ }
139
+ .text-gradient-2 {
140
+ background: linear-gradient(
141
+ 135deg,
142
+ var(--accent) 20%,
143
+ var(--accent-2) 100%
144
+ );
145
+ -webkit-background-clip: text;
146
+ -webkit-text-fill-color: transparent;
147
+ animation: hue-shift 5s linear infinite;
148
+ }
149
+ @keyframes hue-shift {
150
+ 0% {
151
+ filter: hue-rotate(0deg);
152
+ }
153
+ 100% {
154
+ filter: hue-rotate(360deg);
155
+ }
156
+ }
157
+
158
+ /* ── Layout ── */
159
+ .app-container {
160
+ display: flex;
161
+ flex-direction: column;
162
+ align-items: center;
163
+ min-height: 100vh;
164
+ padding: 0 24px;
165
+ max-width: 1000px;
166
+ margin: 0 auto;
167
+ }
168
+
169
+ /* ── Nav ── */
170
+ .top-nav {
171
+ display: flex;
172
+ justify-content: space-between;
173
+ align-items: center;
174
+ width: 100%;
175
+ padding: 32px 0;
176
+ z-index: 50;
177
+ }
178
+ .brand {
179
+ display: flex;
180
+ align-items: center;
181
+ gap: 16px;
182
+ font-size: 1.5rem;
183
+ font-weight: 800;
184
+ letter-spacing: -0.03em;
185
+ }
186
+ .brand-icon {
187
+ width: 44px;
188
+ height: 44px;
189
+ border-radius: 14px;
190
+ background: rgba(0, 240, 255, 0.1);
191
+ border: 1px solid rgba(0, 240, 255, 0.4);
192
+ display: flex;
193
+ align-items: center;
194
+ justify-content: center;
195
+ color: var(--accent);
196
+ box-shadow: inset 0 0 20px rgba(0, 240, 255, 0.2);
197
+ }
198
+ .nav-status {
199
+ font-size: 0.75rem;
200
+ font-weight: 600;
201
+ text-transform: uppercase;
202
+ letter-spacing: 0.15em;
203
+ padding: 10px 20px;
204
+ border-radius: 99px;
205
+ border: 1px solid var(--border);
206
+ display: flex;
207
+ align-items: center;
208
+ gap: 10px;
209
+ background: rgba(255, 255, 255, 0.03);
210
+ }
211
+ .status-dot {
212
+ width: 8px;
213
+ height: 8px;
214
+ border-radius: 50%;
215
+ }
216
+ .status-online {
217
+ background: var(--success);
218
+ box-shadow: 0 0 12px var(--success);
219
+ }
220
+ .status-offline {
221
+ background: var(--danger);
222
+ box-shadow: 0 0 12px var(--danger);
223
+ }
224
+
225
+ /* ── New ChatGPT/WhatsApp Style Search Box ── */
226
+ .search-wrapper {
227
+ width: 100%;
228
+ position: relative;
229
+ z-index: 20;
230
+ display: flex;
231
+ flex-direction: column;
232
+ gap: 12px;
233
+ }
234
+ .chat-input-wrapper {
235
+ background: rgba(30, 35, 45, 0.9);
236
+ border: 1px solid rgba(255, 255, 255, 0.1);
237
+ border-radius: 30px;
238
+ padding: 6px 6px 6px 20px;
239
+ display: flex;
240
+ align-items: center;
241
+ gap: 12px;
242
+ box-shadow: 0 8px 30px rgba(0, 0, 0, 0.5);
243
+ transition: 0.3s;
244
+ }
245
+ .chat-input-wrapper:focus-within {
246
+ border-color: rgba(0, 240, 255, 0.4);
247
+ box-shadow:
248
+ 0 0 30px rgba(0, 240, 255, 0.15),
249
+ 0 8px 30px rgba(0, 0, 0, 0.5);
250
+ }
251
+ .chat-input {
252
+ flex: 1;
253
+ font-size: 1.05rem;
254
+ color: #fff;
255
+ line-height: 1.5;
256
+ resize: none;
257
+ outline: none;
258
+ padding: 6px 0;
259
+ font-family: inherit;
260
+ }
261
+ .chat-input::placeholder {
262
+ color: rgba(255, 255, 255, 0.4);
263
+ }
264
+ .chat-input::-webkit-scrollbar,
265
+ .info-modal::-webkit-scrollbar {
266
+ width: 6px;
267
+ }
268
+ .chat-input::-webkit-scrollbar-track,
269
+ .info-modal::-webkit-scrollbar-track {
270
+ background: transparent;
271
+ }
272
+ .info-modal::-webkit-scrollbar-track {
273
+ margin-block: 24px;
274
+ }
275
+ .chat-input::-webkit-scrollbar-thumb,
276
+ .info-modal::-webkit-scrollbar-thumb {
277
+ background: rgba(255, 255, 255, 0.2);
278
+ border-radius: 4px;
279
+ }
280
+ .chat-input::-webkit-scrollbar-thumb:hover,
281
+ .info-modal::-webkit-scrollbar-thumb:hover {
282
+ background: rgba(255, 255, 255, 0.4);
283
+ }
284
+
285
+ .send-btn {
286
+ width: 38px;
287
+ height: 38px;
288
+ border-radius: 50%;
289
+ background: var(--text-main);
290
+ color: #000;
291
+ display: flex;
292
+ align-items: center;
293
+ justify-content: center;
294
+ transition: 0.2s cubic-bezier(0.175, 0.885, 0.32, 1.275);
295
+ flex-shrink: 0;
296
+ }
297
+ .send-btn:hover:not(:disabled) {
298
+ transform: scale(1.08);
299
+ background: var(--accent);
300
+ box-shadow: 0 0 15px rgba(0, 240, 255, 0.4);
301
+ }
302
+ .send-btn:disabled {
303
+ opacity: 0.3;
304
+ cursor: not-allowed;
305
+ transform: none;
306
+ background: var(--text-main);
307
+ }
308
+ .spinner-micro {
309
+ width: 18px;
310
+ height: 18px;
311
+ border: 2px solid rgba(0, 0, 0, 0.2);
312
+ border-top-color: #000;
313
+ border-radius: 50%;
314
+ animation: spin 0.8s linear infinite;
315
+ }
316
+
317
+ /* ── Search Controls (Settings) ── */
318
+ .search-controls {
319
+ display: flex;
320
+ align-items: center;
321
+ padding: 4px 8px;
322
+ gap: 16px;
323
+ margin-top: 4px;
324
+ }
325
+ .controls-group {
326
+ display: flex;
327
+ gap: 12px;
328
+ align-items: center;
329
+ flex-wrap: wrap;
330
+ }
331
+ .cyber-select {
332
+ background: rgba(255, 255, 255, 0.04);
333
+ border: 1px solid var(--border);
334
+ border-radius: 12px;
335
+ padding: 8px 14px;
336
+ font-size: 0.85rem;
337
+ color: var(--text-main);
338
+ transition: 0.2s;
339
+ cursor: pointer;
340
+ }
341
+ .cyber-select:hover {
342
+ background: rgba(255, 255, 255, 0.1);
343
+ border-color: rgba(255, 255, 255, 0.3);
344
+ }
345
+
346
+ /* Custom React Dropdown Override */
347
+ .custom-dropdown-menu {
348
+ position: absolute;
349
+ left: 0;
350
+ width: 100%;
351
+ background: rgba(12, 14, 20, 0.95);
352
+ backdrop-filter: blur(24px);
353
+ border: 1px solid rgba(255, 255, 255, 0.1);
354
+ border-radius: 12px;
355
+ overflow: hidden;
356
+ box-shadow: 0 15px 40px rgba(0,0,0,0.8), 0 0 20px rgba(0, 240, 255, 0.15);
357
+ z-index: 100;
358
+ display: flex;
359
+ flex-direction: column;
360
+ }
361
+
362
+ .custom-dropdown-item {
363
+ padding: 12px 16px;
364
+ font-size: 0.85rem;
365
+ color: var(--text-muted);
366
+ text-align: left;
367
+ transition: 0.2s;
368
+ background: transparent;
369
+ width: 100%;
370
+ display: block;
371
+ }
372
+
373
+ .custom-dropdown-item:hover {
374
+ background: rgba(255, 255, 255, 0.05);
375
+ color: #fff;
376
+ }
377
+
378
+ .custom-dropdown-item.active {
379
+ background: rgba(0, 240, 255, 0.1);
380
+ color: var(--accent);
381
+ font-weight: 500;
382
+ border-left: 2px solid var(--accent);
383
+ padding-left: 14px;
384
+ }
385
+
386
+ .cyber-btn-outline {
387
+ padding: 8px 16px;
388
+ border-radius: 12px;
389
+ font-size: 0.85rem;
390
+ font-weight: 500;
391
+ transition: 0.2s;
392
+ border: 1px solid var(--border);
393
+ background: rgba(255, 255, 255, 0.04);
394
+ color: var(--text-muted);
395
+ }
396
+ .cyber-btn-outline.active {
397
+ background: rgba(0, 240, 255, 0.1);
398
+ border-color: rgba(0, 240, 255, 0.4);
399
+ color: var(--accent);
400
+ }
401
+ .cyber-btn-outline:hover {
402
+ border-color: rgba(255, 255, 255, 0.3);
403
+ color: #fff;
404
+ }
405
+
406
+ .cyber-input {
407
+ background: rgba(0, 0, 0, 0.4);
408
+ border: 1px solid var(--border);
409
+ border-radius: 12px;
410
+ padding: 8px 14px;
411
+ color: var(--accent);
412
+ font-family: "JetBrains Mono", monospace;
413
+ font-size: 0.9rem;
414
+ outline: none;
415
+ transition: 0.2s;
416
+ }
417
+ .cyber-input:focus {
418
+ border-color: var(--accent);
419
+ }
420
+
421
+ /* ── Suggestions ── */
422
+ .example-chips {
423
+ display: flex;
424
+ flex-wrap: wrap;
425
+ justify-content: center;
426
+ gap: 10px;
427
+ margin-top: 24px;
428
+ }
429
+ .chip {
430
+ background: rgba(255, 255, 255, 0.03);
431
+ border: 1px solid rgba(255, 255, 255, 0.08);
432
+ padding: 10px 18px;
433
+ border-radius: 99px;
434
+ font-size: 0.85rem;
435
+ color: var(--text-muted);
436
+ transition: 0.3s;
437
+ }
438
+ .chip:hover {
439
+ background: rgba(0, 240, 255, 0.1);
440
+ border-color: rgba(0, 240, 255, 0.3);
441
+ color: #fff;
442
+ transform: translateY(-2px);
443
+ }
444
+
445
+ /* ── Results Container ── */
446
+ .results-area {
447
+ width: 100%;
448
+ padding-bottom: 100px;
449
+ display: flex;
450
+ flex-direction: column;
451
+ gap: 24px;
452
+ margin-top: 32px;
453
+ }
454
+
455
+ /* ── Answer Box (AI Output) ── */
456
+ .answer-box {
457
+ padding: 32px;
458
+ background: linear-gradient(
459
+ 145deg,
460
+ rgba(20, 25, 35, 0.8),
461
+ rgba(5, 7, 10, 0.9)
462
+ );
463
+ }
464
+ .answer-header {
465
+ display: flex;
466
+ justify-content: space-between;
467
+ align-items: center;
468
+ margin-bottom: 24px;
469
+ border-bottom: 1px solid var(--border);
470
+ padding-bottom: 16px;
471
+ }
472
+ .answer-label {
473
+ display: flex;
474
+ align-items: center;
475
+ gap: 10px;
476
+ font-size: 1rem;
477
+ font-weight: 700;
478
+ color: var(--accent);
479
+ letter-spacing: 0.1em;
480
+ text-transform: uppercase;
481
+ }
482
+ .badge-grounded {
483
+ font-family: "JetBrains Mono", monospace;
484
+ font-size: 0.7rem;
485
+ font-weight: 600;
486
+ background: rgba(16, 185, 129, 0.1);
487
+ color: var(--success);
488
+ padding: 6px 12px;
489
+ border-radius: 8px;
490
+ border: 1px solid rgba(16, 185, 129, 0.2);
491
+ display: flex;
492
+ align-items: center;
493
+ gap: 8px;
494
+ }
495
+
496
+ /* ── AI Output Font Update (Inter, refined for math/reading) ── */
497
+ .answer-text {
498
+ font-family: "Inter", system-ui, sans-serif;
499
+ font-size: 1.05rem;
500
+ line-height: 1.85;
501
+ color: #e2e8f0;
502
+ font-weight: 400;
503
+ letter-spacing: 0.01em;
504
+ }
505
+ .answer-text strong {
506
+ color: #fff;
507
+ font-weight: 600;
508
+ }
509
+
510
+ /* ── Stats ── */
511
+ .stats-grid {
512
+ display: grid;
513
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
514
+ gap: 16px;
515
+ }
516
+ .stat-card {
517
+ padding: 20px;
518
+ display: flex;
519
+ flex-direction: column;
520
+ gap: 8px;
521
+ background: rgba(255, 255, 255, 0.02);
522
+ }
523
+ .stat-header {
524
+ display: flex;
525
+ justify-content: space-between;
526
+ align-items: center;
527
+ }
528
+ .stat-label {
529
+ font-size: 0.7rem;
530
+ text-transform: uppercase;
531
+ color: var(--text-muted);
532
+ font-weight: 700;
533
+ letter-spacing: 0.1em;
534
+ }
535
+ .stat-value {
536
+ font-size: 1.5rem;
537
+ font-family: "JetBrains Mono", monospace;
538
+ font-weight: 700;
539
+ color: #fff;
540
+ }
541
+
542
+ /* ── Citations (Smaller Cards) ── */
543
+ .section-title {
544
+ display: flex;
545
+ align-items: center;
546
+ gap: 10px;
547
+ font-size: 1.15rem;
548
+ font-weight: 700;
549
+ color: #fff;
550
+ margin: 16px 0;
551
+ }
552
+ .citations-grid {
553
+ display: grid;
554
+ grid-template-columns: repeat(
555
+ auto-fit,
556
+ minmax(240px, 1fr)
557
+ ); /* Reduced to fit more */
558
+ gap: 16px; /* Reduced gap */
559
+ }
560
+ .citation-card {
561
+ padding: 16px; /* Reduced padding */
562
+ background: rgba(0, 0, 0, 0.5);
563
+ border: 1px solid rgba(255, 255, 255, 0.05);
564
+ border-radius: 16px; /* Reduced radius */
565
+ transition: 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275);
566
+ display: flex;
567
+ flex-direction: column;
568
+ gap: 12px; /* Reduced gap */
569
+ position: relative;
570
+ }
571
+ .citation-card:hover {
572
+ background: rgba(20, 20, 20, 0.8);
573
+ border-color: var(--accent-2);
574
+ transform: translateY(-4px); /* Smoother smaller lift */
575
+ box-shadow: 0 10px 20px rgba(138, 43, 226, 0.15);
576
+ }
577
+ .citation-open {
578
+ position: absolute;
579
+ top: 16px;
580
+ right: 16px;
581
+ color: var(--accent-2);
582
+ opacity: 0;
583
+ transition: 0.3s;
584
+ transform: translateX(-5px);
585
+ }
586
+ .citation-card:hover .citation-open {
587
+ opacity: 1;
588
+ transform: translateX(0);
589
+ }
590
+
591
+ .citation-meta {
592
+ display: flex;
593
+ gap: 10px;
594
+ align-items: center;
595
+ }
596
+ .citation-id {
597
+ font-family: "JetBrains Mono", monospace;
598
+ font-size: 0.7rem; /* Smaller */
599
+ background: rgba(138, 43, 226, 0.15);
600
+ color: #d8b4fe;
601
+ padding: 4px 8px; /* Smaller */
602
+ border-radius: 6px;
603
+ border: 1px solid rgba(138, 43, 226, 0.3);
604
+ }
605
+ .citation-date {
606
+ font-family: "JetBrains Mono", monospace;
607
+ font-size: 0.7rem; /* Smaller */
608
+ color: var(--text-muted);
609
+ }
610
+ .citation-title {
611
+ font-size: 0.95rem; /* Smaller reading title */
612
+ font-weight: 600;
613
+ color: #f8fafc;
614
+ line-height: 1.4;
615
+ padding-right: 20px;
616
+ }
617
+ .citation-authors {
618
+ font-size: 0.8rem; /* Smaller */
619
+ color: var(--text-muted);
620
+ font-style: italic;
621
+ font-weight: 300;
622
+ }
623
+
624
+ /* ── Loader ── */
625
+ .loader-view {
626
+ padding: 80px 0;
627
+ display: flex;
628
+ flex-direction: column;
629
+ align-items: center;
630
+ gap: 24px;
631
+ text-align: center;
632
+ }
633
+ .ring-spinner {
634
+ width: 70px;
635
+ height: 70px;
636
+ position: relative;
637
+ }
638
+ .ring {
639
+ position: absolute;
640
+ inset: 0;
641
+ border: 2px solid transparent;
642
+ border-radius: 50%;
643
+ }
644
+ .ring-1 {
645
+ border-top-color: var(--accent);
646
+ animation: spin1 1.5s cubic-bezier(0.5, 0, 0.5, 1) infinite;
647
+ }
648
+ .ring-2 {
649
+ border-right-color: var(--accent-2);
650
+ animation: spin2 2s cubic-bezier(0.5, 0, 0.5, 1) infinite;
651
+ }
652
+ @keyframes spin1 {
653
+ to {
654
+ transform: rotate(360deg);
655
+ }
656
+ }
657
+ @keyframes spin2 {
658
+ to {
659
+ transform: rotate(-360deg);
660
+ }
661
+ }
662
+
663
+ /* ── Year Stepper ── */
664
+ .year-stepper {
665
+ display: flex;
666
+ align-items: center;
667
+ background: rgba(0, 0, 0, 0.4);
668
+ border: 1px solid var(--border);
669
+ border-radius: 12px;
670
+ overflow: hidden;
671
+ transition: 0.2s;
672
+ }
673
+ .year-stepper:hover {
674
+ border-color: rgba(255, 255, 255, 0.3);
675
+ }
676
+ .year-stepper:focus-within {
677
+ border-color: var(--accent);
678
+ }
679
+ .stepper-btn {
680
+ padding: 8px 14px;
681
+ background: rgba(255, 255, 255, 0.03);
682
+ color: var(--accent);
683
+ font-weight: 700;
684
+ transition: 0.2s;
685
+ user-select: none;
686
+ }
687
+ .stepper-btn:hover {
688
+ background: rgba(0, 240, 255, 0.15);
689
+ color: #fff;
690
+ }
691
+ .stepper-input {
692
+ width: 50px;
693
+ text-align: center;
694
+ color: #fff;
695
+ font-family: 'JetBrains Mono', monospace;
696
+ font-size: 0.95rem;
697
+ pointer-events: none;
698
+ }
699
+ .stepper-input::-webkit-outer-spin-button,
700
+ .stepper-input::-webkit-inner-spin-button {
701
+ -webkit-appearance: none;
702
+ margin: 0;
703
+ }
704
+ .stepper-input[type=number] {
705
+ -moz-appearance: textfield;
706
+ }
707
+
708
+ /* ── Professional Header Layout ── */
709
+ .top-nav {
710
+ position: fixed;
711
+ top: 0;
712
+ left: 0;
713
+ width: 100vw;
714
+ padding: 24px 2%;
715
+ display: flex;
716
+ justify-content: space-between;
717
+ align-items: center;
718
+ z-index: 5000;
719
+ background: transparent;
720
+ backdrop-filter: blur(16px) !important;
721
+ -webkit-backdrop-filter: blur(16px) !important;
722
+ border-bottom: 1px solid rgba(255, 255, 255, 0.05);
723
+ }
724
+ .brand {
725
+ display: flex;
726
+ align-items: center;
727
+ gap: 12px;
728
+ font-size: 1.25rem;
729
+ font-weight: 700;
730
+ letter-spacing: -0.02em;
731
+ color: #fff;
732
+ }
733
+ .brand-icon {
734
+ display: flex;
735
+ align-items: center;
736
+ justify-content: center;
737
+ width: 36px;
738
+ height: 36px;
739
+ background: rgba(0, 240, 255, 0.1);
740
+ border: 1px solid rgba(0, 240, 255, 0.3);
741
+ border-radius: 10px;
742
+ color: var(--accent);
743
+ }
744
+ .nav-right {
745
+ display: flex;
746
+ align-items: center;
747
+ gap: 16px;
748
+ }
749
+ .github-link {
750
+ color: var(--text-muted);
751
+ transition: 0.2s;
752
+ display: flex;
753
+ align-items: center;
754
+ justify-content: center;
755
+ }
756
+ .github-link:hover {
757
+ color: #fff;
758
+ transform: scale(1.1);
759
+ }
760
+ .nav-status {
761
+ display: flex;
762
+ align-items: center;
763
+ gap: 6px;
764
+ background: rgba(255, 255, 255, 0.05);
765
+ border: 1px solid rgba(255, 255, 255, 0.1);
766
+ padding: 6px 12px;
767
+ border-radius: 100px;
768
+ font-size: 0.7rem;
769
+ font-weight: 600;
770
+ color: var(--text-muted);
771
+ cursor: pointer;
772
+ transition: 0.3s;
773
+ }
774
+ .nav-status:hover {
775
+ background: rgba(255, 255, 255, 0.1);
776
+ color: #fff;
777
+ }
778
+ .status-dot {
779
+ width: 6px;
780
+ height: 6px;
781
+ border-radius: 50%;
782
+ background: #666;
783
+ }
784
+ .status-online {
785
+ background: var(--success);
786
+ box-shadow: 0 0 10px var(--success);
787
+ }
788
+ .status-offline {
789
+ background: var(--danger);
790
+ box-shadow: 0 0 10px var(--danger);
791
+ }
792
+
793
+ /* Custom overrides */
794
+ select {
795
+ -webkit-appearance: none;
796
+ -moz-appearance: none;
797
+ background-image: none;
798
+ }
799
+
800
+ /* ── Info Modal ── */
801
+ .info-modal-backdrop {
802
+ position: fixed;
803
+ inset: 0;
804
+ background: rgba(0, 0, 0, 0.6);
805
+ backdrop-filter: blur(12px);
806
+ -webkit-backdrop-filter: blur(12px);
807
+ display: flex;
808
+ align-items: center;
809
+ justify-content: center;
810
+ z-index: 9999;
811
+ padding: 24px;
812
+ }
813
+ .info-modal {
814
+ position: relative;
815
+ width: 100%;
816
+ max-width: 650px;
817
+ padding: 40px;
818
+ background: linear-gradient(145deg, rgba(12, 16, 24, 0.9), rgba(5, 7, 10, 0.95));
819
+ border: 1px solid rgba(255, 255, 255, 0.08);
820
+ box-shadow: 0 30px 60px rgba(0, 0, 0, 0.8), inset 0 1px 0 rgba(255, 255, 255, 0.1), inset 0 0 80px rgba(0, 240, 255, 0.03);
821
+ border-radius: 24px;
822
+ overflow-y: auto;
823
+ max-height: 85vh;
824
+ }
825
+ .info-modal h2 {
826
+ font-size: 2.2rem;
827
+ font-weight: 800;
828
+ margin-bottom: 6px;
829
+ background: linear-gradient(135deg, #fff 0%, var(--accent) 100%);
830
+ background-clip: text;
831
+ -webkit-background-clip: text;
832
+ -webkit-text-fill-color: transparent;
833
+ letter-spacing: -0.02em;
834
+ }
835
+ .info-modal h3 {
836
+ display: flex;
837
+ align-items: center;
838
+ gap: 12px;
839
+ font-size: 1.15rem;
840
+ font-weight: 700;
841
+ color: #fff;
842
+ margin-top: 32px;
843
+ margin-bottom: 16px;
844
+ padding-bottom: 8px;
845
+ border-bottom: 1px solid rgba(255, 255, 255, 0.05);
846
+ }
847
+ .info-modal h3 svg {
848
+ color: var(--accent);
849
+ }
850
+ .info-modal p, .info-modal ul {
851
+ color: var(--text-muted);
852
+ font-size: 0.95rem;
853
+ line-height: 1.6;
854
+ }
855
+ .info-modal ul {
856
+ list-style-type: none;
857
+ padding-left: 0;
858
+ display: flex;
859
+ flex-direction: column;
860
+ gap: 12px;
861
+ }
862
+ .info-modal li {
863
+ margin: 0;
864
+ padding: 14px 18px;
865
+ background: rgba(255, 255, 255, 0.02);
866
+ border: 1px solid rgba(255, 255, 255, 0.04);
867
+ border-radius: 12px;
868
+ transition: 0.3s;
869
+ font-size: 0.9rem;
870
+ color: var(--text-muted);
871
+ }
872
+ .info-modal li:hover {
873
+ background: rgba(0, 240, 255, 0.05);
874
+ border-color: rgba(0, 240, 255, 0.2);
875
+ color: #fff;
876
+ transform: translateX(4px);
877
+ }
878
+ .info-modal li strong {
879
+ color: var(--accent);
880
+ display: block;
881
+ font-size: 0.95rem;
882
+ margin-bottom: 4px;
883
+ }
884
+ .info-modal hr {
885
+ border: none;
886
+ height: 1px;
887
+ background: linear-gradient(90deg, transparent, rgba(0, 240, 255, 0.4), transparent);
888
+ margin: 30px 0;
889
+ }
890
+ .modal-close {
891
+ position: absolute;
892
+ top: 24px;
893
+ right: 24px;
894
+ color: var(--text-muted);
895
+ background: rgba(255, 255, 255, 0.05);
896
+ border: 1px solid rgba(255, 255, 255, 0.1);
897
+ border-radius: 50%;
898
+ width: 36px;
899
+ height: 36px;
900
+ display: flex;
901
+ align-items: center;
902
+ justify-content: center;
903
+ transition: 0.2s cubic-bezier(0.175, 0.885, 0.32, 1.275);
904
+ }
905
+ .modal-close:hover {
906
+ background: rgba(239, 68, 68, 0.2);
907
+ color: var(--danger);
908
+ border-color: rgba(239, 68, 68, 0.3);
909
+ transform: rotate(90deg);
910
+ }
911
+ .nav-icon-btn {
912
+ display: flex;
913
+ align-items: center;
914
+ justify-content: center;
915
+ width: 36px;
916
+ height: 36px;
917
+ border-radius: 12px;
918
+ background: rgba(255, 255, 255, 0.05);
919
+ border: 1px solid rgba(255, 255, 255, 0.1);
920
+ color: var(--text-muted);
921
+ transition: 0.2s cubic-bezier(0.175, 0.885, 0.32, 1.275);
922
+ }
923
+ .nav-icon-btn:hover {
924
+ background: rgba(0, 240, 255, 0.1);
925
+ border-color: rgba(0, 240, 255, 0.4);
926
+ color: var(--accent);
927
+ transform: translateY(-2px);
928
+ }
frontend-next/app/layout.tsx ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // app/layout.tsx
2
+ import type { Metadata } from "next";
3
+ import { Inter } from "next/font/google";
4
+ import "./globals.css";
5
+
6
+ const inter = Inter({ subsets: ["latin"] });
7
+
8
+ export const metadata: Metadata = {
9
+ title: "ResearchPilot — ML Research Assistant",
10
+ description:
11
+ "AI-powered research assistant for ML papers. Hybrid search + grounded answers.",
12
+ };
13
+
14
+ export default function RootLayout({
15
+ children,
16
+ }: {
17
+ children: React.ReactNode;
18
+ }) {
19
+ return (
20
+ <html lang="en">
21
+ <body className={inter.className}>{children}</body>
22
+ </html>
23
+ );
24
+ }
frontend-next/app/page.tsx ADDED
@@ -0,0 +1,872 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useState, useRef, useEffect } from "react";
4
+ import { motion, AnimatePresence } from "framer-motion";
5
+ import { InlineMath, BlockMath } from 'react-katex';
6
+ import 'katex/dist/katex.min.css';
7
+ import {
8
+ BookOpen,
9
+ Clock,
10
+ Zap,
11
+ AlertCircle,
12
+ CheckCircle,
13
+ ExternalLink,
14
+ Sparkles,
15
+ Brain,
16
+ ArrowRight,
17
+ Layers,
18
+ Fingerprint,
19
+ Send,
20
+ Info,
21
+ X,
22
+ Server,
23
+ Activity,
24
+ Rocket,
25
+ } from "lucide-react";
26
+
27
+ // ── Types ─────────────────────────────────────────────────
28
+ interface Citation {
29
+ paper_id: string;
30
+ title: string;
31
+ authors: string[];
32
+ published_date: string;
33
+ arxiv_url: string;
34
+ }
35
+
36
+ interface QueryResult {
37
+ answer: string;
38
+ citations: Citation[];
39
+ query: string;
40
+ chunks_used: number;
41
+ retrieval_time_ms: number;
42
+ generation_time_ms: number;
43
+ total_time_ms: number;
44
+ has_context: boolean;
45
+ }
46
+
47
+ // ── Config ────────────────────────────────────────────────
48
+ const API_URL = process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000";
49
+
50
+ const EXAMPLE_QUERIES = [
51
+ "How does LoRA reduce trainable parameters?",
52
+ "What are challenges in multi-agent RL?",
53
+ "Explain diffusion models for images",
54
+ ];
55
+
56
+ const CATEGORY_OPTIONS = [
57
+ { value: "All", label: "All Topics" },
58
+ { value: "cs.LG", label: "cs.LG", indexed: true },
59
+ { value: "cs.AI", label: "cs.AI", indexed: true },
60
+ { value: "stat.ML", label: "stat.ML", indexed: true },
61
+ { value: "cs.CV", label: "cs.CV", indexed: false, disabled: true },
62
+ { value: "cs.CL", label: "cs.CL", indexed: false, disabled: true },
63
+ { value: "cs.RO", label: "cs.RO", indexed: false, disabled: true },
64
+ ];
65
+
66
+ // ── Custom Dropdown Component ─────────────────────────────
67
/**
 * Themed dropdown used by the search settings row.
 *
 * - `options`: entries to list; `disabled` entries are shown greyed-out and
 *   cannot be picked, entries with an `indexed` flag get an
 *   INDEXED / UNAVAILABLE badge.
 * - `value`: currently selected option value.
 * - `onChange`: called with the picked option's value.
 * - `width`: CSS width of the control (defaults to 140px).
 *
 * The menu opens below the trigger, or above it when less than ~240px of
 * viewport remains underneath. It closes on outside click, on Escape, or
 * after a selection.
 */
function CustomSelect({
    options,
    value,
    onChange,
    width = '140px',
}: {
    options: { value: string | number; label: string; disabled?: boolean; indexed?: boolean }[];
    value: string | number;
    onChange: (val: string | number) => void;
    width?: string;
}) {
    const [isOpen, setIsOpen] = useState(false);
    const [placement, setPlacement] = useState<"top" | "bottom">("bottom");
    const ref = useRef<HTMLDivElement>(null);

    useEffect(() => {
        // Close when the user presses the mouse anywhere outside the control.
        const handleClickOutside = (e: MouseEvent) => {
            if (ref.current && !ref.current.contains(e.target as Node)) setIsOpen(false);
        };
        // Close on Escape so the menu can be dismissed from the keyboard.
        const handleKeyDown = (e: KeyboardEvent) => {
            if (e.key === 'Escape') setIsOpen(false);
        };
        document.addEventListener('mousedown', handleClickOutside);
        document.addEventListener('keydown', handleKeyDown);
        return () => {
            document.removeEventListener('mousedown', handleClickOutside);
            document.removeEventListener('keydown', handleKeyDown);
        };
    }, []);

    const toggleOpen = () => {
        if (!isOpen && ref.current) {
            const rect = ref.current.getBoundingClientRect();
            // Need ~240px for full menu, pop up if space is tight below
            if (window.innerHeight - rect.bottom < 240) {
                setPlacement('top');
            } else {
                setPlacement('bottom');
            }
        }
        setIsOpen(!isOpen);
    };

    // Fall back to the raw value when no option matches it.
    const activeLabel = options.find((o) => o.value === value)?.label || value;

    return (
        <div ref={ref} style={{ position: 'relative', width }}>
            <button
                type="button"
                onClick={toggleOpen}
                className="cyber-select"
                aria-haspopup="listbox"
                aria-expanded={isOpen}
                style={{ width: '100%', display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}
            >
                <span style={{ overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>{activeLabel}</span>
                <span style={{ fontSize: '0.7em', opacity: 0.5, marginLeft: '8px' }}>{isOpen ? '▲' : '▼'}</span>
            </button>
            <AnimatePresence>
                {isOpen && (
                    <motion.div
                        role="listbox"
                        initial={{ opacity: 0, y: placement === 'top' ? 10 : -10, scale: 0.95 }}
                        animate={{ opacity: 1, y: 0, scale: 1 }}
                        exit={{ opacity: 0, y: placement === 'top' ? 5 : -5, scale: 0.95 }}
                        transition={{ duration: 0.15, ease: 'easeOut' }}
                        className="custom-dropdown-menu"
                        style={{
                            top: placement === 'bottom' ? 'calc(100% + 8px)' : 'auto',
                            bottom: placement === 'top' ? 'calc(100% + 8px)' : 'auto'
                        }}
                    >
                        {options.map((opt) => (
                            <button
                                type="button"
                                role="option"
                                aria-selected={value === opt.value}
                                key={opt.value}
                                onClick={() => {
                                    if (opt.disabled) return;
                                    onChange(opt.value);
                                    setIsOpen(false);
                                }}
                                disabled={opt.disabled}
                                className={`custom-dropdown-item ${value === opt.value ? 'active' : ''}`}
                                style={opt.disabled ? { opacity: 0.4, cursor: 'not-allowed' } : {}}
                            >
                                <div style={{ display: "flex", alignItems: "center", justifyContent: "space-between", width: "100%" }}>
                                    <span>{opt.label}</span>
                                    {/* Badge only for options that declare an index status. */}
                                    {opt.indexed !== undefined && (
                                        <div style={{
                                            fontSize: "0.65em",
                                            fontWeight: 600,
                                            padding: "2px 6px",
                                            borderRadius: "12px",
                                            backgroundColor: opt.indexed ? "rgba(16, 185, 129, 0.15)" : "rgba(156, 163, 175, 0.1)",
                                            color: opt.indexed ? "var(--success)" : "var(--text-muted)",
                                            marginLeft: "8px"
                                        }}>
                                            {opt.indexed ? "INDEXED" : "UNAVAILABLE"}
                                        </div>
                                    )}
                                </div>
                            </button>
                        ))}
                    </motion.div>
                )}
            </AnimatePresence>
        </div>
    );
}
164
+
165
/**
 * Render answer text that may contain LaTeX.
 *
 * `$$ ... $$` spans become display equations (BlockMath) and `$ ... $` spans
 * become inline equations (InlineMath); everything else is emitted as
 * pre-wrapped text so the model's line breaks survive.
 *
 * Inline math must start and end with a non-space character and the closing
 * `$` must not be followed by a digit. This keeps prices such as
 * "$5 and $10" as plain text instead of typesetting " 5 and " as a formula.
 *
 * Returns an array of React nodes (empty when `text` is empty or missing).
 */
function renderLaTeX(text: string) {
    // The API is typed to return a string, but guard against a missing field
    // so a bad response does not crash the whole results view.
    if (!text) return [];

    // split() with a capturing group puts the captured delimiters at odd
    // indices, so parity (not startsWith/endsWith) tells math from text.
    const blockSplit = text.split(/(\$\$[\s\S]+?\$\$)/g);
    return blockSplit.map((blockPart, i) => {
        if (i % 2 === 1) {
            const math = blockPart.slice(2, -2);
            return <BlockMath key={i} math={math} />;
        }
        const inlineSplit = blockPart.split(/(\$[^\s$](?:[^$]*[^\s$])?\$(?!\d))/g);
        return (
            <span key={i}>
                {inlineSplit.map((inlinePart, j) => {
                    if (j % 2 === 1) {
                        const math = inlinePart.slice(1, -1);
                        return <InlineMath key={j} math={math} />;
                    }
                    return (
                        <span key={j} style={{ whiteSpace: "pre-wrap" }}>
                            {inlinePart}
                        </span>
                    );
                })}
            </span>
        );
    });
}
190
+
191
+ // ── Main Page ─────────────────────────────────────────────
192
+ export default function Home() {
193
+ const [question, setQuestion] = useState("");
194
+ const [result, setResult] = useState<QueryResult | null>(null);
195
+ const [loading, setLoading] = useState(false);
196
+ const [error, setError] = useState("");
197
+
198
+ // Settings
199
+ const [topK, setTopK] = useState(5);
200
+ const [category, setCategory] = useState("All");
201
+ const [yearFilter, setYearFilter] = useState(false);
202
+ const [yearFrom, setYearFrom] = useState(2024);
203
+ const [showSettings, setShowSettings] = useState(false);
204
+
205
+ // System state
206
+ const [apiStatus, setApiStatus] = useState<
207
+ "unknown" | "online" | "offline"
208
+ >("unknown");
209
+ const [showInfo, setShowInfo] = useState(false);
210
+ const [showStatusDetails, setShowStatusDetails] = useState(false);
211
+
212
+ const textareaRef = useRef<HTMLTextAreaElement>(null);
213
+
214
+ useEffect(() => {
215
+ if (textareaRef.current) {
216
+ textareaRef.current.style.height = "auto";
217
+ textareaRef.current.style.height = `${Math.min(textareaRef.current.scrollHeight, 120)}px`;
218
+ }
219
+ }, [question]);
220
+
221
+ useEffect(() => {
222
+ checkHealth();
223
+ }, []);
224
+
225
// Ping the backend `/health` endpoint (5 s timeout) and record whether it
// answered with a 2xx status. Any network error or timeout counts as offline.
const checkHealth = async () => {
    let reachable = false;
    try {
        const healthResponse = await fetch(`${API_URL}/health`, {
            signal: AbortSignal.timeout(5000),
        });
        reachable = healthResponse.ok;
    } catch {
        reachable = false;
    }
    setApiStatus(reachable ? "online" : "offline");
};
235
+
236
// Send the current question to the backend `/query` endpoint and store the
// answer (or the error message) in component state.
const handleSearch = async () => {
    // Example chips call setQuestion(q) and then invoke this handler from a
    // timeout, so the `question` captured by that closure is still the
    // pre-click value. The controlled textarea always mirrors the latest
    // state, so read the live value through the ref and only fall back to
    // the captured state if the textarea is not mounted.
    const query = (textareaRef.current?.value ?? question).trim();
    if (!query) return;
    // Only the send button is disabled while loading; Enter would otherwise
    // start a second, racing request.
    if (loading) return;

    setLoading(true);
    setError("");
    setResult(null);

    try {
        const payload: Record<string, unknown> = {
            question: query,
            top_k: topK,
        };
        // Optional filters are only sent when the user enabled them.
        if (category !== "All") payload.filter_category = category;
        if (yearFilter) payload.filter_year_gte = yearFrom;

        const response = await fetch(`${API_URL}/query`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify(payload),
        });

        if (!response.ok)
            throw new Error(`API returned ${response.status}`);
        const data: QueryResult = await response.json();
        setResult(data);
    } catch (err) {
        setError(
            err instanceof Error
                ? err.message
                : "Failed to connect to API.",
        );
    } finally {
        setLoading(false);
    }
};
270
+
271
// Return the page to its initial landing state: no pending request, no
// error, no result, and an empty question box.
const resetApp = () => {
    setLoading(false);
    setError("");
    setResult(null);
    setQuestion("");
};
277
+
278
+ const hasSearched = result || loading || error;
279
+
280
+ return (
281
+ <>
282
+ <div className="luminous-grid" />
283
+ <div className="orb orb-cyan" />
284
+ <div className="orb orb-purple" />
285
+
286
+ <AnimatePresence>
287
+ {showInfo && (
288
+ <div className="info-modal-backdrop" onClick={() => setShowInfo(false)}>
289
+ <motion.div
290
+ initial={{ opacity: 0, scale: 0.95, y: 20 }}
291
+ animate={{ opacity: 1, scale: 1, y: 0 }}
292
+ exit={{ opacity: 0, scale: 0.95, y: 20 }}
293
+ onClick={(e) => e.stopPropagation()}
294
+ className="cyber-panel info-modal"
295
+ >
296
+ <button className="modal-close" onClick={() => setShowInfo(false)}>
297
+ <X size={18} />
298
+ </button>
299
+ <h2>ResearchPilot Console</h2>
300
+ <div style={{ display: "flex", alignItems: "center", gap: "12px", marginTop: "16px" }}>
301
+ <div style={{ background: "rgba(138, 43, 226, 0.15)", border: "1px solid rgba(138, 43, 226, 0.4)", padding: "6px 14px", borderRadius: "99px", fontSize: "0.75rem", color: "var(--accent-2)", fontWeight: 700, letterSpacing: "0.05em", textTransform: "uppercase" }}>
302
+ Lead Architect
303
+ </div>
304
+ <span style={{ fontFamily: "'Dancing Script', cursive", fontSize: "1.8rem", fontWeight: 700, background: "linear-gradient(135deg, #fff 20%, var(--accent-2) 100%)", WebkitBackgroundClip: "text", WebkitTextFillColor: "transparent", letterSpacing: "0.05em", transform: "translateY(-2px)" }}>Subhadip Hensh</span>
305
+ </div>
306
+
307
+ <hr />
308
+
309
+ <h3><Server size={18} /> System Overview</h3>
310
+ <p style={{ fontSize: "1rem", lineHeight: 1.7, marginBottom: "16px" }}>ResearchPilot is a high-performance RAG (Retrieval-Augmented Generation) engine tailored for Machine Learning literature. It features hybrid sparse-dense searching, advanced cross-encoder reranking, and GPU-driven vector indexing via Qdrant.</p>
311
+
312
+ <h3><Activity size={18} /> Current Operational Capacity</h3>
313
+ <ul>
314
+ <li><strong>Current Index</strong> Synthesizing 51,019 dense embeddings isolated from ~700 major AI & ML papers.</li>
315
+ <li><strong>Data Categories</strong> Fully indexed on core Machine Learning (cs.LG) and AI (cs.AI).</li>
316
+ </ul>
317
+
318
+ <h3><Layers size={18} /> Core Technology Stack</h3>
319
+ <ul>
320
+ <li><strong>Frontend Application</strong> Next.js 16 (App Router), React, Framer Motion, Vanilla CSS (Glassmorphism).</li>
321
+ <li><strong>Backend Environment</strong> Python, FastAPI, Uvicorn, Pydantic.</li>
322
+ <li><strong>Vector Database Engine</strong> Qdrant (GPU Accelerated Dense Vectors).</li>
323
+ <li><strong>RAG Processing Pipeline</strong> SentenceTransformers (BGE-base), BM25 Sparse Search, Cross-Encoder Reranking, Groq LLM (LLaMA 3.3).</li>
324
+ <li><strong>Mathematics Engine</strong> KaTeX & React-KaTeX for fully dynamic native LaTeX equations.</li>
325
+ </ul>
326
+
327
+ <h3><Rocket size={18} /> Phase 2: In-Progress Architecture</h3>
328
+ <ul>
329
+ <li><strong>Massive Data Expansion</strong> Scaling dataset soon to 10,000+ — 20,000+ ML papers spanning NLP, Computer Vision, and Robotics.</li>
330
+ <li><strong>Distributed Hardware Execution</strong> Scaling ingestion logic to cloud-based GPU clusters for extreme speed.</li>
331
+ <li><strong>Multi-modal Analysis</strong> Soon integrating visual graph and chart processing abilities into the synthesis engine.</li>
332
+ </ul>
333
+ </motion.div>
334
+ </div>
335
+ )}
336
+ </AnimatePresence>
337
+
338
+ {/* ── Top Nav ── */}
339
+ <header className="top-nav">
340
+ <div className="brand" onClick={resetApp} style={{ cursor: "pointer" }}>
341
+ <div className="brand-icon">
342
+ <Brain size={22} />
343
+ </div>
344
+ <span>ResearchPilot</span>
345
+ </div>
346
+ <div className="nav-right">
347
+ <button onClick={() => setShowInfo(true)} className="nav-icon-btn" aria-label="Project Info">
348
+ <Info size={16} />
349
+ </button>
350
+ <div style={{ position: "relative" }}>
351
+ <button
352
+ onClick={async () => {
353
+ await checkHealth();
354
+ setShowStatusDetails(!showStatusDetails);
355
+ }}
356
+ className="nav-status"
357
+ >
358
+ <div
359
+ className={`status-dot ${apiStatus === "online" ? "status-online" : apiStatus === "offline" ? "status-offline" : ""}`}
360
+ />
361
+ {apiStatus === "online"
362
+ ? "Systems Nominal"
363
+ : apiStatus === "offline"
364
+ ? "Offline"
365
+ : "Checking..."}
366
+ </button>
367
+
368
+ <AnimatePresence>
369
+ {showStatusDetails && (
370
+ <motion.div
371
+ initial={{ opacity: 0, y: 10, scale: 0.95 }}
372
+ animate={{ opacity: 1, y: 0, scale: 1 }}
373
+ exit={{ opacity: 0, y: 10, scale: 0.95 }}
374
+ className="cyber-panel custom-dropdown-menu"
375
+ style={{
376
+ position: "absolute",
377
+ top: "calc(100% + 12px)",
378
+ right: "-40px",
379
+ left: "auto",
380
+ width: "260px",
381
+ padding: "16px",
382
+ zIndex: 100,
383
+ display: "flex",
384
+ flexDirection: "column",
385
+ gap: "8px",
386
+ cursor: "default"
387
+ }}
388
+ onClick={(e) => e.stopPropagation()}
389
+ >
390
+ <h4 style={{ fontSize: "0.85rem", color: "#fff", marginBottom: "4px" }}>System Connection Status</h4>
391
+ <p style={{ fontSize: "0.75rem", color: "var(--text-muted)", lineHeight: 1.5 }}>
392
+ {apiStatus === "online"
393
+ ? "🟢 Backend API and Qdrant Vector Database are connected and responding correctly. The system is ready for inference."
394
+ : "🔴 Backend API is unreachable. You need to run 'python run_api.py' in your backend directory to enable RAG functionality."}
395
+ </p>
396
+ <button
397
+ onClick={(e) => {
398
+ e.stopPropagation();
399
+ checkHealth();
400
+ }}
401
+ style={{
402
+ marginTop: "8px",
403
+ fontSize: "0.75rem",
404
+ color: "var(--accent)",
405
+ background: "rgba(0, 240, 255, 0.1)",
406
+ padding: "6px 12px",
407
+ borderRadius: "8px",
408
+ border: "1px solid rgba(0, 240, 255, 0.3)",
409
+ textAlign: "center",
410
+ display: "block",
411
+ width: "100%",
412
+ cursor: "pointer",
413
+ transition: "0.2s"
414
+ }}
415
+ onMouseEnter={(e) => {
416
+ e.currentTarget.style.background = "rgba(0, 240, 255, 0.2)";
417
+ }}
418
+ onMouseLeave={(e) => {
419
+ e.currentTarget.style.background = "rgba(0, 240, 255, 0.1)";
420
+ }}
421
+ >
422
+ Re-verify Connection
423
+ </button>
424
+ </motion.div>
425
+ )}
426
+ </AnimatePresence>
427
+ </div>
428
+ <a
429
+ href="https://github.com/07subhadip"
430
+ target="_blank"
431
+ rel="noopener noreferrer"
432
+ className="github-link"
433
+ aria-label="GitHub Profile"
434
+ >
435
+ <svg viewBox="0 0 24 24" width="24" height="24" fill="currentColor">
436
+ <path d="M12 2C6.477 2 2 6.484 2 12.017c0 4.425 2.865 8.18 6.839 9.504.5.092.682-.217.682-.483 0-.237-.008-.868-.013-1.703-2.782.605-3.369-1.343-3.369-1.343-.454-1.158-1.11-1.466-1.11-1.466-.908-.62.069-.608.069-.608 1.003.07 1.531 1.032 1.531 1.032.892 1.53 2.341 1.088 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.113-4.555-4.951 0-1.093.39-1.988 1.029-2.688-.103-.253-.446-1.272.098-2.65 0 0 .84-.27 2.75 1.026A9.564 9.564 0 0112 6.844c.85.004 1.705.115 2.504.337 1.909-1.296 2.747-1.027 2.747-1.027.546 1.379.202 2.398.1 2.651.64.7 1.028 1.595 1.028 2.688 0 3.848-2.339 4.695-4.566 4.943.359.309.678.92.678 1.855 0 1.338-.012 2.419-.012 2.747 0 .268.18.58.688.482A10.019 10.019 0 0022 12.017C22 6.484 17.522 2 12 2z"/>
437
+ </svg>
438
+ </a>
439
+ </div>
440
+ </header>
441
+
442
+ <main className="app-container">
443
+ {/* ── Central Hero Block ── */}
444
+ <motion.div
445
+ layout
446
+ className="search-wrapper"
447
+ style={{ marginTop: hasSearched ? "130px" : "15vh" }}
448
+ transition={{ type: "spring", bounce: 0.2, duration: 0.8 }}
449
+ >
450
+ <AnimatePresence>
451
+ {!hasSearched && (
452
+ <motion.div
453
+ layout
454
+ initial={{ opacity: 0, scale: 0.95 }}
455
+ animate={{ opacity: 1, scale: 1 }}
456
+ exit={{ opacity: 0, scale: 0.95 }}
457
+ transition={{ duration: 0.5 }}
458
+ style={{ width: "100%" }}
459
+ >
460
+ <h1 className="hero-title">
461
+ Decipher the latest
462
+ <br />
463
+ <span className="text-gradient-2">
464
+ ML Research
465
+ </span>
466
+ </h1>
467
+ <p className="hero-subtitle">
468
+ Neural hybrid search across ArXiv. <br />
469
+ Cross-encoder reranked. LLM synthesized.
470
+ </p>
471
+ <div style={{ height: "64px" }} />
472
+ </motion.div>
473
+ )}
474
+ </AnimatePresence>
475
+
476
+ {/* ── ChatGPT Style Search Box ── */}
477
+ <motion.div
478
+ layout
479
+ style={{
480
+ width: "100%",
481
+ margin: "0 auto",
482
+ zIndex: 20,
483
+ }}
484
+ >
485
+ <div className="chat-input-wrapper">
486
+ <textarea
487
+ ref={textareaRef}
488
+ value={question}
489
+ onChange={(e) => setQuestion(e.target.value)}
490
+ placeholder="Message ResearchPilot..."
491
+ rows={1}
492
+ className="chat-input"
493
+ style={{
494
+ minHeight: hasSearched ? "20px" : "28px",
495
+ maxHeight: "120px",
496
+ overflowY: "auto",
497
+ }}
498
+ onKeyDown={(e) => {
499
+ if (e.key === "Enter" && !e.shiftKey) {
500
+ e.preventDefault();
501
+ handleSearch();
502
+ }
503
+ }}
504
+ />
505
+ <button
506
+ onClick={handleSearch}
507
+ disabled={loading || !question.trim()}
508
+ className="send-btn"
509
+ >
510
+ {loading ? (
511
+ <div className="spinner-micro" />
512
+ ) : (
513
+ <Send
514
+ size={18}
515
+ strokeWidth={2.5}
516
+ style={{ marginLeft: "-2px" }}
517
+ />
518
+ )}
519
+ </button>
520
+ </div>
521
+
522
+ <div className="search-controls">
523
+ <button
524
+ onClick={() => setShowSettings(!showSettings)}
525
+ className="controls-group"
526
+ style={{
527
+ color: "var(--text-muted)",
528
+ fontSize: "0.8rem",
529
+ fontWeight: 600,
530
+ padding: "4px",
531
+ cursor: "pointer",
532
+ }}
533
+ >
534
+ <Layers size={14} />
535
+ CONFIGURE {showSettings ? "▲" : "▼"}
536
+ </button>
537
+
538
+ <div className="controls-group">
539
+ <AnimatePresence>
540
+ {showSettings && (
541
+ <motion.div
542
+ initial={{ opacity: 0, height: 0 }}
543
+ animate={{
544
+ opacity: 1,
545
+ height: "auto",
546
+ }}
547
+ exit={{ opacity: 0, height: 0 }}
548
+ style={{
549
+ overflow: "visible",
550
+ display: "flex",
551
+ gap: "12px",
552
+ flexWrap: "wrap",
553
+ alignItems: "center",
554
+ }}
555
+ >
556
+ <CustomSelect
557
+ value={topK}
558
+ onChange={(val) => setTopK(Number(val))}
559
+ options={[
560
+ { value: 3, label: 'Top 3 Results' },
561
+ { value: 5, label: 'Top 5 Results' },
562
+ { value: 10, label: 'Top 10 Results' },
563
+ ]}
564
+ />
565
+ <CustomSelect
566
+ value={category}
567
+ onChange={(val) => setCategory(String(val))}
568
+ options={CATEGORY_OPTIONS}
569
+ width="160px"
570
+ />
571
+ <button
572
+ onClick={() =>
573
+ setYearFilter(!yearFilter)
574
+ }
575
+ className={`cyber-btn-outline ${yearFilter ? "active" : ""}`}
576
+ >
577
+ YEAR FILTER{" "}
578
+ {yearFilter ? "ON" : "OFF"}
579
+ </button>
580
+ {yearFilter && (
581
+ <div className="year-stepper">
582
+ <button onClick={() => setYearFrom(y => Math.max(2000, y - 1))} className="stepper-btn">-</button>
583
+ <input
584
+ type="number"
585
+ value={yearFrom}
586
+ readOnly
587
+ className="stepper-input"
588
+ />
589
+ <button onClick={() => setYearFrom(y => Math.min(2026, y + 1))} className="stepper-btn">+</button>
590
+ </div>
591
+ )}
592
+ </motion.div>
593
+ )}
594
+ </AnimatePresence>
595
+ </div>
596
+ </div>
597
+ </motion.div>
598
+
599
+ {/* ── Example Queries ── */}
600
+ <AnimatePresence>
601
+ {!hasSearched && (
602
+ <motion.div
603
+ layout
604
+ initial={{ opacity: 0 }}
605
+ animate={{ opacity: 1 }}
606
+ exit={{ opacity: 0, filter: "blur(5px)" }}
607
+ className="example-chips"
608
+ >
609
+ {EXAMPLE_QUERIES.map((q, i) => (
610
+ <motion.button
611
+ key={q}
612
+ initial={{ opacity: 0, y: 10 }}
613
+ animate={{ opacity: 1, y: 0 }}
614
+ transition={{ delay: 0.1 * i + 0.3 }}
615
+ onClick={() => {
616
+ setQuestion(q);
617
+ setTimeout(
618
+ () => handleSearch(),
619
+ 50,
620
+ );
621
+ }}
622
+ className="chip"
623
+ >
624
+ {q}
625
+ </motion.button>
626
+ ))}
627
+ </motion.div>
628
+ )}
629
+ </AnimatePresence>
630
+ </motion.div>
631
+
632
+ {/* ── Error State ── */}
633
+ <AnimatePresence>
634
+ {error && (
635
+ <motion.div
636
+ initial={{ opacity: 0, y: 10 }}
637
+ animate={{ opacity: 1, y: 0 }}
638
+ className="cyber-panel"
639
+ style={{
640
+ width: "100%",
641
+ padding: "24px",
642
+ borderColor: "var(--danger)",
643
+ marginTop: "24px",
644
+ }}
645
+ >
646
+ <div
647
+ style={{
648
+ display: "flex",
649
+ gap: "16px",
650
+ alignItems: "center",
651
+ }}
652
+ >
653
+ <AlertCircle size={32} color="var(--danger)" />
654
+ <div>
655
+ <h3
656
+ style={{
657
+ color: "var(--danger)",
658
+ fontSize: "1.2rem",
659
+ marginBottom: "4px",
660
+ }}
661
+ >
662
+ Critical Exception
663
+ </h3>
664
+ <p style={{ color: "var(--text-muted)" }}>
665
+ {error}
666
+ </p>
667
+ </div>
668
+ </div>
669
+ </motion.div>
670
+ )}
671
+ </AnimatePresence>
672
+
673
+ {/* ── Loading View ── */}
674
+ <AnimatePresence>
675
+ {loading && (
676
+ <motion.div
677
+ initial={{ opacity: 0 }}
678
+ animate={{ opacity: 1 }}
679
+ exit={{ opacity: 0 }}
680
+ className="loader-view"
681
+ style={{ width: "100%" }}
682
+ >
683
+ <div className="ring-spinner">
684
+ <div className="ring ring-1" />
685
+ <div className="ring ring-2" />
686
+ <Brain
687
+ size={22}
688
+ style={{
689
+ position: "absolute",
690
+ top: "24px",
691
+ left: "24px",
692
+ color: "var(--text-main)",
693
+ }}
694
+ />
695
+ </div>
696
+ <div>
697
+ <h2
698
+ style={{
699
+ fontSize: "1.4rem",
700
+ fontWeight: 600,
701
+ color: "#fff",
702
+ marginBottom: "8px",
703
+ }}
704
+ >
705
+ Synthesizing Knowledge
706
+ </h2>
707
+ <p
708
+ style={{
709
+ color: "var(--text-muted)",
710
+ fontSize: "0.95rem",
711
+ }}
712
+ >
713
+ Running Vector Search & LLM Inference
714
+ </p>
715
+ </div>
716
+ </motion.div>
717
+ )}
718
+ </AnimatePresence>
719
+
720
+ {/* ── Results Output ── */}
721
+ {result && !loading && (
722
+ <motion.div
723
+ initial={{ opacity: 0, y: 20 }}
724
+ animate={{ opacity: 1, y: 0 }}
725
+ transition={{ staggerChildren: 0.1 }}
726
+ className="results-area"
727
+ >
728
+ <motion.div
729
+ className="cyber-panel answer-box"
730
+ initial={{ opacity: 0, y: 20 }}
731
+ animate={{ opacity: 1, y: 0 }}
732
+ >
733
+ <div className="answer-header">
734
+ <div className="answer-label">
735
+ <Sparkles size={20} /> AI Synthesis
736
+ </div>
737
+ {result.has_context ? (
738
+ <div className="badge-grounded">
739
+ <CheckCircle size={14} /> Grounded
740
+ Sources
741
+ </div>
742
+ ) : (
743
+ <div
744
+ className="badge-grounded"
745
+ style={{
746
+ color: "var(--danger)",
747
+ borderColor:
748
+ "rgba(239, 68, 68, 0.3)",
749
+ background:
750
+ "rgba(239, 68, 68, 0.1)",
751
+ }}
752
+ >
753
+ <AlertCircle size={14} /> Hallucination
754
+ Risk
755
+ </div>
756
+ )}
757
+ </div>
758
+ <div className="answer-text">{renderLaTeX(result.answer)}</div>
759
+ </motion.div>
760
+
761
+ <motion.div
762
+ className="stats-grid"
763
+ initial={{ opacity: 0, y: 20 }}
764
+ animate={{ opacity: 1, y: 0 }}
765
+ >
766
+ {[
767
+ {
768
+ l: "Execution Time",
769
+ v: `${(result.total_time_ms / 1000).toFixed(1)}s`,
770
+ i: Clock,
771
+ c: "var(--accent)",
772
+ },
773
+ {
774
+ l: "Vector Data",
775
+ v: `${(result.retrieval_time_ms / 1000).toFixed(1)}s`,
776
+ i: ArrowRight,
777
+ c: "#fff",
778
+ },
779
+ {
780
+ l: "LLM Generation",
781
+ v: `${(result.generation_time_ms / 1000).toFixed(1)}s`,
782
+ i: Zap,
783
+ c: "var(--accent-2)",
784
+ },
785
+ {
786
+ l: "Paper Chunks",
787
+ v: result.chunks_used,
788
+ i: Fingerprint,
789
+ c: "var(--success)",
790
+ },
791
+ ].map((s, i) => (
792
+ <div key={i} className="cyber-panel stat-card">
793
+ <div
794
+ className="stat-header"
795
+ style={{ width: "100%" }}
796
+ >
797
+ <span className="stat-label">
798
+ {s.l}
799
+ </span>
800
+ <s.i
801
+ size={16}
802
+ color={s.c}
803
+ style={{ opacity: 0.8 }}
804
+ />
805
+ </div>
806
+ <div
807
+ className="stat-value"
808
+ style={{
809
+ width: "100%",
810
+ textAlign: "left",
811
+ color: s.c,
812
+ }}
813
+ >
814
+ {s.v}
815
+ </div>
816
+ </div>
817
+ ))}
818
+ </motion.div>
819
+
820
+ {result.citations.length > 0 && (
821
+ <motion.div
822
+ initial={{ opacity: 0, y: 20 }}
823
+ animate={{ opacity: 1, y: 0 }}
824
+ >
825
+ <div className="section-title">
826
+ <BookOpen
827
+ size={18}
828
+ color="var(--accent-2)"
829
+ />
830
+ Extracted Literature
831
+ </div>
832
+ <div className="citations-grid">
833
+ {result.citations.map((cite, i) => (
834
+ <a
835
+ href={cite.arxiv_url}
836
+ target="_blank"
837
+ rel="noopener noreferrer"
838
+ key={i}
839
+ className="cyber-panel citation-card"
840
+ >
841
+ <div className="citation-open">
842
+ <ExternalLink size={16} />
843
+ </div>
844
+ <div className="citation-meta">
845
+ <span className="citation-id">
846
+ {cite.paper_id}
847
+ </span>
848
+ <span className="citation-date">
849
+ {cite.published_date}
850
+ </span>
851
+ </div>
852
+ <h4 className="citation-title">
853
+ {cite.title}
854
+ </h4>
855
+ <div className="citation-authors">
856
+ {cite.authors
857
+ .slice(0, 3)
858
+ .join(", ")}
859
+ {cite.authors.length > 3 &&
860
+ ` +${cite.authors.length - 3} more`}
861
+ </div>
862
+ </a>
863
+ ))}
864
+ </div>
865
+ </motion.div>
866
+ )}
867
+ </motion.div>
868
+ )}
869
+ </main>
870
+ </>
871
+ );
872
+ }
frontend-next/eslint.config.mjs ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig, globalIgnores } from "eslint/config";
2
+ import nextVitals from "eslint-config-next/core-web-vitals";
3
+ import nextTs from "eslint-config-next/typescript";
4
+
5
+ const eslintConfig = defineConfig([
6
+ ...nextVitals,
7
+ ...nextTs,
8
+ // Override default ignores of eslint-config-next.
9
+ globalIgnores([
10
+ // Default ignores of eslint-config-next:
11
+ ".next/**",
12
+ "out/**",
13
+ "build/**",
14
+ "next-env.d.ts",
15
+ ]),
16
+ ]);
17
+
18
+ export default eslintConfig;
frontend-next/next.config.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import type { NextConfig } from "next";
2
+
3
+ const nextConfig: NextConfig = {
4
+ /* config options here */
5
+ };
6
+
7
+ export default nextConfig;
frontend-next/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend-next/package.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend-next",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "eslint"
10
+ },
11
+ "dependencies": {
12
+ "clsx": "^2.1.1",
13
+ "framer-motion": "^12.38.0",
14
+ "katex": "^0.16.45",
15
+ "lucide-react": "^1.7.0",
16
+ "next": "16.2.2",
17
+ "react": "19.2.4",
18
+ "react-dom": "19.2.4",
19
+ "react-katex": "^3.1.0",
20
+ "tailwind-merge": "^3.5.0"
21
+ },
22
+ "devDependencies": {
23
+ "@tailwindcss/postcss": "^4",
24
+ "@types/katex": "^0.16.8",
25
+ "@types/node": "^20",
26
+ "@types/react": "^19",
27
+ "@types/react-dom": "^19",
28
+ "@types/react-katex": "^3.0.4",
29
+ "eslint": "^9",
30
+ "eslint-config-next": "16.2.2",
31
+ "tailwindcss": "^4",
32
+ "typescript": "^5"
33
+ }
34
+ }
frontend-next/postcss.config.mjs ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ const config = {
2
+ plugins: {
3
+ "@tailwindcss/postcss": {},
4
+ },
5
+ };
6
+
7
+ export default config;
frontend-next/public/file.svg ADDED
frontend-next/public/globe.svg ADDED
frontend-next/public/next.svg ADDED
frontend-next/public/vercel.svg ADDED
frontend-next/public/window.svg ADDED
frontend-next/tsconfig.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2017",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
5
+ "allowJs": true,
6
+ "skipLibCheck": true,
7
+ "strict": true,
8
+ "noEmit": true,
9
+ "esModuleInterop": true,
10
+ "module": "esnext",
11
+ "moduleResolution": "bundler",
12
+ "resolveJsonModule": true,
13
+ "isolatedModules": true,
14
+ "jsx": "react-jsx",
15
+ "incremental": true,
16
+ "plugins": [
17
+ {
18
+ "name": "next"
19
+ }
20
+ ],
21
+ "paths": {
22
+ "@/*": ["./*"]
23
+ }
24
+ },
25
+ "include": [
26
+ "next-env.d.ts",
27
+ "**/*.ts",
28
+ "**/*.tsx",
29
+ ".next/types/**/*.ts",
30
+ ".next/dev/types/**/*.ts",
31
+ "**/*.mts"
32
+ ],
33
+ "exclude": ["node_modules"]
34
+ }
merge_embeddings.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Merge old (local) and new (Kaggle) embeddings into a single set.
3
+
4
+ This script:
5
+ 1. Loads existing embeddings.npy + chunk_ids.npy (your ~51k chunks)
6
+ 2. Loads new_embeddings.npy + new_chunk_ids.npy (from Kaggle batch)
7
+ 3. Deduplicates by chunk_id
8
+ 4. Saves the merged result back as embeddings.npy + chunk_ids.npy
9
+ 5. Backs up the originals first
10
+
11
+ Run from project root:
12
+ python merge_embeddings.py
13
+ """
14
+
15
+ import json
16
+ import shutil
17
+ import numpy as np
18
+ from pathlib import Path
19
+
20
+ EMBEDDINGS_DIR = Path("data/embeddings")
21
+
22
+ # File paths
23
+ old_emb_file = EMBEDDINGS_DIR / "embeddings.npy"
24
+ old_ids_file = EMBEDDINGS_DIR / "chunk_ids.npy"
25
+ new_emb_file = EMBEDDINGS_DIR / "new_embeddings.npy"
26
+ new_ids_file = EMBEDDINGS_DIR / "new_chunk_ids.npy"
27
+
28
+ # Backup dir
29
+ backup_dir = EMBEDDINGS_DIR / "backup_before_merge"
30
+ backup_dir.mkdir(exist_ok=True)
31
+
32
+
33
def main():
    """Merge the new (Kaggle) embedding batch into the existing embedding set.

    Reads the four module-level ``.npy`` paths, drops every new chunk whose ID
    is already present (either in the old set or earlier in the new batch),
    stacks the remaining new rows under the old ones, copies the original
    files into ``backup_dir``, then overwrites ``embeddings.npy`` /
    ``chunk_ids.npy`` and rewrites ``embedding_index.json``.

    Returns:
        None. Progress and errors are reported on stdout. When an input file
        is missing or the inputs are inconsistent, the function returns
        before anything is written to disk.
    """
    print("=" * 60)
    print(" RESEARCHPILOT — EMBEDDING MERGE TOOL")
    print("=" * 60)

    # ── Step 1: Validate files exist ──
    for f in [old_emb_file, old_ids_file, new_emb_file, new_ids_file]:
        if not f.exists():
            print(f"❌ Missing file: {f}")
            return
    print("✅ All required files found.\n")

    # ── Step 2: Load old embeddings ──
    # NOTE(security): allow_pickle=True unpickles arbitrary objects, so only
    # run this tool on .npy files you produced yourself.
    print("Loading OLD embeddings...")
    old_embeddings = np.load(str(old_emb_file))
    old_ids = list(np.load(str(old_ids_file), allow_pickle=True))
    print(f" Old: {old_embeddings.shape[0]:,} chunks, dim={old_embeddings.shape[1]}")

    # ── Step 3: Load new embeddings ──
    print("Loading NEW embeddings (from Kaggle)...")
    new_embeddings = np.load(str(new_emb_file))
    new_ids = list(np.load(str(new_ids_file), allow_pickle=True))
    print(f" New: {new_embeddings.shape[0]:,} chunks, dim={new_embeddings.shape[1]}")

    # Refuse to merge inconsistent inputs: the originals get overwritten
    # below, so a row/ID mismatch would silently corrupt the saved set.
    for label, emb, ids in (
        ("Old", old_embeddings, old_ids),
        ("New", new_embeddings, new_ids),
    ):
        if emb.shape[0] != len(ids):
            print(
                f"❌ {label} set is inconsistent: "
                f"{emb.shape[0]:,} embedding rows vs {len(ids):,} chunk IDs"
            )
            return
    if old_embeddings.shape[1] != new_embeddings.shape[1]:
        print(
            "❌ Embedding dimension mismatch: "
            f"old={old_embeddings.shape[1]}, new={new_embeddings.shape[1]}"
        )
        return

    # ── Step 4: Deduplicate ──
    print("\nDeduplicating...")
    # Seed with the old IDs and grow while scanning, so an ID repeated
    # inside the new batch is kept only once (first occurrence wins).
    seen_ids = set(old_ids)
    keep_indices = []
    for i, cid in enumerate(new_ids):
        if cid in seen_ids:
            continue
        seen_ids.add(cid)
        keep_indices.append(i)

    # An explicit integer index array keeps the selection well-defined even
    # when nothing new survives deduplication.
    unique_new_embeddings = new_embeddings[np.asarray(keep_indices, dtype=np.intp)]
    unique_new_ids = [new_ids[i] for i in keep_indices]
    duplicates_removed = len(new_ids) - len(unique_new_ids)
    print(f" Duplicates skipped: {duplicates_removed}")
    print(f" Unique new chunks: {len(unique_new_ids):,}")

    # ── Step 5: Merge ──
    print("\nMerging...")
    merged_embeddings = np.vstack([old_embeddings, unique_new_embeddings])
    merged_ids = old_ids + unique_new_ids
    print(f" MERGED TOTAL: {merged_embeddings.shape[0]:,} chunks")

    # ── Step 6: Backup originals ──
    # Must happen before Step 7, which overwrites the same paths.
    print("\nBacking up originals...")
    shutil.copy2(old_emb_file, backup_dir / "embeddings_old.npy")
    shutil.copy2(old_ids_file, backup_dir / "chunk_ids_old.npy")
    print(f" Backed up to: {backup_dir}")

    # ── Step 7: Save merged files ──
    print("\nSaving merged embeddings...")
    np.save(str(old_emb_file), merged_embeddings)
    np.save(str(old_ids_file), np.array(merged_ids, dtype=object))

    # Update the index file
    index = {
        "total_embeddings": len(merged_ids),
        "embedding_dimension": int(merged_embeddings.shape[1]),
        "model_name": "BAAI/bge-base-en-v1.5",
        "chunk_id_sample": merged_ids[:5],
    }
    with open(EMBEDDINGS_DIR / "embedding_index.json", "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2)

    print(f" ✅ embeddings.npy → {merged_embeddings.shape}")
    print(f" ✅ chunk_ids.npy → {len(merged_ids):,} IDs")
    print(" ✅ embedding_index.json updated")

    # ── Summary ──
    size_mb = (EMBEDDINGS_DIR / "embeddings.npy").stat().st_size / 1e6
    print(f"\n{'=' * 60}")
    print(" MERGE COMPLETE!")
    print(f" Old: {len(old_ids):,} chunks")
    print(f" + New: {len(unique_new_ids):,} chunks")
    print(f" = Total: {len(merged_ids):,} chunks")
    print(f" File size: {size_mb:.0f} MB")
    print(f"{'=' * 60}")
    print("\n👉 Now run: python run_indexing.py --recreate")
112
+
113
+
114
+ if __name__ == "__main__":
115
+ main()
reindex_light.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Re-index Qdrant with a LIMITED number of chunks (for low-RAM machines).
3
+
4
+ Your full 358k embeddings stay on disk untouched.
5
+ This only controls how many get loaded into the Qdrant search index.
6
+
7
+ Usage:
8
+ python reindex_light.py (default: 100,000 chunks)
9
+ python reindex_light.py --limit 50000
10
+ """
11
+
12
+ import sys
13
+ import json
14
+ import numpy as np
15
+ from pathlib import Path
16
+
17
+ from src.utils.logger import setup_logger, get_logger
18
+ from src.vectorstore.qdrant_store import QdrantStore
19
+ from src.embeddings.embedding_cache import EmbeddingCache
20
+ from config.settings import CHUNKS_DIR
21
+
22
+ setup_logger()
23
+ logger = get_logger(__name__)
24
+
25
+
26
def main():
    """Rebuild the Qdrant collection from a capped number of chunks.

    Loads chunk records from ``CHUNKS_DIR`` (files matching
    ``*_semantic.json``) until ``limit`` chunks are collected, pairs each
    chunk with its row in the embedding cache by chunk ID, recreates the
    Qdrant collection, and indexes the matched chunks.

    Command line:
        --limit N   Maximum number of chunks to load (default 100,000).
                    Must be a positive integer.

    Returns:
        None. Progress and errors are reported on stdout. On an invalid
        ``--limit`` or when no chunk has a cached embedding, the function
        returns before the existing collection is recreated.
    """
    # Parse limit from args
    limit = 100_000
    if "--limit" in sys.argv:
        idx = sys.argv.index("--limit")
        try:
            limit = int(sys.argv[idx + 1])
        except (IndexError, ValueError):
            print("❌ --limit requires an integer value, e.g. --limit 50000")
            return
        if limit <= 0:
            print("❌ --limit must be a positive integer")
            return

    print(f"{'=' * 60}")
    print(" LIGHTWEIGHT RE-INDEXER (RAM-safe)")
    print(f" Chunk limit: {limit:,}")
    print(f"{'=' * 60}\n")

    # Step 1: Load chunk files from disk (only up to limit)
    print("Step 1: Loading chunk files...")
    chunk_ids = []
    texts = []
    metadata = []

    chunk_files = sorted(CHUNKS_DIR.glob("*_semantic.json"))
    print(f" Found {len(chunk_files)} chunk files on disk")

    for cf in chunk_files:
        if len(chunk_ids) >= limit:
            break

        with open(cf, 'r', encoding='utf-8') as f:
            raw = json.load(f)

        # Handle both formats: a dict wrapping a "chunks" list, or a bare list
        if isinstance(raw, dict) and "chunks" in raw:
            chunk_list = raw["chunks"]
        elif isinstance(raw, list):
            chunk_list = raw
        else:
            # Surface the skip instead of dropping the file silently.
            logger.warning(f"Unexpected format in {cf.name}, skipping")
            continue

        for chunk in chunk_list:
            if len(chunk_ids) >= limit:
                break
            chunk_ids.append(chunk['chunk_id'])
            texts.append(chunk['text'])
            # Everything except the text itself is kept as metadata
            metadata.append({k: v for k, v in chunk.items() if k != 'text'})

    print(f" Loaded {len(chunk_ids):,} chunks (limit: {limit:,})\n")

    # Step 2: Load embeddings and match to chunks
    print("Step 2: Loading embedding cache...")
    cache = EmbeddingCache()
    cache.load()
    embeddings_matrix, cached_ids = cache.get_all()
    print(f" Cache has {len(cached_ids):,} embeddings")

    # Build lookup: chunk ID -> row index in the cached matrix
    id_to_row = {cid: i for i, cid in enumerate(cached_ids)}

    # Match chunks to embeddings; chunks without a cached embedding are dropped
    valid = [(i, id_to_row[cid]) for i, cid in enumerate(chunk_ids) if cid in id_to_row]
    print(f" Matched {len(valid):,} chunks with embeddings\n")

    if not valid:
        # Recreating the collection now would wipe the current index and
        # leave it empty, so stop before touching Qdrant.
        print("❌ No loaded chunks have a cached embedding; "
              "leaving the existing Qdrant collection untouched.")
        return

    chunk_indices = [v[0] for v in valid]
    embed_indices = [v[1] for v in valid]

    final_chunk_ids = [chunk_ids[i] for i in chunk_indices]
    final_texts = [texts[i] for i in chunk_indices]
    final_metadata = [metadata[i] for i in chunk_indices]
    final_embeddings = embeddings_matrix[embed_indices]

    # Step 3: Recreate Qdrant collection
    print("Step 3: Rebuilding Qdrant collection...")
    store = QdrantStore()
    store.create_collection(recreate=True)

    # Step 4: Index
    print(f"Step 4: Indexing {len(final_chunk_ids):,} chunks into Qdrant...")
    total = store.index_chunks(
        embeddings=final_embeddings,
        chunk_ids=final_chunk_ids,
        metadata=final_metadata,
        texts=final_texts,
    )

    # Take the vector width from the data rather than hard-coding 768.
    # The 4 bytes per component keeps the original float32 assumption.
    dim = final_embeddings.shape[1]

    print(f"\n{'=' * 60}")
    print(" ✅ INDEXING COMPLETE")
    print(f" Chunks indexed: {total:,}")
    print(f" Collection: {store.get_collection_info()}")
    print(f" RAM usage: ~{total * dim * 4 / 1e6:.0f} MB (vectors only)")
    print(f"{'=' * 60}")
    print("\n 👉 Now run: python run_api.py")
114
+
115
+
116
+ if __name__ == "__main__":
117
+ main()
run_api.py CHANGED
@@ -22,6 +22,6 @@ if __name__ == "__main__":
22
  "src.api.main:app",
23
  host = API_HOST,
24
  port = API_PORT,
25
- reload = API_RELOAD, # Auto-restart on code changes (dev only)
26
- workers = 1, # Single worker for dev (no GPU sharing issues)
27
  )
 
22
  "src.api.main:app",
23
  host = API_HOST,
24
  port = API_PORT,
25
+ reload = False, # Disable auto-reload (saves ~10s scanning 3000+ data files)
26
+ workers = 1,
27
  )
src/ingestion/arxiv_fetcher.py CHANGED
@@ -43,7 +43,7 @@ logger = get_logger(__name__)
43
 
44
  class PaperMetadata(BaseModel):
45
  """
46
- Pydantic model defininf the exact schema for a paper's metadata.
47
 
48
  WHY PYDANTIC:
49
  Pydantic enforces data types at runtime. If ArXiv returns a date
@@ -212,7 +212,7 @@ class ArXivFetcher:
212
  abstract = result.summary,
213
  authors = [str(a) for a in result.authors],
214
  categories = result.categories,
215
- primary_category = str(result.primary_category) if result.primary_category else result.categories[0] if result.categories else "cs.LG",
216
  published_date = result.published.strftime("%Y-%m-%d"),
217
  updated_date = result.updated.strftime("%Y-%m-%d"),
218
  arxiv_url = result.entry_id,
@@ -267,7 +267,7 @@ class ArXivFetcher:
267
  search = arxiv.Search(
268
  query = category_query,
269
  max_results = max_papers * 2, # Fetch extra account for skips
270
- sort_by = arxiv.SortCriterion.SubmittedDate,
271
  sort_order = arxiv.SortOrder.Descending,
272
  )
273
 
 
43
 
44
  class PaperMetadata(BaseModel):
45
  """
46
+ Pydantic model defining the exact schema for a paper's metadata.
47
 
48
  WHY PYDANTIC:
49
  Pydantic enforces data types at runtime. If ArXiv returns a date
 
212
  abstract = result.summary,
213
  authors = [str(a) for a in result.authors],
214
  categories = result.categories,
215
+ primary_categories = str(result.primary_category) if result.primary_category else result.categories[0] if result.categories else "cs.LG",
216
  published_date = result.published.strftime("%Y-%m-%d"),
217
  updated_date = result.updated.strftime("%Y-%m-%d"),
218
  arxiv_url = result.entry_id,
 
267
  search = arxiv.Search(
268
  query = category_query,
269
  max_results = max_papers * 2, # Fetch extra account for skips
270
+ sort_by = arxiv.SortCriterion.Relevance,
271
  sort_order = arxiv.SortOrder.Descending,
272
  )
273
 
src/rag/prompt_templates.py CHANGED
@@ -13,20 +13,29 @@ Key principles we apply:
13
  """
14
 
15
  SYSTEM_PROMPT = """You are ResearchPilot, an expert AI research assistant
16
- specialized in machine learning and AI research papers.
17
 
18
- Your job is to answer questions based EXCLUSIVELY on the research paper
19
- excerpts provided in the context below.
20
 
21
  STRICT RULES:
22
- 1. Only use information from the provided context excerpts
23
- 2. Always cite the paper title and ID when using information from it
24
- 3. If the context does not contain enough information to answer,
25
- say "The provided papers do not contain sufficient information
26
- to answer this question" - do NOT make up information
27
- 4. Be precise and technical - your users are ML researchers and engineers
28
- 5. When multiple papers discuss the same topic, synthesize their findings
29
- 6. Keep answers focused and well-structured
 
 
 
 
 
 
 
 
 
30
  """
31
 
32
 
 
13
  """
14
 
15
  SYSTEM_PROMPT = """You are ResearchPilot, an expert AI research assistant
16
+ specialized in machine learning, AI, and statistics.
17
 
18
+ Your job is to answer questions based on the research paper excerpts
19
+ provided in the context below.
20
 
21
  STRICT RULES:
22
+ 1. Only use information from the provided context excerpts
23
+ 2. Always cite the paper title and ID when using information from it
24
+ 3. If the context does not contain enough information to answer,
25
+ clearly state what IS available and what is missing - do NOT fabricate
26
+ 4. Be precise and technical - your users are ML researchers and engineers
27
+ 5. When multiple papers discuss the same topic, synthesize their findings
28
+
29
+ FORMATTING RULES:
30
+ 6. For ALL mathematical expressions, use LaTeX notation:
31
+ - Inline math: $expression$ (e.g. $\\hat{y} = \\sigma(Wx + b)$)
32
+ - Block math: $$expression$$ for standalone equations
33
+ - Examples:
34
+ Loss function: $$\\mathcal{L} = -\\sum_{i} y_i \\log(\\hat{y}_i)$$
35
+ Attention: $$\\text{Attention}(Q,K,V) = \\text{softmax}\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$
36
+ 7. Use markdown formatting: **bold** for key terms, numbered lists for steps
37
+ 8. For algorithm explanations, structure as: Intuition -> Math -> Steps
38
+ 9. Write comprehensive, detailed answers — do not truncate explanations
39
  """
40
 
41
 
src/vectorstore/indexer.py CHANGED
@@ -80,14 +80,24 @@ class VectorIndexer:
80
 
81
  for cf in chunk_files:
82
  with open(cf, 'r', encoding = "utf-8") as f:
83
- chunks = json.load(f)
84
-
85
- for chunk in chunks:
 
 
 
 
 
 
 
 
 
 
 
86
  chunk_ids.append(chunk['chunk_id'])
87
  texts.append(chunk["text"])
88
 
89
-
90
- # Everything expect that goes into metadata
91
  metadata.append(
92
  {
93
  k: v for k, v in chunk.items()
 
80
 
81
  for cf in chunk_files:
82
  with open(cf, 'r', encoding = "utf-8") as f:
83
+ raw = json.load(f)
84
+
85
+ # Handle both formats:
86
+ # Old local format: [{chunk_id: ..., text: ...}, ...]
87
+ # New Kaggle format: {"paper_id": "...", "chunks": [...]}
88
+ if isinstance(raw, dict) and "chunks" in raw:
89
+ chunk_list = raw["chunks"]
90
+ elif isinstance(raw, list):
91
+ chunk_list = raw
92
+ else:
93
+ logger.warning(f"Unexpected format in {cf.name}, skipping")
94
+ continue
95
+
96
+ for chunk in chunk_list:
97
  chunk_ids.append(chunk['chunk_id'])
98
  texts.append(chunk["text"])
99
 
100
+ # Everything except text goes into metadata
 
101
  metadata.append(
102
  {
103
  k: v for k, v in chunk.items()