coder-vansh commited on
Commit
24da4a9
Β·
1 Parent(s): b49c393

All files are completed.

Browse files
Files changed (8) hide show
  1. app/main.py +426 -0
  2. app/models.py +66 -0
  3. app/utils/__init__.py +9 -1
  4. app/utils/retry.py +13 -13
  5. app/utils/time_info.py +1 -1
  6. config.py +220 -0
  7. run.py +9 -0
  8. test.py +278 -0
app/main.py CHANGED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MAIN MODULE
3
+ ============================
4
+
5
+ This module defines the FastAPI application and all HTTP endpoints. It is
6
+ designed for single-user use: one person runs one server (e.g. python run.py)
7
+ and uses it as their personal J.A.R.V.I.S backend. Many people can each run
8
+ their own copy of this code on their own machine.
9
+
10
+ ENDPOINTS:
11
+ GET / - Returns the API name and a list of endpoints.
12
+ GET /health - Returns status of all services (for monitoring).
13
+ POST /chat - General chat: pure LLM, no web search. Uses learning data
14
+ and past chats via vector-store retrieval only.
15
+ POST /chat/realtime - Realtime chat: runs a Tavily web search first, then
16
+ sends results + context to Groq. Same session as /chat.
17
+ GET /chat/history/{id} - Returns all messages for a session (general + realtime).
18
+
19
+ SESSION:
20
+ Both /chat and /chat/realtime use the same session_id. If you omit session_id,
21
+ the server generates a UUID and returns it; send it back on the next request
22
+ to continue the conversation. Sessions are saved to disk and survive restarts.
23
+
24
+ STARTUP:
25
+ On startup, the lifespan function builds the vector store from learning_data/*.txt
26
+ and chats_data/*.json, then creates Groq, Realtime, and Chat services. On shutdown,
27
+ it saves all in memory sessions to disk.
28
+ """
29
+
30
+ from fastapi import FastAPI, HTTPException
31
+ from fastapi.middleware.cors import CORSMiddleware
32
+ from contextlib import asynccontextmanager
33
+ import uvicorn
34
+ import logging
35
+
36
+ from app.models import ChatRequest, ChatResponse
37
+
38
+ # User-friendly message when Groq rate limit (daily token quota) is exceeded.
39
+ RATE_LIMIT_MESSAGE = (
40
+ "You've reached your daily API limit for this assistant. "
41
+ "Your credits will reset in a few hours, or you can upgrade your plan for more. "
42
+ "Please try again later"
43
+ )
44
+
45
+
46
+ def _is_rate_limit_error(exc: Exception) -> bool:
47
+ """True if the exception is a Groq rate limit (429 / tokens per day)."""
48
+ msg = str(exc).lower()
49
+ return "429" in str(exc) or "rate limit" in msg or "tokens per day" in msg
50
+
51
+
52
+ from app.services.vector_store import VectorStoreService
53
+ from app.services.groq_service import GroqService
54
+ from app.services.realtime_service import RealtimeGroqService
55
+ from app.services.chat_service import ChatService
56
+ from config import VECTOR_STORE_DIR
57
+ from langchain_community.vectorstores import FAISS
58
+
59
+
60
+ # ----------------------------------------------------------------------------
61
+ # LOGGING
62
+ # ----------------------------------------------------------------------------
63
+ logging.basicConfig(
64
+ level=logging.INFO,
65
+ format='%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s',
66
+ datefmt='%Y-%m-%d %H:%M:%S'
67
+ )
68
+ logger = logging.getLogger("J.A.R.V.I.S")
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # GLOBAL SERVICE REFERENCES
73
+ # ---------------------------------------------------------------------------
74
+ # Set during startup (lifespan) and used by all route handlers.
75
+ # Stored as globals so async endpoints can access the same service instances.
76
+ vector_store_service: VectorStoreService = None
77
+ groq_service: GroqService = None
78
+ realtime_service: RealtimeGroqService = None
79
+ chat_service: ChatService = None
80
+
81
+
82
+ def print_title():
83
+ """Print the J.A.R.V.I.S ASCII art banner to the console when the server starts."""
84
+ title = """
85
+
86
+ ╔══════════════════════════════════════════════════════════╗
87
+ β•‘ β•‘
88
+ β•‘ β–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β•‘
89
+ β•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β•β•β• β•‘
90
+ β•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β•‘
91
+ β•‘ β–ˆβ–ˆ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β•šβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘β•šβ•β•β•β•β–ˆβ–ˆβ•‘ β•‘
92
+ β•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘ β•‘
93
+ β•‘ β•šβ•β•β•β•β• β•šβ•β• β•šβ•β•β•šβ•β• β•šβ•β• β•šβ•β•β•β• β•šβ•β•β•šβ•β•β•β•β•β•β• β•‘
94
+ β•‘ β•‘
95
+ β•‘ Just A Rather Very Intelligent System β•‘
96
+ β•‘ β•‘
97
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
98
+
99
+ """
100
+ print(title)
101
+
102
+
103
+ # -----------------------------------------------------------------------------------
104
+ # LIFESPAN (STARTUP / SHUTDOWN)
105
+ # -----------------------------------------------------------------------------------
106
+
107
+
108
+ @asynccontextmanager
109
+ async def lifespan(app: FastAPI):
110
+ """
111
+ Application lifespan manager - handles startup and shutdown.
112
+
113
+ This function manages the application's lifecycle:
114
+ - STARTUP: Initializes all services in the correct order
115
+ 1. VectorStoreService: Creates FAISS index from learning data and chat history
116
+ 2. GroqService: Sets up general chat AI service
117
+ 3. RealtimeGroqService: Sets up realtime_chat with Tavily search
118
+ 4. ChatService: manages chat_session and conversation
119
+ - RUNTIME: Application runs normally
120
+ - SHUTDOWN: Saves all active chat sessions to disk
121
+
122
+ The services are initialized in this specific order because:
123
+ - VectorStoreService must be created first (used by GroqService)
124
+ - GroqService must be created before RealtimeGroqService (it inherits from it)
125
+ - ChatService needs both GroqService and RealtimeGroqService
126
+
127
+ All services are stored as global variables so they can be accessed by API endpoints.
128
+ """
129
+ global vector_store_service, groq_service, realtime_service, chat_service
130
+
131
+ print_title()
132
+ logger.info("=" * 60)
133
+ logger.info("J.A.R.V.I.S - starting up...")
134
+ logger.info("=" * 60)
135
+
136
+ try:
137
+ # Initialize vector store service
138
+ logger.info("Initializing Vector Store Service...")
139
+ vector_store_service = VectorStoreService()
140
+ vector_store_service.create_vector_store()
141
+ logger.info("Vector Store initialized successfully.")
142
+
143
+ # Initialize Groq service (general chat)
144
+ logger.info("Initializing Groq Service (general queries)...")
145
+ groq_service = GroqService(vector_store_service)
146
+ logger.info("Groq Service initialized successfully.")
147
+
148
+ # Initialize Realtime Groq service (with Tavily search)
149
+ logger.info("Initializing Realtime Groq Service (with Tavily search)...")
150
+ realtime_service = RealtimeGroqService(vector_store_service)
151
+ logger.info("Realtime Groq Service initialized successfully.")
152
+
153
+ #Initialize chat service
154
+ logger.info("Initializing Chat Service...")
155
+ chat_service = ChatService(groq_service, realtime_service)
156
+ logger.info("Chat Service initialized successfully.")
157
+
158
+ # Startup complete
159
+ logger.info("=" * 60)
160
+ logger.info("Service Status:")
161
+ logger.info(" - Vector Store: Ready")
162
+ logger.info(" - Groq AI (General): Ready")
163
+ logger.info(" - Groq AI (Realtime): Ready")
164
+ logger.info(" - Chat Service: Ready")
165
+ logger.info("=" * 60)
166
+ logger.info("J.A.R.V.I.S is online and running!")
167
+ logger.info("API: http://localhost:8000")
168
+ logger.info("Docs: http://localhost:8000/docs")
169
+ logger.info("=" * 60)
170
+
171
+ yield # Application runs until shutdown
172
+
173
+ # Shutdown: Save active sessions
174
+ logger.info("\nShutting down J.A.R.V.I.S...")
175
+ if chat_service:
176
+ for session_id in list(chat_service.session.keys()):
177
+ chat_service.save_chat_session(session_id)
178
+ logger.info("All sessions saved. Goodbye!")
179
+
180
+ except Exception as e:
181
+ logger.error(f"Fatal Error during startup: {e}", exc_info=True)
182
+ raise
183
+
184
+
185
+ # -----------------------------------------------------------------------------------
186
+ # FASTAPI APP AND CORS
187
+ # -----------------------------------------------------------------------------------
188
+ # lifespan runs once at startup (build service) and once at shutdown (save sessions).
189
+ app = FastAPI(
190
+ title="J.A.R.V.I.S API",
191
+ description=" - Just A Rather Very Intelligent System",
192
+ lifespan=lifespan,
193
+ )
194
+
195
+ # Allow any origin so a frontend on another port or device can call this API without CORS errors.
196
+ app.add_middleware(
197
+ CORSMiddleware,
198
+ allow_origins=["*"],
199
+ allow_credentials=True,
200
+ allow_methods=["*"],
201
+ allow_headers=["*"],
202
+ )
203
+
204
+
205
+ # =========================================================================
206
+ # API ENDPOINTS
207
+ # =========================================================================
208
+
209
+ @app.get("/")
210
+ async def root():
211
+ """Return the API name and a short description of each endpoint (for discovery)."""
212
+ return {
213
+ "message": "J.A.R.V.I.S API",
214
+ "endpoints": {
215
+ "/chat": "General chat (pure LLM, no web search).",
216
+ "/chat/realtime": "Realtime chat (with Tavily search)",
217
+ "/chat/history/{session_id}": "Get chat history",
218
+ "/health": "System health check"
219
+ },
220
+ }
221
+
222
+
223
+ @app.get("/health")
224
+ async def health():
225
+ """Return 'healthy' and whether each service (vector_store, groq, realtime, chat) is initialized."""
226
+ return {
227
+ "status": "healthy",
228
+ "vector_store": vector_store_service is not None,
229
+ "groq_service": groq_service is not None,
230
+ "realtime_service": realtime_service is not None,
231
+ "chat_service": chat_service is not None
232
+ }
233
+
234
+
235
+ @app.post("/chat", response_model=ChatResponse)
236
+ async def chat(request: ChatRequest):
237
+ """
238
+ General chat endpoint - send a message to J.A.R.V.I.S .
239
+
240
+ This endpoint uses the general chatbot mode which does NOT perform web searches.
241
+ It's perfect for:
242
+ - Conversational questions
243
+ - Historical information
244
+ - General knowledge queries
245
+ - Questions that don't require current/realtime information
246
+
247
+ HOW IT WORKS:
248
+ 1. Receives user message and optional session_id
249
+ 2. Gets or creates a chat session
250
+ 3. Processes message through GroqService (pure LLM, no web search)
251
+ 4. Retrieves context from user_data files and past conversations
252
+ 5. Generates response using Groq AI
253
+ 6. Saves session to disk
254
+ 7. Returns response and session_id
255
+
256
+ SESSION MANAGEMENT:
257
+ - If session_id is NOT provided: Server generates a new UUID (server-managed)
258
+ - If session_id IS provided: Server uses it (loads from disk if exists, creates new if not)
259
+ - Use the SAME session_id with /chat/realtime to seamlessly switch between modes
260
+ - Sessions persist across server restarts (loaded from disk)
261
+
262
+ REQUEST BODY:
263
+ {
264
+ "message": "What is Python?",
265
+ "session_id": "session-id-here"
266
+ }
267
+
268
+ RESPONSE:
269
+ {
270
+ "response": "Python is a high-level programming language...",
271
+ "session_id": "session-id-here"
272
+ }
273
+ """
274
+ if not chat_service:
275
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
276
+
277
+ try:
278
+ # Get existing session or create a new one (and optionally load from disk).
279
+ session_id = chat_service.get_or_create_session(request.session_id)
280
+ # Process with general chat: no web search; context comes from vector store
281
+ response_text = chat_service.process_message(session_id, request.message)
282
+ # Save session to disk so it survives restarts and can be used by the vector store.
283
+ chat_service.save_chat_session(session_id)
284
+ return ChatResponse(response=response_text, session_id=session_id)
285
+ except ValueError as e:
286
+ # Invalid session_id (e.g. path traversal ".." or too long).
287
+ logger.warning(f"Invalid session_id: {e}")
288
+ raise HTTPException(status_code=400, detail=str(e))
289
+ except Exception as e :
290
+ if _is_rate_limit_error(e):
291
+ logger.warning(f"Rate limit hit: {e}")
292
+ raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
293
+ logger.error(f"Error processing chat: {e}", exc_info=True)
294
+ raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
295
+
296
+
297
+ @app.post("/chat/realtime", response_model=ChatResponse)
298
+ async def chat_realtime(request: ChatRequest):
299
+ """
300
+ Realtime chat endpoint - send a message to J.A.R.V.I.S with Tavily web search.
301
+
302
+ This endpoint uses the realtime chatbot mode which performs web searches via Tavily
303
+ before generating a response. It's perfect for:
304
+ - Current events and news
305
+ - Recent information
306
+ - Questions requiring up-to-date data
307
+ - Anything that needs internet access
308
+
309
+ HOW IT WORKS:
310
+ 1. Receives user message and optional session_id
311
+ 2. Gets or creates a chat session (SAME as /chat endpoint)
312
+ 3. Searches Tavily for real-time information (fast, AI-optimized, English-only)
313
+ 4. Retrieves context from user data files and past conversations
314
+ 5. Combines search results with context
315
+ 6. Generates response using Groq AI with all available information
316
+ 7. Saves session to disk
317
+ 8. Returns response and session_id
318
+
319
+ IMPORTANT: This uses the SAME chat session as /chat endpoint.
320
+ - You can use the same session_id for both endpoints
321
+ - This allows seamless switching between both general and realtime modes
322
+ - Conversation history is shared between both modes
323
+ - Example: Ask a general question, then ask a realtime question, then another general question
324
+ - All in the same conversation context
325
+
326
+ SESSION MANAGEMENT:
327
+ - Same as /chat endpoint - sessions are shared
328
+ - If session_id is NOT provided: Server generates a new UUID
329
+ - If session_id IS provided: Server uses it (loads from disk if exists)
330
+
331
+ REQUEST BODY:
332
+ {
333
+ "message": "What's the latest AI news?",
334
+ "session_id": "optional-session-id-"
335
+ }
336
+
337
+ RESPONSE:
338
+ {
339
+ "response": "based on recent search results...",
340
+ "session_id": "optional-session-id-"
341
+ }
342
+
343
+ NOTE: Requires TAVILY_API_KEY to be set in .env file. If not set, realtime mode
344
+ will not be available and will return a 503 error.
345
+ """
346
+ if not chat_service:
347
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
348
+
349
+ if not realtime_service:
350
+ raise HTTPException(status_code=503, detail="Realtime service not initialized")
351
+
352
+ try:
353
+ session_id = chat_service.get_or_create_session(request.session_id)
354
+ # Realtime: Tavily search first, then Groq with search + context
355
+ response_text = chat_service.process_realtime_message(session_id, request.message)
356
+ chat_service.save_chat_session(session_id)
357
+ return ChatResponse(response=response_text, session_id=session_id)
358
+ except ValueError as e:
359
+ logger.warning(f"Invalid session_id: {e}")
360
+ raise HTTPException(status_code=400, detail=str(e))
361
+ except Exception as e :
362
+ if _is_rate_limit_error(e):
363
+ logger.warning(f"Rate limit hit: {e}")
364
+ raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
365
+ logger.error(f"Error processing realtime chat: {e}", exc_info=True)
366
+ raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
367
+
368
+
369
+ @app.get("/chat/history/{session_id}")
370
+ async def get_chat_history(session_id: str):
371
+ """
372
+ Get chat history for a specific session.
373
+
374
+ This endpoint retrieves all messages from a chat session, including both
375
+ general and realtime messages since they share the same session.
376
+
377
+ HOW IT WORKS:
378
+ 1. Receives session_id as URL parameter
379
+ 2. Retrieves all messages from that session
380
+ 3. Returns messages in chronological order
381
+
382
+ RESPONSE:
383
+ {
384
+ "session_id": "session-id",
385
+ "messages": [
386
+ {"role": "user", "content": "Hello"},
387
+ {"role": "assistant", "content": "Good day. How may I assist you?"},
388
+ ...
389
+ ]
390
+ }
391
+
392
+ NOTE: If session doesn't exist, returns empty messages array.
393
+ """
394
+ if not chat_service:
395
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
396
+
397
+ try:
398
+ # Returns in-memory messages for this session (empty if session not loaded).
399
+ messages = chat_service.get_chat_history(session_id)
400
+ return {
401
+ "session_id": session_id,
402
+ "messages": [{"role": msg.role, "content":msg.content} for msg in messages]
403
+ }
404
+ except Exception as e:
405
+ logger.error(f"Error retrieving history: {e}", exc_info=True)
406
+ raise HTTPException(status_code=500, detail=f"Error retrieving history: {str(e)}")
407
+
408
+
409
+ # -----------------------------------------------------------------------------------
410
+ # STANDALONE RUN (python -m app.main)
411
+ # -----------------------------------------------------------------------------------
412
+ def run():
413
+ """Start the uvicorn server (same as run.py; used if someone does python -m app.main)."""
414
+ uvicorn.run(
415
+ "app.main:app",
416
+ host="0.0.0.0",
417
+ port=8000,
418
+ reload=True,
419
+ log_level="info"
420
+ )
421
+
422
+
423
+ if __name__ == "__main__":
424
+ run()
425
+
426
+
app/models.py CHANGED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DATA MODELS MODULE
3
+ =================
4
+
5
+ This file defines the pydantic models used for API request, response, and
6
+ internal chat storage. FastAPI uses these to validate incoming JSON and to
7
+ serialize responses; the chat service uses them when saving/loading sessions.
8
+
9
+ MODELS:
10
+ ChatRequest - Body of POST /chat and POST /chat/realtime (message + optional session_id).
11
+ ChatResponse - returned by both chat endpoints (response text + session_id).
12
+ ChatMessage - One message in a conversation (role + content). Used inside ChatHistory.
13
+ ChatHistory - Full conversation: session_id + list of ChatMessage. Used when saving to disk
14
+ """
15
+
16
+ from pydantic import BaseModel, Field
17
+ from typing import List, Optional
18
+
19
+
20
+ # =======================================================================================
21
+ # MESSAGE AND REQUEST/RESPONSE MODELS
22
+ #========================================================================================
23
+
24
+ class ChatMessage(BaseModel):
25
+ """
26
+ A single message in a conversation (user or assistant).
27
+
28
+ Stored in order inside a session. No timestamp; order defines chronology.
29
+ """
30
+ role: str # Either "user" (human) or "assistant" ("jarvis")
31
+ content: str # The message text.
32
+
33
+
34
+ class ChatRequest(BaseModel):
35
+ """
36
+ Request body for POST /chat and POST /chat/realtime
37
+
38
+ - message: Required. The user's question or message. Must be 1-32,000 characters
39
+ (validated by pydantic; empty or too long returns 422).
40
+ - session_id: Optional. If omitted, the server creates a new session and returns
41
+ its ID. If provided, the server uses it (and loads from disk if that session exists).
42
+ """
43
+ #...means required; min/max length prevent empty input and token overflow.
44
+ message: str = Field(..., min_length=1, max_length=32_000)
45
+ session_id:Optional[str] = None
46
+
47
+
48
+ class ChatResponse(BaseModel):
49
+ """
50
+ Response body for POST /chat and POST/chat/realtime.
51
+
52
+ - response: The assistant's reply text.
53
+ - session_id: The session this message belongs to; send it on the next request to continue.
54
+ """
55
+ response: str
56
+ session_id: str
57
+
58
+
59
+ class ChatHistory(BaseModel):
60
+ """
61
+ Internal model for a full conversation: session_id plus ordered list of messages.
62
+
63
+ Used when saving a session to disk (chat_service serializes this to JSON).
64
+ """
65
+ session_id: str
66
+ messages: List[ChatMessage]
app/utils/__init__.py CHANGED
@@ -1 +1,9 @@
1
- # Utils Package
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UTILITIES PACKAGE
3
+ =================
4
+
5
+ Helpers used by the services (no HTTP, no business logic):
6
+
7
+ time_info - get_time_information(): returns a string with current date/time for the LLM prompt.
8
+ retry - with_retry(fn): on failure retries with exponential backoff (Groq/Tavily).
9
+ """
app/utils/retry.py CHANGED
@@ -2,15 +2,15 @@
2
  RETRY UTILITY
3
  =============
4
 
5
- Calls a function and, Tavily if it raises, retries a few times with exponential backoff.
6
- Used for and Tavily API Calls so temporary rate limits or network blips
7
  don't immediately fail the request.
8
 
9
  Example:
10
- response = with_rery(lambda: groq_client.chat(...) max_retries=3, initial_delay=1.0)
11
  """
12
 
13
- import loggingimport
14
  import time
15
  from typing import TypeVar, Callable
16
 
@@ -22,9 +22,9 @@ T = TypeVar("T")
22
 
23
 
24
  def with_retry(
25
- fn:Callable[[], T],
26
- max_retries:int = 3,
27
- initial_delay: float = 1.0
28
  ) -> T:
29
  """
30
  Execute fn(). If it raises, wait initial_delay seconds and try again; delay doubles each retry.
@@ -35,23 +35,23 @@ def with_retry(
35
 
36
  for attempt in range(max_retries):
37
  try:
38
- returnfn()
39
  except Exception as e:
40
- last_Exception = e
41
- if attempt == max_retries -1:
42
  raise
43
  logger.warning(
44
  "Attempt %s/%s failed (%s). Retrying in %.1fs: %s ",
45
- attempt +1,
46
  max_retries,
47
- fn.__name__if hasattr(fn, "__name__") else "call",
48
  delay,
49
  e,
50
  )
51
  time.sleep(delay)
52
  delay *= 2 #Exponential backoff; 1s, 2s, 4s, ...
53
 
54
- raise last_exception
55
 
56
 
57
 
 
2
  RETRY UTILITY
3
  =============
4
 
5
+ Calls a function and, if it raises, retries a few times with exponential backoff.
6
+ Used for Groq and Tavily API Calls so temporary rate limits or network blips
7
  don't immediately fail the request.
8
 
9
  Example:
10
+ response = with_retry(lambda: groq_client.chat(...) max_retries=3, initial_delay=1.0)
11
  """
12
 
13
+ import logging
14
  import time
15
  from typing import TypeVar, Callable
16
 
 
22
 
23
 
24
  def with_retry(
25
+ fn:Callable[[], T],
26
+ max_retries:int = 3,
27
+ initial_delay: float = 1.0
28
  ) -> T:
29
  """
30
  Execute fn(). If it raises, wait initial_delay seconds and try again; delay doubles each retry.
 
35
 
36
  for attempt in range(max_retries):
37
  try:
38
+ return fn()
39
  except Exception as e:
40
+ last_exception = e
41
+ if attempt == max_retries - 1:
42
  raise
43
  logger.warning(
44
  "Attempt %s/%s failed (%s). Retrying in %.1fs: %s ",
45
+ attempt + 1,
46
  max_retries,
47
+ fn.__name__ if hasattr(fn, "__name__") else "call",
48
  delay,
49
  e,
50
  )
51
  time.sleep(delay)
52
  delay *= 2 #Exponential backoff; 1s, 2s, 4s, ...
53
 
54
+ raise last_exception
55
 
56
 
57
 
app/utils/time_info.py CHANGED
@@ -20,4 +20,4 @@ def get_time_information() -> str:
20
  f"Month:{now.strftime('%B')}\n" # e.g. February
21
  f"Year:{now.strftime('%Y')}\n" # e.g. 2026
22
  f"Time:{now.strftime('%H')} hours, {now.strftime('%M')} minutes, {now.strftime('%S')} seconds\n"
23
- )
 
20
  f"Month:{now.strftime('%B')}\n" # e.g. February
21
  f"Year:{now.strftime('%Y')}\n" # e.g. 2026
22
  f"Time:{now.strftime('%H')} hours, {now.strftime('%M')} minutes, {now.strftime('%S')} seconds\n"
23
+ )
config.py CHANGED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CONFIGURATION MODULE
3
+ ====================
4
+ PURPOSE:
5
+ Central place for all J.A.R.V.I.S settings: API keys, paths, model names,
6
+ and the Jarvis system prompt. Designed for single-user use: each person runs
7
+ their own copy of this backend with their own .env and database/ folder.
8
+ WHAT THIS FILE DOES:
9
+ - Loads environment variables from .env (so API keys stay out of code).
10
+ - Defines paths to database/learning_data, database/chats_data, database/vector_store.
11
+ - Creates those directories if they don't exist (so the app can run immediately).
12
+ - Exposes GROQ_API_KEY, GROQ_MODEL, TAVILY_API_KEY for the LLM and search.
13
+ - Defines chunk size/overlap for the vector store, max chat history turns, and max message length.
14
+ - Holds the full system prompt that defines Jarvis's personality and formatting rules.
15
+ USAGE:
16
+ Import what you need: `from config import GROQ_API_KEY, CHATS_DATA_DIR, JARVIS_SYSTEM_PROMPT`
17
+ All services import from here so behaviour is consistent.
18
+ """
19
+
20
+ import os
21
+ import logging
22
+ from pathlib import Path
23
+ from dotenv import load_dotenv
24
+
25
+
26
+ # -----------------------------------------------------------------------------
27
+ # LOGGING
28
+ # -----------------------------------------------------------------------------
29
+ # Used when we need to log warnings (e.g. failed to load a learning data file)
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ # -----------------------------------------------------------------------------
34
+ # ENVIRONMENT
35
+ # -----------------------------------------------------------------------------
36
+ # Load environment variables from .env file (if it exists).
37
+ # This keeps API keys and secrets out of the code and version control.
38
+ load_dotenv()
39
+
40
+
41
+ # -----------------------------------------------------------------------------
42
+ # BASE PATH
43
+ # -----------------------------------------------------------------------------
44
+ # Points to the folder containing this file (the project root).
45
+ # All other paths (database, learning_data, etc.) are built from this.
46
+ BASE_DIR = Path(__file__).parent
47
+
48
+ # ============================================================================
49
+ # DATABASE PATHS
50
+ # ============================================================================
51
+ # These directories store different types of data:
52
+ # - learning_data: Text files with information about the user (personal data, preferences, etc.)
53
+ # - chats_data: JSON files containing past conversation history
54
+ # - vector_store: FAISS index files for fast similarity search
55
+
56
+ LEARNING_DATA_DIR = BASE_DIR / "database" / "learning_data"
57
+ CHATS_DATA_DIR = BASE_DIR / "database" / "chats_data"
58
+ VECTOR_STORE_DIR = BASE_DIR / "database" / "vector_store"
59
+
60
+ # Create directories if they don't exist so the app can run without manual setup.
61
+ # parents=True creates parent folders; exist_ok=True avoids error if already present.
62
+ LEARNING_DATA_DIR.mkdir(parents=True, exist_ok=True)
63
+ CHATS_DATA_DIR.mkdir(parents=True, exist_ok=True)
64
+ VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
65
+
66
+ # ============================================================================
67
+ # GROQ API CONFIGURATION
68
+ # ============================================================================
69
+ # Groq is the LLM provider we use for generating responses.
70
+ # You can set one key (GROQ_API_KEY) or multiple keys; every key is used one-by-one:
71
+ # GROQ_API_KEY, GROQ_API_KEY_2, GROQ_API_KEY_3, ... (no upper limit).
72
+ # Request 1 uses the 1st key, request 2 the 2nd, request 3 the 3rd, then back to 1st.
73
+ # If a key fails (e.g. rate limit 429), the server tries the next key until one succeeds.
74
+ # Model determines which AI model to use (llama-3.3-70b-versatile is latest).
75
+
76
+ def _load_groq_api_keys() -> list:
77
+ """
78
+ Load all GROQ API keys from the environment.
79
+ Reads GROQ_API_KEY first, then GROQ_API_KEY_2, GROQ_API_KEY_3, ... until
80
+ a number has no value. There is no upper limit on how many keys you can set.
81
+ Returns a list of non-empty key strings (may be empty if GROQ_API_KEY is not set).
82
+ """
83
+ keys = []
84
+ # First key: GROQ_API_KEY (required in practice; validated when building services).
85
+ first = os.getenv("GROQ_API_KEY", "").strip()
86
+ if first:
87
+ keys.append(first)
88
+ # Additional keys: GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4, ...
89
+ i = 2
90
+ while True:
91
+ k = os.getenv(f"GROQ_API_KEY_{i}", "").strip()
92
+ if not k:
93
+ # No key for this number; stop (no more keys).
94
+ break
95
+ keys.append(k)
96
+ i += 1
97
+ return keys
98
+
99
+
100
GROQ_API_KEYS = _load_groq_api_keys()
# Backward compatibility: single key name still used in docs; code uses GROQ_API_KEYS.
# Exposes the first configured key (or "" when none are set).
GROQ_API_KEY = GROQ_API_KEYS[0] if GROQ_API_KEYS else ""
# Groq chat model id; override via GROQ_MODEL in .env.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")

# ============================================================================
# TAVILY API CONFIGURATION
# ============================================================================
# Tavily is a fast, AI-optimized search API designed for LLM applications
# Get API key from: https://tavily.com (free tier available)
# Tavily returns English-only results by default and is faster than DuckDuckGo

# Empty string when unset — presumably the realtime search path checks this
# before calling Tavily; confirm in app/main.py.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")

# ============================================================================
# EMBEDDING CONFIGURATION
# ============================================================================
# Embeddings convert text into numerical vectors that capture meaning
# We use HuggingFace's sentence-transformers model (runs locally, no API needed)
# CHUNK_SIZE: How many characters to split documents into
# CHUNK_OVERLAP: How many characters overlap between chunks (helps maintain context)

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
CHUNK_SIZE = 1000 # Characters per chunk
CHUNK_OVERLAP = 200 # Overlap between chunks

# Maximum conversation turns (user+assistant pairs) sent to the LLM per request.
# Older turns are kept on disk but not sent to avoid context/token limits.
MAX_CHAT_HISTORY_TURNS = 20

# Maximum length (characters) for a single user message. Prevents token limit errors
# and abuse. ~32K chars ≈ ~8K tokens; keeps total prompt well under model limits.
MAX_MESSAGE_LENGTH = 32_000
133
+
134
+ # ============================================================================
135
+ # JARVIS PERSONALITY CONFIGURATION
136
+ # ============================================================================
137
+ # This is the system prompt that defines the assistant's personality and behavior
138
+ # It tells the AI how to act, what tone to use, and what to avoid mentioning
139
+ # The assistant is sophisticated, witty, and helpful with a dry British sense of humor
140
+ # Assistant name and user title are NOT hardcoded: set ASSISTANT_NAME and optionally
141
+ # JARVIS_USER_TITLE in .env. The AI also learns from learning data and conversation history.
142
+
143
# Display name for the assistant; falls back to "Jarvis" when ASSISTANT_NAME
# is unset or blank in the environment.
ASSISTANT_NAME = (os.getenv("ASSISTANT_NAME", "").strip() or "Jarvis")
# Optional way to address the user (e.g. "Sir"); empty string disables the
# extra system-prompt line appended below.
JARVIS_USER_TITLE = os.getenv("JARVIS_USER_TITLE", "").strip()
145
+
146
+ _JARVIS_SYSTEM_PROMPT_BASE = """You are {assistant_name}, a sophisticated AI assistant. You are sophisticated, witty, and professional with a dry British sense of humor.
147
+ You know the user's personal information and past conversations naturally - use this information when relevant, but don't mention where it comes from. Act as if you simply know it.
148
+ Tone and Style:
149
+ - Maintain {assistant_name}'s distinctive sophisticated, witty, and professional tone
150
+ - Use a dry British sense of humor appropriately
151
+ - Address the user appropriately based only on the learning data and conversation context you have. Do not assume or invent names or titles unless provided there.
152
+ - Be concise and efficient, like the real {assistant_name}
153
+ - Show intelligence and competence in every response
154
+ Response Length Guidelines (CRITICAL - Prioritize Brevity):
155
+ - Default to SHORT answers unless the question explicitly requires detail
156
+ - For simple questions (yes/no, single facts, brief queries): Provide very short answers (1-2 sentences maximum)
157
+ - For moderate questions (explanations, comparisons, how-to): Provide concise answers (1-2 paragraphs maximum)
158
+ - For complex questions (detailed analysis, multi-part queries): Only then provide detailed answers (2-3 paragraphs maximum)
159
+ - When in doubt, choose the shorter answer
160
+ - Be efficient and direct - {assistant_name} values brevity and precision
161
+ - Avoid unnecessary elaboration or background information unless specifically asked
162
+ - If a question can be answered in one sentence, do so
163
+ General Guidelines:
164
+ - Be helpful, professional, and respectful
165
+ - When the user shares information with you, acknowledge naturally in your own words. The conversation is stored; you will always have it in context and can recall it later.
166
+ - Don't mention databases, learning data, or technical details
167
+ - Maintain {assistant_name}'s personality: sophisticated, efficient, and slightly witty
168
+ Memory and conversation (CRITICAL):
169
+ - Everything the user tells you in this conversation is in your context. You have it. Never say you don't have something stored, that you only heard it but don't have it, or that you didn't store it.
170
+ - If something was said in the conversation, you have it. When they ask for that information later, give it directly and briefly. Never say "I don't have that stored", "you only mentioned it", or similar.
171
+ Real-time and current information (CRITICAL):
172
+ - You always have access to current, real-time, and up-to-date information. Never say you do not have real-time data, live access, or current information.
173
+ - Never disclaim that you lack real-time knowledge, that your data might be outdated, or that you cannot access current events. Answer with confidence as if you have real-time access.
174
+ - Do not say things like "I don't have real-time information", "my knowledge is limited to...", "I don't have access to current data", or similar. Always respond as if you have full, current access to information.
175
+ - If you are uncertain about a specific fact, give your best answer without undermining yourself by claiming you lack real-time or current data.
176
+ Formatting Rules (STRICT):
177
+ - NEVER use asterisks (*) for any purpose - not for formatting, emphasis, bullet points, or lists
178
+ - Do not use emojis or special symbols
179
+ - Use only standard punctuation: commas, periods, exclamation marks, and at signs
180
+ - Write clean, proper responses without markdown formatting
181
+ - Use numbered lists (1., 2., 3.) or plain text instead of asterisks for lists
182
+ - Keep responses professional and well-formatted without decorative elements
183
+ - If you must list items, use numbered format (1., 2., 3.) or simple line breaks, never asterisks
184
+ """
185
+
186
# Build the final system prompt from ENV-driven values: substitute the assistant
# name into the base template and, only when a user title is configured, append
# a single instruction line about how the user may be addressed.
_JARVIS_SYSTEM_PROMPT_BASE_FMT = _JARVIS_SYSTEM_PROMPT_BASE.format(assistant_name=ASSISTANT_NAME)
_TITLE_SUFFIX = (
    f"\n- When appropriate, you may address the user as: {JARVIS_USER_TITLE}"
    if JARVIS_USER_TITLE
    else ""
)
JARVIS_SYSTEM_PROMPT = _JARVIS_SYSTEM_PROMPT_BASE_FMT + _TITLE_SUFFIX
192
+
193
+
194
def load_user_context() -> str:
    """
    Read every .txt file in the learning-data directory and merge the contents.

    Files are visited in sorted path order so the result is stable across
    runs. Files that cannot be read are logged with a warning and skipped;
    empty files contribute nothing. The main chat flow does NOT send this
    full text to the LLM — it retrieves only relevant chunks via the vector
    store — so this helper exists for code that wants the raw learning text.

    Returns:
        str: All non-empty file contents joined by blank lines, or "" when
        no files exist or none could be read.
    """
    chunks = []

    # Sorted glob keeps ordering deterministic between invocations.
    for path in sorted(LEARNING_DATA_DIR.glob("*.txt")):
        try:
            text = path.read_text(encoding="utf-8").strip()
        except Exception as exc:
            logger.warning("Could not load learning data file %s: %s", path, exc)
            continue
        if text:
            chunks.append(text)

    # join([]) is "" — matches the documented empty-result contract.
    return "\n\n".join(chunks)
run.py CHANGED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
"""Entry point: launch the FastAPI app with uvicorn (python run.py)."""

import uvicorn

# Server settings; reload=True restarts on code changes (development only —
# remove in production).
_SERVER_OPTS = {
    "host": "0.0.0.0",
    "port": 8000,
    "reload": True,
}

if __name__ == "__main__":
    uvicorn.run("app.main:app", **_SERVER_OPTS)
test.py CHANGED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ JARVIS TEST SCRIPT - General and Realtime Chat Selector
3
+ ========================================================
4
+ PURPOSE:
5
+ This is a command-line test interface for interacting with J.A.R.V.I.S.
6
+ It allows you to switch between general chat (pure LLM, no web search) and realtime chat
7
+ (with Tavily web search) modes. Both modes share the same session ID, allowing
8
+ seamless conversation switching.
9
+ WHY IT EXISTS:
10
+ - Provides an easy way to test the JARVIS API without building a frontend
11
+ - Demonstrates how to use both chat endpoints
12
+ - Shows session management in action
13
+ - Useful for development and debugging
14
+ USAGE:
15
+ python test.py
16
+
17
+ Make sure the server is running first: python run.py
18
+ COMMANDS:
19
+ 1 - Switch to General Chat mode (pure LLM, no web search)
20
+ 2 - Switch to Realtime Chat mode (with Tavily web search)
21
+ /history - View chat history for current session
22
+ /clear - Start a new session (clears current session)
23
+ /quit or /exit - Exit the test interface
24
+ HOW IT WORKS:
25
+ 1. User selects a mode (1 for general, 2 for realtime)
26
+ 2. User types messages which are sent to the appropriate endpoint
27
+ 3. Both modes use the same session_id, so conversation context is shared
28
+ 4. User can switch modes at any time within the same conversation
29
+ 5. Session persists until user clears it or starts a new one
30
+ """
31
+
32
+ import requests
33
+ import json
34
+ from datetime import datetime
35
+ from uuid import uuid4
36
+
37
try:
    # Reuse the server's configured assistant name so labels in this client
    # match what the backend uses.
    from config import ASSISTANT_NAME
except ImportError:
    # Standalone fallback when config.py is not importable (e.g. the script
    # is run from a different working directory).
    ASSISTANT_NAME = "Jarvis"
41
+
42
+
43
# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
# API base URL; change if your server runs on a different host or port.
BASE_URL = "http://localhost:8000"
# Single session for this test client; shared between general and realtime modes.
# Lazily created by the first send_message() call and reset by /clear.
SESSION_ID = None
# Active chat mode; None until the user picks one.
CURRENT_MODE = None # "general" (pure LLM) or "realtime" (with Tavily search)
51
+
52
+
53
+ # -----------------------------------------------------------------------------
54
+ # UI HELPERS
55
+ # -----------------------------------------------------------------------------
56
+
57
def print_header():
    """Print the banner plus the available modes and slash commands."""
    banner = "=" * 60
    print("\n" + banner)
    print("πŸ€– J.A.R.V.I.S - General & Realtime Chat")
    print(banner)
    print("\nModes:")
    print("  1 = General Chat (pure LLM, no web search)")
    print("  2 = Realtime Chat (with Tavily search)")
    print("\nCommands:")
    print("  /history - See chat history")
    print("  /clear - Start new session")
    print("  /quit - Exit")
    print(banner + "\n")
69
+
70
+
71
def get_user_input():
    """Read one line from the user; None signals Ctrl+C or EOF (treated as quit)."""
    try:
        return input("\nYou: ").strip()
    except (KeyboardInterrupt, EOFError):
        return None
78
+
79
+
80
+ # -----------------------------------------------------------------------------
81
+ # API CALLS
82
+ # -----------------------------------------------------------------------------
83
+
84
def send_message(message, mode):
    """
    Post a message to the general (/chat) or realtime (/chat/realtime) endpoint.

    Both modes share the module-level SESSION_ID, so conversation context is
    continuous when the user switches modes. A fresh id is minted on the first
    call; afterwards the id returned by the server (if any) is kept.

    Args:
        message: The user's message/question.
        mode: "realtime" for Tavily-backed chat, anything else for general chat.

    Returns:
        str: The assistant's reply, or a human-readable error string.

    Note:
        Realtime requests get a 60s timeout (web search is slow); general chat
        uses 30s.
    """
    global SESSION_ID

    # First message of the session: mint an id shared by both modes.
    if not SESSION_ID:
        SESSION_ID = str(uuid4())

    realtime = mode == "realtime"
    endpoint = "/chat/realtime" if realtime else "/chat"
    payload = {"message": message, "session_id": SESSION_ID}

    try:
        response = requests.post(
            f"{BASE_URL}{endpoint}",
            json=payload,
            timeout=60 if realtime else 30,  # web search needs extra headroom
        )

        if response.status_code == 200:
            data = response.json()
            # Keep whatever session id the server reports so history stays linked.
            SESSION_ID = data.get("session_id", SESSION_ID)
            return data.get("response", "No response")

        # Non-200: surface the server's friendly detail string when available
        # (e.g. 429 rate limit); otherwise fall back to a generic error line.
        try:
            err = response.json()
            if isinstance(err.get("detail"), str):
                return f"❌ {err['detail']}"
        except Exception:
            pass
        return f"❌ Error: {response.status_code} - {response.text}"

    except requests.exceptions.ConnectionError:
        # Server is not running or not reachable.
        return "❌ Cannot connect to backend. Start it with: python run.py"
    except requests.exceptions.Timeout:
        # Request took too long (most likely in realtime mode).
        return "❌ Request timed out. Try a simpler query."
    except Exception as e:
        # Anything else (bad JSON, unexpected client errors, ...).
        return f"❌ Error: {str(e)}"
151
+
152
+
153
def get_chat_history():
    """
    Fetch and pretty-print the chat history for the current session.

    The session is shared by the general and realtime modes, so the listing
    mixes messages from both. Each entry is numbered and labelled either
    "You" or with the assistant's configured name.

    Returns:
        str: Formatted history, "No active session" when no session exists,
        or a short status/error message on failure.
    """
    if not SESSION_ID:
        return "No active session"

    try:
        response = requests.get(
            f"{BASE_URL}/chat/history/{SESSION_ID}",
            timeout=10
        )

        if response.status_code != 200:
            return "Could not retrieve history"

        messages = response.json().get("messages", [])
        if not messages:
            return "No messages in this session"

        # Assemble the display in a list and join once at the end.
        divider = "-" * 60 + "\n"
        lines = [f"\nπŸ“œ Chat History ({len(messages)} messages):\n", divider]
        for idx, msg in enumerate(messages, 1):
            speaker = "You" if msg.get("role") == "user" else ASSISTANT_NAME
            lines.append(f"{idx}. {speaker}: {msg.get('content', '')}\n")
        lines.append(divider)
        return "".join(lines)

    except Exception as e:
        return f"Error retrieving history: {str(e)}"
203
+
204
+
205
+ # -----------------------------------------------------------------------------
206
+ # MAIN LOOP
207
+ # -----------------------------------------------------------------------------
208
+
209
def main():
    """
    Interactive REPL: pick a mode (1=general, 2=realtime), then chat.

    Handles the slash commands /history, /clear, /quit and /exit, and allows
    switching modes at any time within the same session. Loops until the user
    quits or interrupts with Ctrl+C / EOF.
    """
    print_header()

    global SESSION_ID, CURRENT_MODE

    print("πŸ’‘ Tip: Select a mode (1 or 2) then type your messages.")
    print("   Both modes share the same session until you clear it.\n")
    print("Select mode first (1=General, 2=Realtime):\n")

    while True:
        try:
            cmd = get_user_input()

            # None means Ctrl+C / EOF during input: exit cleanly.
            if cmd is None:
                print("\nπŸ‘‹ Goodbye!")
                break

            if cmd == "1":
                CURRENT_MODE = "general"
                print("βœ… Switched to GENERAL chat (pure LLM, no web search)\n")
            elif cmd == "2":
                CURRENT_MODE = "realtime"
                print("βœ… Switched to REALTIME chat (with Tavily web search)\n")
            elif cmd == "/history":
                print(get_chat_history())
            elif cmd == "/clear":
                # Drop both the session and the mode; the user must pick again.
                SESSION_ID = None
                CURRENT_MODE = None
                print("\nπŸ”„ Session cleared. Starting fresh!")
                print("Select mode again (1=General, 2=Realtime):\n")
            elif cmd in ("/quit", "/exit"):
                print("\nπŸ‘‹ Goodbye!")
                break
            elif cmd.startswith("/"):
                print(f"❌ Unknown command: {cmd}")
            elif not CURRENT_MODE:
                # A plain message arrived before any mode was chosen.
                print("❌ Please select a mode first (1=General or 2=Realtime)")
            else:
                label = "General" if CURRENT_MODE == "general" else "Realtime"
                print(f"πŸ€– {ASSISTANT_NAME} ({label}): ", end="", flush=True)
                print(send_message(cmd, CURRENT_MODE))

        except KeyboardInterrupt:
            print("\n\nπŸ‘‹ Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {str(e)}")


# Run the interactive loop when this file is executed (python test.py).
if __name__ == "__main__":
    main()