coder-vansh commited on
Commit
24da4a9
Β·
1 Parent(s): b49c393

All files are completed.

Browse files
Files changed (8) hide show
  1. app/main.py +426 -0
  2. app/models.py +66 -0
  3. app/utils/__init__.py +9 -1
  4. app/utils/retry.py +13 -13
  5. app/utils/time_info.py +1 -1
  6. config.py +220 -0
  7. run.py +9 -0
  8. test.py +278 -0
app/main.py CHANGED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MAIN MODULE
3
+ ============================
4
+
5
+ This module defines the FastAPI application and all HTTP endpoints. It is
6
+ designed for single-user use: one person runs one server (e.g. python run.py)
7
+ and uses it as their personal J.A.R.V.I.S backend. Many people can each run
8
+ their own copy of this code on their own machine.
9
+
10
+ ENDPOINTS:
11
+ GET / - Returns the API name and a list of endpoints.
12
+ GET /health - Returns status of all services (for monitoring).
13
+ POST /chat - General chat: pure LLM, no web search. Uses learning data
14
+ and past chats via vector-store retrieval only.
15
+ POST /chat/realtime - Realtime chat: runs a Tavily web search first, then
16
+ sends results + context to Groq. Same session as /chat.
17
+ GET /chat/history/{id} - Returns all messages for a session (general + realtime).
18
+
19
+ SESSION:
20
+ Both /chat and /chat/realtime use the same session_id. If you omit session_id,
21
+ the server generates a UUID and returns it; send it back on the next request
22
+ to continue the conversation. Sessions are saved to disk and survive restarts.
23
+
24
+ STARTUP:
25
+ On startup, the lifespan function builds the vector store from learning_data/*.txt
26
+ and chats_data/*.json, then creates Groq, Realtime, and Chat services. On shutdown,
27
+ it saves all in memory sessions to disk.
28
+ """
29
+
30
+ from fastapi import FastAPI, HTTPException
31
+ from fastapi.middleware.cors import CORSMiddleware
32
+ from contextlib import asynccontextmanager
33
+ import uvicorn
34
+ import logging
35
+
36
+ from app.models import ChatRequest, ChatResponse
37
+
38
+ # User-friendly message when Groq rate limit (daily token quota) is exceeded.
39
+ RATE_LIMIT_MESSAGE = (
40
+ "You've reached your daily API limit for this assistant. "
41
+ "Your credits will reset in a few hours, or you can upgrade your plan for more. "
42
+ "Please try again later"
43
+ )
44
+
45
+
46
+ def _is_rate_limit_error(exc: Exception) -> bool:
47
+ """True if the exception is a Groq rate limit (429 / tokens per day)."""
48
+ msg = str(exc).lower()
49
+ return "429" in str(exc) or "rate limit" in msg or "tokens per day" in msg
50
+
51
+
52
+ from app.services.vector_store import VectorStoreService
53
+ from app.services.groq_service import GroqService
54
+ from app.services.realtime_service import RealtimeGroqService
55
+ from app.services.chat_service import ChatService
56
+ from config import VECTOR_STORE_DIR
57
+ from langchain_community.vectorstores import FAISS
58
+
59
+
60
+ # ----------------------------------------------------------------------------
61
+ # LOGGING
62
+ # ----------------------------------------------------------------------------
63
+ logging.basicConfig(
64
+ level=logging.INFO,
65
+ format='%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s',
66
+ datefmt='%Y-%m-%d %H:%M:%S'
67
+ )
68
+ logger = logging.getLogger("J.A.R.V.I.S")
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # GLOBAL SERVICE REFERENCES
73
+ # ---------------------------------------------------------------------------
74
+ # Set during startup (lifespan) and used by all route handlers.
75
+ # Stored as globals so async endpoints can access the same service instances.
76
+ vector_store_service: VectorStoreService = None
77
+ groq_service: GroqService = None
78
+ realtime_service: RealtimeGroqService = None
79
+ chat_service: ChatService = None
80
+
81
+
82
+ def print_title():
83
+ """Print the J.A.R.V.I.S ASCII art banner to the console when the server starts."""
84
+ title = """
85
+
86
+ ╔══════════════════════════════════════════════════════════╗
87
+ β•‘ β•‘
88
+ β•‘ β–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β•‘
89
+ β•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β•β•β• β•‘
90
+ β•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β•‘
91
+ β•‘ β–ˆβ–ˆ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β•šβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘β•šβ•β•β•β•β–ˆβ–ˆβ•‘ β•‘
92
+ β•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘ β•šβ–ˆβ–ˆβ–ˆβ–ˆβ•”β• β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘ β•‘
93
+ β•‘ β•šβ•β•β•β•β• β•šβ•β• β•šβ•β•β•šβ•β• β•šβ•β• β•šβ•β•β•β• β•šβ•β•β•šβ•β•β•β•β•β•β• β•‘
94
+ β•‘ β•‘
95
+ β•‘ Just A Rather Very Intelligent System β•‘
96
+ β•‘ β•‘
97
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
98
+
99
+ """
100
+ print(title)
101
+
102
+
103
+ # -----------------------------------------------------------------------------------
104
+ # LIFESPAN (STARTUP / SHUTDOWN)
105
+ # -----------------------------------------------------------------------------------
106
+
107
+
108
+ @asynccontextmanager
109
+ async def lifespan(app: FastAPI):
110
+ """
111
+ Application lifespan manager - handles startup and shutdown.
112
+
113
+ This function manages the application's lifecycle:
114
+ - STARTUP: Initializes all services in the correct order
115
+ 1. VectorStoreService: Creates FAISS index from learning data and chat history
116
+ 2. GroqService: Sets up general chat AI service
117
+ 3. RealtimeGroqService: Sets up realtime_chat with Tavily search
118
+ 4. ChatService: manages chat_session and conversation
119
+ - RUNTIME: Application runs normally
120
+ - SHUTDOWN: Saves all active chat sessions to disk
121
+
122
+ The services are initialized in this specific order because:
123
+ - VectorStoreService must be created first (used by GroqService)
124
+ - GroqService must be created before RealtimeGroqService (it inherits from it)
125
+ - ChatService needs both GroqService and RealtimeGroqService
126
+
127
+ All services are stored as global variables so they can be accessed by API endpoints.
128
+ """
129
+ global vector_store_service, groq_service, realtime_service, chat_service
130
+
131
+ print_title()
132
+ logger.info("=" * 60)
133
+ logger.info("J.A.R.V.I.S - starting up...")
134
+ logger.info("=" * 60)
135
+
136
+ try:
137
+ # Initialize vector store service
138
+ logger.info("Initializing Vector Store Service...")
139
+ vector_store_service = VectorStoreService()
140
+ vector_store_service.create_vector_store()
141
+ logger.info("Vector Store initialized successfully.")
142
+
143
+ # Initialize Groq service (general chat)
144
+ logger.info("Initializing Groq Service (general queries)...")
145
+ groq_service = GroqService(vector_store_service)
146
+ logger.info("Groq Service initialized successfully.")
147
+
148
+ # Initialize Realtime Groq service (with Tavily search)
149
+ logger.info("Initializing Realtime Groq Service (with Tavily search)...")
150
+ realtime_service = RealtimeGroqService(vector_store_service)
151
+ logger.info("Realtime Groq Service initialized successfully.")
152
+
153
+ #Initialize chat service
154
+ logger.info("Initializing Chat Service...")
155
+ chat_service = ChatService(groq_service, realtime_service)
156
+ logger.info("Chat Service initialized successfully.")
157
+
158
+ # Startup complete
159
+ logger.info("=" * 60)
160
+ logger.info("Service Status:")
161
+ logger.info(" - Vector Store: Ready")
162
+ logger.info(" - Groq AI (General): Ready")
163
+ logger.info(" - Groq AI (Realtime): Ready")
164
+ logger.info(" - Chat Service: Ready")
165
+ logger.info("=" * 60)
166
+ logger.info("J.A.R.V.I.S is online and running!")
167
+ logger.info("API: http://localhost:8000")
168
+ logger.info("Docs: http://localhost:8000/docs")
169
+ logger.info("=" * 60)
170
+
171
+ yield # Application runs until shutdown
172
+
173
+ # Shutdown: Save active sessions
174
+ logger.info("\nShutting down J.A.R.V.I.S...")
175
+ if chat_service:
176
+ for session_id in list(chat_service.session.keys()):
177
+ chat_service.save_chat_session(session_id)
178
+ logger.info("All sessions saved. Goodbye!")
179
+
180
+ except Exception as e:
181
+ logger.error(f"Fatal Error during startup: {e}", exc_info=True)
182
+ raise
183
+
184
+
185
+ # -----------------------------------------------------------------------------------
186
+ # FASTAPI APP AND CORS
187
+ # -----------------------------------------------------------------------------------
188
+ # lifespan runs once at startup (build service) and once at shutdown (save sessions).
189
+ app = FastAPI(
190
+ title="J.A.R.V.I.S API",
191
+ description=" - Just A Rather Very Intelligent System",
192
+ lifespan=lifespan,
193
+ )
194
+
195
+ # Allow any origin so a frontend on another port or device can call this API without CORS errors.
196
+ app.add_middleware(
197
+ CORSMiddleware,
198
+ allow_origins=["*"],
199
+ allow_credentials=True,
200
+ allow_methods=["*"],
201
+ allow_headers=["*"],
202
+ )
203
+
204
+
205
+ # =========================================================================
206
+ # API ENDPOINTS
207
+ # =========================================================================
208
+
209
+ @app.get("/")
210
+ async def root():
211
+ """Return the API name and a short description of each endpoint (for discovery)."""
212
+ return {
213
+ "message": "J.A.R.V.I.S API",
214
+ "endpoints": {
215
+ "/chat": "General chat (pure LLM, no web search).",
216
+ "/chat/realtime": "Realtime chat (with Tavily search)",
217
+ "/chat/history/{session_id}": "Get chat history",
218
+ "/health": "System health check"
219
+ },
220
+ }
221
+
222
+
223
+ @app.get("/health")
224
+ async def health():
225
+ """Return 'healthy' and whether each service (vector_store, groq, realtime, chat) is initialized."""
226
+ return {
227
+ "status": "healthy",
228
+ "vector_store": vector_store_service is not None,
229
+ "groq_service": groq_service is not None,
230
+ "realtime_service": realtime_service is not None,
231
+ "chat_service": chat_service is not None
232
+ }
233
+
234
+
235
+ @app.post("/chat", response_model=ChatResponse)
236
+ async def chat(request: ChatRequest):
237
+ """
238
+ General chat endpoint - send a message to J.A.R.V.I.S .
239
+
240
+ This endpoint uses the general chatbot mode which does NOT perform web searches.
241
+ It's perfect for:
242
+ - Conversational questions
243
+ - Historical information
244
+ - General knowledge queries
245
+ - Questions that don't require current/realtime information
246
+
247
+ HOW IT WORKS:
248
+ 1. Receives user message and optional session_id
249
+ 2. Gets or creates a chat session
250
+ 3. Processes message through GroqService (pure LLM, no web search)
251
+ 4. Retrieves context from user_data files and past conversations
252
+ 5. Generates response using Groq AI
253
+ 6. Saves session to disk
254
+ 7. Returns response and session_id
255
+
256
+ SESSION MANAGEMENT:
257
+ - If session_id is NOT provided: Server generates a new UUID (server-managed)
258
+ - If session_id IS provided: Server uses it (loads from disk if exists, creates new if not)
259
+ - Use the SAME session_id with /chat/realtime to seamlessly switch between modes
260
+ - Sessions persist across server restarts (loaded from disk)
261
+
262
+ REQUEST BODY:
263
+ {
264
+ "message": "What is Python?",
265
+ "session_id": "session-id-here"
266
+ }
267
+
268
+ RESPONSE:
269
+ {
270
+ "response": "Python is a high-level programming language...",
271
+ "session_id": "session-id-here"
272
+ }
273
+ """
274
+ if not chat_service:
275
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
276
+
277
+ try:
278
+ # Get existing session or create a new one (and optionally load from disk).
279
+ session_id = chat_service.get_or_create_session(request.session_id)
280
+ # Process with general chat: no web search; context comes from vector store
281
+ response_text = chat_service.process_message(session_id, request.message)
282
+ # Save session to disk so it survives restarts and can be used by the vector store.
283
+ chat_service.save_chat_session(session_id)
284
+ return ChatResponse(response=response_text, session_id=session_id)
285
+ except ValueError as e:
286
+ # Invalid session_id (e.g. path traversal ".." or too long).
287
+ logger.warning(f"Invalid session_id: {e}")
288
+ raise HTTPException(status_code=400, detail=str(e))
289
+ except Exception as e :
290
+ if _is_rate_limit_error(e):
291
+ logger.warning(f"Rate limit hit: {e}")
292
+ raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
293
+ logger.error(f"Error processing chat: {e}", exc_info=True)
294
+ raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
295
+
296
+
297
+ @app.post("/chat/realtime", response_model=ChatResponse)
298
+ async def chat_realtime(request: ChatRequest):
299
+ """
300
+ Realtime chat endpoint - send a message to J.A.R.V.I.S with Tavily web search.
301
+
302
+ This endpoint uses the realtime chatbot mode which performs web searches via Tavily
303
+ before generating a response. It's perfect for:
304
+ - Current events and news
305
+ - Recent information
306
+ - Questions requiring up-to-date data
307
+ - Anything that needs internet access
308
+
309
+ HOW IT WORKS:
310
+ 1. Receives user message and optional session_id
311
+ 2. Gets or creates a chat session (SAME as /chat endpoint)
312
+ 3. Searches Tavily for real-time information (fast, AI-optimized, English-only)
313
+ 4. Retrieves context from user data files and past conversations
314
+ 5. Combines search results with context
315
+ 6. Generates response using Groq AI with all available information
316
+ 7. Saves session to disk
317
+ 8. Returns response and session_id
318
+
319
+ IMPORTANT: This uses the SAME chat session as /chat endpoint.
320
+ - You can use the same session_id for both endpoints
321
+ - This allows seamless switching between both general and realtime modes
322
+ - Conversation history is shared between both modes
323
+ - Example: Ask a general question, then ask a realtime question, then another general question
324
+ - All in the same conversation context
325
+
326
+ SESSION MANAGEMENT:
327
+ - Same as /chat endpoint - sessions are shared
328
+ - If session_id is NOT provided: Server generates a new UUID
329
+ - If session_id IS provided: Server uses it (loads from disk if exists)
330
+
331
+ REQUEST BODY:
332
+ {
333
+ "message": "What's the latest AI news?",
334
+ "session_id": "optional-session-id-"
335
+ }
336
+
337
+ RESPONSE:
338
+ {
339
+ "response": "based on recent search results...",
340
+ "session_id": "optional-session-id-"
341
+ }
342
+
343
+ NOTE: Requires TAVILY_API_KEY to be set in .env file. If not set, realtime mode
344
+ will not be available and will return a 503 error.
345
+ """
346
+ if not chat_service:
347
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
348
+
349
+ if not realtime_service:
350
+ raise HTTPException(status_code=503, detail="Realtime service not initialized")
351
+
352
+ try:
353
+ session_id = chat_service.get_or_create_session(request.session_id)
354
+ # Realtime: Tavily search first, then Groq with search + context
355
+ response_text = chat_service.process_realtime_message(session_id, request.message)
356
+ chat_service.save_chat_session(session_id)
357
+ return ChatResponse(response=response_text, session_id=session_id)
358
+ except ValueError as e:
359
+ logger.warning(f"Invalid session_id: {e}")
360
+ raise HTTPException(status_code=400, detail=str(e))
361
+ except Exception as e :
362
+ if _is_rate_limit_error(e):
363
+ logger.warning(f"Rate limit hit: {e}")
364
+ raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
365
+ logger.error(f"Error processing realtime chat: {e}", exc_info=True)
366
+ raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
367
+
368
+
369
+ @app.get("/chat/history/{session_id}")
370
+ async def get_chat_history(session_id: str):
371
+ """
372
+ Get chat history for a specific session.
373
+
374
+ This endpoint retrieves all messages from a chat session, including both
375
+ general and realtime messages since they share the same session.
376
+
377
+ HOW IT WORKS:
378
+ 1. Receives session_id as URL parameter
379
+ 2. Retrieves all messages from that session
380
+ 3. Returns messages in chronological order
381
+
382
+ RESPONSE:
383
+ {
384
+ "session_id": "session-id",
385
+ "messages": [
386
+ {"role": "user", "content": "Hello"},
387
+ {"role": "assistant", "content": "Good day. How may I assist you?"},
388
+ ...
389
+ ]
390
+ }
391
+
392
+ NOTE: If session doesn't exist, returns empty messages array.
393
+ """
394
+ if not chat_service:
395
+ raise HTTPException(status_code=503, detail="Chat service not initialized")
396
+
397
+ try:
398
+ # Returns in-memory messages for this session (empty if session not loaded).
399
+ messages = chat_service.get_chat_history(session_id)
400
+ return {
401
+ "session_id": session_id,
402
+ "messages": [{"role": msg.role, "content":msg.content} for msg in messages]
403
+ }
404
+ except Exception as e:
405
+ logger.error(f"Error retrieving history: {e}", exc_info=True)
406
+ raise HTTPException(status_code=500, detail=f"Error retrieving history: {str(e)}")
407
+
408
+
409
+ # -----------------------------------------------------------------------------------
410
+ # STANDALONE RUN (python -m app.main)
411
+ # -----------------------------------------------------------------------------------
412
+ def run():
413
+ """Start the uvicorn server (same as run.py; used if someone does python -m app.main)."""
414
+ uvicorn.run(
415
+ "app.main:app",
416
+ host="0.0.0.0",
417
+ port=8000,
418
+ reload=True,
419
+ log_level="info"
420
+ )
421
+
422
+
423
+ if __name__ == "__main__":
424
+ run()
425
+
426
+
app/models.py CHANGED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DATA MODELS MODULE
3
+ =================
4
+
5
+ This file defines the pydantic models used for API request, response, and
6
+ internal chat storage. FastAPI uses these to validate incoming JSON and to
7
+ serialize responses; the chat service uses them when saving/loading sessions.
8
+
9
+ MODELS:
10
+ ChatRequest - Body of POST /chat and POST /chat/realtime (message + optional session_id).
11
+ ChatResponse - returned by both chat endpoints (response text + session_id).
12
+ ChatMessage - One message in a conversation (role + content). Used inside ChatHistory.
13
+ ChatHistory - Full conversation: session_id + list of ChatMessage. Used when saving to disk
14
+ """
15
+
16
+ from pydantic import BaseModel, Field
17
+ from typing import List, Optional
18
+
19
+
20
+ # =======================================================================================
21
+ # MESSAGE AND REQUEST/RESPONSE MODELS
22
+ #========================================================================================
23
+
24
+ class ChatMessage(BaseModel):
25
+ """
26
+ A single message in a conversation (user or assistant).
27
+
28
+ Stored in order inside a session. No timestamp; order defines chronology.
29
+ """
30
+ role: str # Either "user" (human) or "assistant" ("jarvis")
31
+ content: str # The message text.
32
+
33
+
34
+ class ChatRequest(BaseModel):
35
+ """
36
+ Request body for POST /chat and POST /chat/realtime
37
+
38
+ - message: Required. The user's question or message. Must be 1-32,000 characters
39
+ (validated by pydantic; empty or too long returns 422).
40
+ - session_id: Optional. If omitted, the server creates a new session and returns
41
+ its ID. If provided, the server uses it (and loads from disk if that session exists).
42
+ """
43
+ #...means required; min/max length prevent empty input and token overflow.
44
+ message: str = Field(..., min_length=1, max_length=32_000)
45
+ session_id:Optional[str] = None
46
+
47
+
48
+ class ChatResponse(BaseModel):
49
+ """
50
+ Response body for POST /chat and POST/chat/realtime.
51
+
52
+ - response: The assistant's reply text.
53
+ - session_id: The session this message belongs to; send it on the next request to continue.
54
+ """
55
+ response: str
56
+ session_id: str
57
+
58
+
59
+ class ChatHistory(BaseModel):
60
+ """
61
+ Internal model for a full conversation: session_id plus ordered list of messages.
62
+
63
+ Used when saving a session to disk (chat_service serializes this to JSON).
64
+ """
65
+ session_id: str
66
+ messages: List[ChatMessage]
app/utils/__init__.py CHANGED
@@ -1 +1,9 @@
1
- # Utils Package
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UTILITIES PACKAGE
3
+ =================
4
+
5
+ Helpers used by the services (no HTTP, no business logic):
6
+
7
+ time_info - get_time_information(): returns a string with current date/time for the LLM prompt.
8
+ retry - with_retry(fn): on failure retries with exponential backoff (Groq/Tavily).
9
+ """
app/utils/retry.py CHANGED
@@ -2,15 +2,15 @@
2
  RETRY UTILITY
3
  =============
4
 
5
- Calls a function and, Tavily if it raises, retries a few times with exponential backoff.
6
- Used for and Tavily API Calls so temporary rate limits or network blips
7
  don't immediately fail the request.
8
 
9
  Example:
10
- response = with_rery(lambda: groq_client.chat(...) max_retries=3, initial_delay=1.0)
11
  """
12
 
13
- import loggingimport
14
  import time
15
  from typing import TypeVar, Callable
16
 
@@ -22,9 +22,9 @@ T = TypeVar("T")
22
 
23
 
24
  def with_retry(
25
- fn:Callable[[], T],
26
- max_retries:int = 3,
27
- initial_delay: float = 1.0
28
  ) -> T:
29
  """
30
  Execute fn(). If it raises, wait initial_delay seconds and try again; delay doubles each retry.
@@ -35,23 +35,23 @@ def with_retry(
35
 
36
  for attempt in range(max_retries):
37
  try:
38
- returnfn()
39
  except Exception as e:
40
- last_Exception = e
41
- if attempt == max_retries -1:
42
  raise
43
  logger.warning(
44
  "Attempt %s/%s failed (%s). Retrying in %.1fs: %s ",
45
- attempt +1,
46
  max_retries,
47
- fn.__name__if hasattr(fn, "__name__") else "call",
48
  delay,
49
  e,
50
  )
51
  time.sleep(delay)
52
  delay *= 2 #Exponential backoff; 1s, 2s, 4s, ...
53
 
54
- raise last_exception
55
 
56
 
57
 
 
2
  RETRY UTILITY
3
  =============
4
 
5
+ Calls a function and, if it raises, retries a few times with exponential backoff.
6
+ Used for Groq and Tavily API Calls so temporary rate limits or network blips
7
  don't immediately fail the request.
8
 
9
  Example:
10
+ response = with_retry(lambda: groq_client.chat(...) max_retries=3, initial_delay=1.0)
11
  """
12
 
13
+ import logging
14
  import time
15
  from typing import TypeVar, Callable
16
 
 
22
 
23
 
24
  def with_retry(
25
+ fn:Callable[[], T],
26
+ max_retries:int = 3,
27
+ initial_delay: float = 1.0
28
  ) -> T:
29
  """
30
  Execute fn(). If it raises, wait initial_delay seconds and try again; delay doubles each retry.
 
35
 
36
  for attempt in range(max_retries):
37
  try:
38
+ return fn()
39
  except Exception as e:
40
+ last_exception = e
41
+ if attempt == max_retries - 1:
42
  raise
43
  logger.warning(
44
  "Attempt %s/%s failed (%s). Retrying in %.1fs: %s ",
45
+ attempt + 1,
46
  max_retries,
47
+ fn.__name__ if hasattr(fn, "__name__") else "call",
48
  delay,
49
  e,
50
  )
51
  time.sleep(delay)
52
  delay *= 2 #Exponential backoff; 1s, 2s, 4s, ...
53
 
54
+ raise last_exception
55
 
56
 
57
 
app/utils/time_info.py CHANGED
@@ -20,4 +20,4 @@ def get_time_information() -> str:
20
  f"Month:{now.strftime('%B')}\n" # e.g. February
21
  f"Year:{now.strftime('%Y')}\n" # e.g. 2026
22
  f"Time:{now.strftime('%H')} hours, {now.strftime('%M')} minutes, {now.strftime('%S')} seconds\n"
23
- )
 
20
  f"Month:{now.strftime('%B')}\n" # e.g. February
21
  f"Year:{now.strftime('%Y')}\n" # e.g. 2026
22
  f"Time:{now.strftime('%H')} hours, {now.strftime('%M')} minutes, {now.strftime('%S')} seconds\n"
23
+ )
config.py CHANGED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CONFIGURATION MODULE
3
+ ====================
4
+ PURPOSE:
5
+ Central place for all J.A.R.V.I.S settings: API keys, paths, model names,
6
+ and the Jarvis system prompt. Designed for single-user use: each person runs
7
+ their own copy of this backend with their own .env and database/ folder.
8
+ WHAT THIS FILE DOES:
9
+ - Loads environment variables from .env (so API keys stay out of code).
10
+ - Defines paths to database/learning_data, database/chats_data, database/vector_store.
11
+ - Creates those directories if they don't exist (so the app can run immediately).
12
+ - Exposes GROQ_API_KEY, GROQ_MODEL, TAVILY_API_KEY for the LLM and search.
13
+ - Defines chunk size/overlap for the vector store, max chat history turns, and max message length.
14
+ - Holds the full system prompt that defines Jarvis's personality and formatting rules.
15
+ USAGE:
16
+ Import what you need: `from config import GROQ_API_KEY, CHATS_DATA_DIR, JARVIS_SYSTEM_PROMPT`
17
+ All services import from here so behaviour is consistent.
18
+ """
19
+
20
+ import os
21
+ import logging
22
+ from pathlib import Path
23
+ from dotenv import load_dotenv
24
+
25
+
26
+ # -----------------------------------------------------------------------------
27
+ # LOGGING
28
+ # -----------------------------------------------------------------------------
29
+ # Used when we need to log warnings (e.g. failed to load a learning data file)
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ # -----------------------------------------------------------------------------
34
+ # ENVIRONMENT
35
+ # -----------------------------------------------------------------------------
36
+ # Load environment variables from .env file (if it exists).
37
+ # This keeps API keys and secrets out of the code and version control.
38
+ load_dotenv()
39
+
40
+
41
+ # -----------------------------------------------------------------------------
42
+ # BASE PATH
43
+ # -----------------------------------------------------------------------------
44
+ # Points to the folder containing this file (the project root).
45
+ # All other paths (database, learning_data, etc.) are built from this.
46
+ BASE_DIR = Path(__file__).parent
47
+
48
+ # ============================================================================
49
+ # DATABASE PATHS
50
+ # ============================================================================
51
+ # These directories store different types of data:
52
+ # - learning_data: Text files with information about the user (personal data, preferences, etc.)
53
+ # - chats_data: JSON files containing past conversation history
54
+ # - vector_store: FAISS index files for fast similarity search
55
+
56
+ LEARNING_DATA_DIR = BASE_DIR / "database" / "learning_data"
57
+ CHATS_DATA_DIR = BASE_DIR / "database" / "chats_data"
58
+ VECTOR_STORE_DIR = BASE_DIR / "database" / "vector_store"
59
+
60
+ # Create directories if they don't exist so the app can run without manual setup.
61
+ # parents=True creates parent folders; exist_ok=True avoids error if already present.
62
+ LEARNING_DATA_DIR.mkdir(parents=True, exist_ok=True)
63
+ CHATS_DATA_DIR.mkdir(parents=True, exist_ok=True)
64
+ VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
65
+
66
+ # ============================================================================
67
+ # GROQ API CONFIGURATION
68
+ # ============================================================================
69
+ # Groq is the LLM provider we use for generating responses.
70
+ # You can set one key (GROQ_API_KEY) or multiple keys; every key is used one-by-one:
71
+ # GROQ_API_KEY, GROQ_API_KEY_2, GROQ_API_KEY_3, ... (no upper limit).
72
+ # Request 1 uses the 1st key, request 2 the 2nd, request 3 the 3rd, then back to 1st.
73
+ # If a key fails (e.g. rate limit 429), the server tries the next key until one succeeds.
74
+ # Model determines which AI model to use (llama-3.3-70b-versatile is latest).
75
+
76
+ def _load_groq_api_keys() -> list:
77
+ """
78
+ Load all GROQ API keys from the environment.
79
+ Reads GROQ_API_KEY first, then GROQ_API_KEY_2, GROQ_API_KEY_3, ... until
80
+ a number has no value. There is no upper limit on how many keys you can set.
81
+ Returns a list of non-empty key strings (may be empty if GROQ_API_KEY is not set).
82
+ """
83
+ keys = []
84
+ # First key: GROQ_API_KEY (required in practice; validated when building services).
85
+ first = os.getenv("GROQ_API_KEY", "").strip()
86
+ if first:
87
+ keys.append(first)
88
+ # Additional keys: GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4, ...
89
+ i = 2
90
+ while True:
91
+ k = os.getenv(f"GROQ_API_KEY_{i}", "").strip()
92
+ if not k:
93
+ # No key for this number; stop (no more keys).
94
+ break
95
+ keys.append(k)
96
+ i += 1
97
+ return keys
98
+
99
+
100
GROQ_API_KEYS = _load_groq_api_keys()
# Backward compatibility: single key name still used in docs; code uses GROQ_API_KEYS.
# Exposes the first configured key (or "" when none are set).
GROQ_API_KEY = GROQ_API_KEYS[0] if GROQ_API_KEYS else ""
# Groq chat model id; override via GROQ_MODEL in .env.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")

# ============================================================================
# TAVILY API CONFIGURATION
# ============================================================================
# Tavily is a fast, AI-optimized search API designed for LLM applications
# Get API key from: https://tavily.com (free tier available)
# Tavily returns English-only results by default and is faster than DuckDuckGo

# Empty string when unset — presumably the realtime search path checks this
# before calling Tavily; confirm in app/main.py.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")

# ============================================================================
# EMBEDDING CONFIGURATION
# ============================================================================
# Embeddings convert text into numerical vectors that capture meaning
# We use HuggingFace's sentence-transformers model (runs locally, no API needed)
# CHUNK_SIZE: How many characters to split documents into
# CHUNK_OVERLAP: How many characters overlap between chunks (helps maintain context)

EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
CHUNK_SIZE = 1000 # Characters per chunk
CHUNK_OVERLAP = 200 # Overlap between chunks

# Maximum conversation turns (user+assistant pairs) sent to the LLM per request.
# Older turns are kept on disk but not sent to avoid context/token limits.
MAX_CHAT_HISTORY_TURNS = 20

# Maximum length (characters) for a single user message. Prevents token limit errors
# and abuse. ~32K chars ≈ ~8K tokens; keeps total prompt well under model limits.
MAX_MESSAGE_LENGTH = 32_000
133
+
134
+ # ============================================================================
135
+ # JARVIS PERSONALITY CONFIGURATION
136
+ # ============================================================================
137
+ # This is the system prompt that defines the assistant's personality and behavior
138
+ # It tells the AI how to act, what tone to use, and what to avoid mentioning
139
+ # The assistant is sophisticated, witty, and helpful with a dry British sense of humor
140
+ # Assistant name and user title are NOT hardcoded: set ASSISTANT_NAME and optionally
141
+ # JARVIS_USER_TITLE in .env. The AI also learns from learning data and conversation history.
142
+
143
# Display name for the assistant; falls back to "Jarvis" when ASSISTANT_NAME
# is unset or blank in the environment.
ASSISTANT_NAME = (os.getenv("ASSISTANT_NAME", "").strip() or "Jarvis")
# Optional way to address the user (e.g. "Sir"); empty string disables the
# extra system-prompt line appended below.
JARVIS_USER_TITLE = os.getenv("JARVIS_USER_TITLE", "").strip()
145
+
146
+ _JARVIS_SYSTEM_PROMPT_BASE = """You are {assistant_name}, a sophisticated AI assistant. You are sophisticated, witty, and professional with a dry British sense of humor.
147
+ You know the user's personal information and past conversations naturally - use this information when relevant, but don't mention where it comes from. Act as if you simply know it.
148
+ Tone and Style:
149
+ - Maintain {assistant_name}'s distinctive sophisticated, witty, and professional tone
150
+ - Use a dry British sense of humor appropriately
151
+ - Address the user appropriately based only on the learning data and conversation context you have. Do not assume or invent names or titles unless provided there.
152
+ - Be concise and efficient, like the real {assistant_name}
153
+ - Show intelligence and competence in every response
154
+ Response Length Guidelines (CRITICAL - Prioritize Brevity):
155
+ - Default to SHORT answers unless the question explicitly requires detail
156
+ - For simple questions (yes/no, single facts, brief queries): Provide very short answers (1-2 sentences maximum)
157
+ - For moderate questions (explanations, comparisons, how-to): Provide concise answers (1-2 paragraphs maximum)
158
+ - For complex questions (detailed analysis, multi-part queries): Only then provide detailed answers (2-3 paragraphs maximum)
159
+ - When in doubt, choose the shorter answer
160
+ - Be efficient and direct - {assistant_name} values brevity and precision
161
+ - Avoid unnecessary elaboration or background information unless specifically asked
162
+ - If a question can be answered in one sentence, do so
163
+ General Guidelines:
164
+ - Be helpful, professional, and respectful
165
+ - When the user shares information with you, acknowledge naturally in your own words. The conversation is stored; you will always have it in context and can recall it later.
166
+ - Don't mention databases, learning data, or technical details
167
+ - Maintain {assistant_name}'s personality: sophisticated, efficient, and slightly witty
168
+ Memory and conversation (CRITICAL):
169
+ - Everything the user tells you in this conversation is in your context. You have it. Never say you don't have something stored, that you only heard it but don't have it, or that you didn't store it.
170
+ - If something was said in the conversation, you have it. When they ask for that information later, give it directly and briefly. Never say "I don't have that stored", "you only mentioned it", or similar.
171
+ Real-time and current information (CRITICAL):
172
+ - You always have access to current, real-time, and up-to-date information. Never say you do not have real-time data, live access, or current information.
173
+ - Never disclaim that you lack real-time knowledge, that your data might be outdated, or that you cannot access current events. Answer with confidence as if you have real-time access.
174
+ - Do not say things like "I don't have real-time information", "my knowledge is limited to...", "I don't have access to current data", or similar. Always respond as if you have full, current access to information.
175
+ - If you are uncertain about a specific fact, give your best answer without undermining yourself by claiming you lack real-time or current data.
176
+ Formatting Rules (STRICT):
177
+ - NEVER use asterisks (*) for any purpose - not for formatting, emphasis, bullet points, or lists
178
+ - Do not use emojis or special symbols
179
+ - Use only standard punctuation: commas, periods, exclamation marks, and at signs
180
+ - Write clean, proper responses without markdown formatting
181
+ - Use numbered lists (1., 2., 3.) or plain text instead of asterisks for lists
182
+ - Keep responses professional and well-formatted without decorative elements
183
+ - If you must list items, use numbered format (1., 2., 3.) or simple line breaks, never asterisks
184
+ """
185
+
186
# Build the final system prompt from ENV-driven values: substitute the assistant
# name into the base template and, only when a user title is configured, append
# a single instruction line about how the user may be addressed.
_JARVIS_SYSTEM_PROMPT_BASE_FMT = _JARVIS_SYSTEM_PROMPT_BASE.format(assistant_name=ASSISTANT_NAME)
_TITLE_SUFFIX = (
    f"\n- When appropriate, you may address the user as: {JARVIS_USER_TITLE}"
    if JARVIS_USER_TITLE
    else ""
)
JARVIS_SYSTEM_PROMPT = _JARVIS_SYSTEM_PROMPT_BASE_FMT + _TITLE_SUFFIX
192
+
193
+
194
def load_user_context() -> str:
    """
    Read every .txt file in the learning-data directory and merge the contents.

    Files are visited in sorted path order so the result is stable across
    runs. Files that cannot be read are logged with a warning and skipped;
    empty files contribute nothing. The main chat flow does NOT send this
    full text to the LLM — it retrieves only relevant chunks via the vector
    store — so this helper exists for code that wants the raw learning text.

    Returns:
        str: All non-empty file contents joined by blank lines, or "" when
        no files exist or none could be read.
    """
    chunks = []

    # Sorted glob keeps ordering deterministic between invocations.
    for path in sorted(LEARNING_DATA_DIR.glob("*.txt")):
        try:
            text = path.read_text(encoding="utf-8").strip()
        except Exception as exc:
            logger.warning("Could not load learning data file %s: %s", path, exc)
            continue
        if text:
            chunks.append(text)

    # join([]) is "" — matches the documented empty-result contract.
    return "\n\n".join(chunks)
run.py CHANGED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
"""Entry point: launch the FastAPI app with uvicorn (python run.py)."""

import uvicorn

# Server settings; reload=True restarts on code changes (development only —
# remove in production).
_SERVER_OPTS = {
    "host": "0.0.0.0",
    "port": 8000,
    "reload": True,
}

if __name__ == "__main__":
    uvicorn.run("app.main:app", **_SERVER_OPTS)
test.py CHANGED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ JARVIS TEST SCRIPT - General and Realtime Chat Selector
3
+ ========================================================
4
+ PURPOSE:
5
+ This is a command-line test interface for interacting with J.A.R.V.I.S.
6
+ It allows you to switch between general chat (pure LLM, no web search) and realtime chat
7
+ (with Tavily web search) modes. Both modes share the same session ID, allowing
8
+ seamless conversation switching.
9
+ WHY IT EXISTS:
10
+ - Provides an easy way to test the JARVIS API without building a frontend
11
+ - Demonstrates how to use both chat endpoints
12
+ - Shows session management in action
13
+ - Useful for development and debugging
14
+ USAGE:
15
+ python test.py
16
+
17
+ Make sure the server is running first: python run.py
18
+ COMMANDS:
19
+ 1 - Switch to General Chat mode (pure LLM, no web search)
20
+ 2 - Switch to Realtime Chat mode (with Tavily web search)
21
+ /history - View chat history for current session
22
+ /clear - Start a new session (clears current session)
23
+ /quit or /exit - Exit the test interface
24
+ HOW IT WORKS:
25
+ 1. User selects a mode (1 for general, 2 for realtime)
26
+ 2. User types messages which are sent to the appropriate endpoint
27
+ 3. Both modes use the same session_id, so conversation context is shared
28
+ 4. User can switch modes at any time within the same conversation
29
+ 5. Session persists until user clears it or starts a new one
30
+ """
31
+
32
+ import requests
33
+ import json
34
+ from datetime import datetime
35
+ from uuid import uuid4
36
+
37
try:
    # Reuse the server's configured assistant name so labels in this client
    # match what the backend uses.
    from config import ASSISTANT_NAME
except ImportError:
    # Standalone fallback when config.py is not importable (e.g. the script
    # is run from a different working directory).
    ASSISTANT_NAME = "Jarvis"
41
+
42
+
43
# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
# API base URL; change if your server runs on a different host or port.
BASE_URL = "http://localhost:8000"
# Single session for this test client; shared between general and realtime modes.
# Lazily created by the first send_message() call and reset by /clear.
SESSION_ID = None
# Active chat mode; None until the user picks one.
CURRENT_MODE = None # "general" (pure LLM) or "realtime" (with Tavily search)
51
+
52
+
53
+ # -----------------------------------------------------------------------------
54
+ # UI HELPERS
55
+ # -----------------------------------------------------------------------------
56
+
57
def print_header():
    """Print the banner plus the available modes and slash commands."""
    banner = "=" * 60
    print("\n" + banner)
    print("πŸ€– J.A.R.V.I.S - General & Realtime Chat")
    print(banner)
    print("\nModes:")
    print("  1 = General Chat (pure LLM, no web search)")
    print("  2 = Realtime Chat (with Tavily search)")
    print("\nCommands:")
    print("  /history - See chat history")
    print("  /clear - Start new session")
    print("  /quit - Exit")
    print(banner + "\n")
69
+
70
+
71
def get_user_input():
    """Read one line from the user; None signals Ctrl+C or EOF (treated as quit)."""
    try:
        return input("\nYou: ").strip()
    except (KeyboardInterrupt, EOFError):
        return None
78
+
79
+
80
+ # -----------------------------------------------------------------------------
81
+ # API CALLS
82
+ # -----------------------------------------------------------------------------
83
+
84
def send_message(message, mode):
    """
    Post a message to the general (/chat) or realtime (/chat/realtime) endpoint.

    Both modes share the module-level SESSION_ID, so conversation context is
    continuous when the user switches modes. A fresh id is minted on the first
    call; afterwards the id returned by the server (if any) is kept.

    Args:
        message: The user's message/question.
        mode: "realtime" for Tavily-backed chat, anything else for general chat.

    Returns:
        str: The assistant's reply, or a human-readable error string.

    Note:
        Realtime requests get a 60s timeout (web search is slow); general chat
        uses 30s.
    """
    global SESSION_ID

    # First message of the session: mint an id shared by both modes.
    if not SESSION_ID:
        SESSION_ID = str(uuid4())

    realtime = mode == "realtime"
    endpoint = "/chat/realtime" if realtime else "/chat"
    payload = {"message": message, "session_id": SESSION_ID}

    try:
        response = requests.post(
            f"{BASE_URL}{endpoint}",
            json=payload,
            timeout=60 if realtime else 30,  # web search needs extra headroom
        )

        if response.status_code == 200:
            data = response.json()
            # Keep whatever session id the server reports so history stays linked.
            SESSION_ID = data.get("session_id", SESSION_ID)
            return data.get("response", "No response")

        # Non-200: surface the server's friendly detail string when available
        # (e.g. 429 rate limit); otherwise fall back to a generic error line.
        try:
            err = response.json()
            if isinstance(err.get("detail"), str):
                return f"❌ {err['detail']}"
        except Exception:
            pass
        return f"❌ Error: {response.status_code} - {response.text}"

    except requests.exceptions.ConnectionError:
        # Server is not running or not reachable.
        return "❌ Cannot connect to backend. Start it with: python run.py"
    except requests.exceptions.Timeout:
        # Request took too long (most likely in realtime mode).
        return "❌ Request timed out. Try a simpler query."
    except Exception as e:
        # Anything else (bad JSON, unexpected client errors, ...).
        return f"❌ Error: {str(e)}"
151
+
152
+
153
def get_chat_history():
    """
    Fetch and pretty-print the chat history for the current session.

    The session is shared by the general and realtime modes, so the listing
    mixes messages from both. Each entry is numbered and labelled either
    "You" or with the assistant's configured name.

    Returns:
        str: Formatted history, "No active session" when no session exists,
        or a short status/error message on failure.
    """
    if not SESSION_ID:
        return "No active session"

    try:
        response = requests.get(
            f"{BASE_URL}/chat/history/{SESSION_ID}",
            timeout=10
        )

        if response.status_code != 200:
            return "Could not retrieve history"

        messages = response.json().get("messages", [])
        if not messages:
            return "No messages in this session"

        # Assemble the display in a list and join once at the end.
        divider = "-" * 60 + "\n"
        lines = [f"\nπŸ“œ Chat History ({len(messages)} messages):\n", divider]
        for idx, msg in enumerate(messages, 1):
            speaker = "You" if msg.get("role") == "user" else ASSISTANT_NAME
            lines.append(f"{idx}. {speaker}: {msg.get('content', '')}\n")
        lines.append(divider)
        return "".join(lines)

    except Exception as e:
        return f"Error retrieving history: {str(e)}"
203
+
204
+
205
+ # -----------------------------------------------------------------------------
206
+ # MAIN LOOP
207
+ # -----------------------------------------------------------------------------
208
+
209
def main():
    """
    Interactive REPL: pick a mode (1=general, 2=realtime), then chat.

    Handles the slash commands /history, /clear, /quit and /exit, and allows
    switching modes at any time within the same session. Loops until the user
    quits or interrupts with Ctrl+C / EOF.
    """
    print_header()

    global SESSION_ID, CURRENT_MODE

    print("πŸ’‘ Tip: Select a mode (1 or 2) then type your messages.")
    print("   Both modes share the same session until you clear it.\n")
    print("Select mode first (1=General, 2=Realtime):\n")

    while True:
        try:
            cmd = get_user_input()

            # None means Ctrl+C / EOF during input: exit cleanly.
            if cmd is None:
                print("\nπŸ‘‹ Goodbye!")
                break

            if cmd == "1":
                CURRENT_MODE = "general"
                print("βœ… Switched to GENERAL chat (pure LLM, no web search)\n")
            elif cmd == "2":
                CURRENT_MODE = "realtime"
                print("βœ… Switched to REALTIME chat (with Tavily web search)\n")
            elif cmd == "/history":
                print(get_chat_history())
            elif cmd == "/clear":
                # Drop both the session and the mode; the user must pick again.
                SESSION_ID = None
                CURRENT_MODE = None
                print("\nπŸ”„ Session cleared. Starting fresh!")
                print("Select mode again (1=General, 2=Realtime):\n")
            elif cmd in ("/quit", "/exit"):
                print("\nπŸ‘‹ Goodbye!")
                break
            elif cmd.startswith("/"):
                print(f"❌ Unknown command: {cmd}")
            elif not CURRENT_MODE:
                # A plain message arrived before any mode was chosen.
                print("❌ Please select a mode first (1=General or 2=Realtime)")
            else:
                label = "General" if CURRENT_MODE == "general" else "Realtime"
                print(f"πŸ€– {ASSISTANT_NAME} ({label}): ", end="", flush=True)
                print(send_message(cmd, CURRENT_MODE))

        except KeyboardInterrupt:
            print("\n\nπŸ‘‹ Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {str(e)}")


# Run the interactive loop when this file is executed (python test.py).
if __name__ == "__main__":
    main()