Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
|
@@ -47,16 +47,10 @@ BASE_DIR = Path(__file__).parent
|
|
| 47 |
# ============================================================================
|
| 48 |
# DATABASE PATHS
|
| 49 |
# ============================================================================
|
| 50 |
-
# These directories store different types of data:
|
| 51 |
-
# - learning_data: Text files with information about the user (personal data, preferences, etc.)
|
| 52 |
-
# - chats_data: JSON files containing past conversation history
|
| 53 |
-
# - vector_store: FAISS index files for fast similarity search
|
| 54 |
LEARNING_DATA_DIR = BASE_DIR / "database" / "learning_data"
|
| 55 |
CHATS_DATA_DIR = BASE_DIR / "database" / "chats_data"
|
| 56 |
VECTOR_STORE_DIR = BASE_DIR / "database" / "vector_store"
|
| 57 |
|
| 58 |
-
# Create directories if they don't exist so the app can run without manual setup.
|
| 59 |
-
# parents=True creates parent folders; exist_ok=True avoids error if already present.
|
| 60 |
LEARNING_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 61 |
CHATS_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 62 |
VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
|
@@ -64,87 +58,47 @@ VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
|
| 64 |
# ============================================================================
|
| 65 |
# GROQ API CONFIGURATION
|
| 66 |
# ============================================================================
|
| 67 |
-
# Groq is the LLM provider we use for generating responses.
|
| 68 |
-
# You can set one key (GROQ_API_KEY) or multiple keys for fallback:
|
| 69 |
-
# GROQ_API_KEY, GROQ_API_KEY_2, GROQ_API_KEY_3, ... (no upper limit).
|
| 70 |
-
# PRIMARY-FIRST: Every request tries the first key first. If it fails (rate limit,
|
| 71 |
-
# timeout, etc.), the server tries the second, then third, until one succeeds.
|
| 72 |
-
# If all keys fail, the user receives a clear error message.
|
| 73 |
-
# Model determines which AI model to use (llama-3.3-70b-versatile is latest).
|
| 74 |
-
|
| 75 |
def _load_groq_api_keys() -> list:
|
| 76 |
-
"""
|
| 77 |
-
Load all GROQ API keys from the environment.
|
| 78 |
-
Reads GROQ_API_KEY first, then GROQ_API_KEY_2, GROQ_API_KEY_3, ... until
|
| 79 |
-
a number has no value. There is no upper limit on how many keys you can set.
|
| 80 |
-
Returns a list of non-empty key strings (may be empty if GROQ_API_KEY is not set).
|
| 81 |
-
"""
|
| 82 |
keys = []
|
| 83 |
-
# First key: GROQ_API_KEY (required in practice; validated when building services).
|
| 84 |
first = os.getenv("GROQ_API_KEY", "").strip()
|
| 85 |
if first:
|
| 86 |
keys.append(first)
|
| 87 |
-
|
| 88 |
-
# Additional keys: GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4, ...
|
| 89 |
i = 2
|
| 90 |
while True:
|
| 91 |
k = os.getenv(f"GROQ_API_KEY_{i}", "").strip()
|
| 92 |
if not k:
|
| 93 |
-
# No key for this number; stop (no more keys).
|
| 94 |
break
|
| 95 |
keys.append(k)
|
| 96 |
i += 1
|
| 97 |
-
|
| 98 |
return keys
|
| 99 |
|
| 100 |
GROQ_API_KEYS = _load_groq_api_keys()
|
| 101 |
-
# Backward compatibility: single key name still used in docs; code uses GROQ_API_KEYS.
|
| 102 |
GROQ_API_KEY = GROQ_API_KEYS[0] if GROQ_API_KEYS else ""
|
| 103 |
-
|
| 104 |
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 105 |
|
| 106 |
# ============================================================================
|
| 107 |
# TAVILY API CONFIGURATION
|
| 108 |
# ============================================================================
|
| 109 |
-
# Tavily is a fast, AI-optimized search API designed for LLM applications
|
| 110 |
-
# Get API key from: https://tavily.com (free tier available)
|
| 111 |
-
# Tavily returns English-only results by default and is faster than DuckDuckGo
|
| 112 |
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")
|
| 113 |
|
| 114 |
# ============================================================================
|
| 115 |
# BRAIN MODEL (Query Classification — Jarvis Mode)
|
| 116 |
# ============================================================================
|
| 117 |
-
# The brain classifies each query as "general" or "realtime" using Groq.
|
| 118 |
-
# Uses the same GROQ_API_KEYS with rotation (brain and chat never use the same key).
|
| 119 |
GROQ_BRAIN_MODEL = os.getenv("GROQ_BRAIN_MODEL", "llama-3.1-8b-instant")
|
| 120 |
|
| 121 |
# ============================================================================
|
| 122 |
# TTS (TEXT-TO-SPEECH) CONFIGURATION
|
| 123 |
# ============================================================================
|
| 124 |
-
# edge-tts uses Microsoft Edge's free cloud TTS. No API key needed.
|
| 125 |
-
# Voice list: run `edge-tts --list-voices` to see all available voices.
|
| 126 |
-
# Default: en-GB-RyanNeural (male British voice, fitting for JARVIS).
|
| 127 |
-
# Override via TTS_VOICE in .env (e.g. TTS_VOICE=en-US-ChristopherNeural).
|
| 128 |
TTS_VOICE = os.getenv("TTS_VOICE", "en-GB-RyanNeural")
|
| 129 |
TTS_RATE = os.getenv("TTS_RATE", "+22%")
|
| 130 |
|
| 131 |
# ============================================================================
|
| 132 |
# EMBEDDING CONFIGURATION
|
| 133 |
# ============================================================================
|
| 134 |
-
# Embeddings convert text into numerical vectors that capture meaning
|
| 135 |
-
# We use HuggingFace's sentence-transformers model (runs locally, no API needed)
|
| 136 |
-
# CHUNK_SIZE: How many characters to split documents into
|
| 137 |
-
# CHUNK_OVERLAP: How many characters overlap between chunks (helps maintain context)
|
| 138 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| 139 |
-
CHUNK_SIZE = 1000
|
| 140 |
-
CHUNK_OVERLAP = 200
|
| 141 |
-
|
| 142 |
-
# Maximum conversation turns (user+assistant pairs) sent to the LLM per request.
|
| 143 |
-
# Older turns are kept on disk but not sent to avoid context/token limits.
|
| 144 |
MAX_CHAT_HISTORY_TURNS = 20
|
| 145 |
-
|
| 146 |
-
# Maximum length (characters) for a single user message. Prevents token limit errors
|
| 147 |
-
# and abuse. ~32K chars ≈ ~8K tokens; keeps total prompt well under model limits.
|
| 148 |
MAX_MESSAGE_LENGTH = 32_000
|
| 149 |
|
| 150 |
# ============================================================================
|
|
@@ -235,16 +189,9 @@ def load_user_context() -> str:
|
|
| 235 |
"""
|
| 236 |
Load and concatenate the contents of all .txt files in learning_data.
|
| 237 |
Reads every .txt file in database/learning_data/, joins their contents with
|
| 238 |
-
double newlines, and returns one string.
|
| 239 |
-
learning text (e.g. optional utilities). The main chat flow does NOT send
|
| 240 |
-
this full text to the LLM; it uses the vector store to retrieve only
|
| 241 |
-
relevant chunks, so token usage stays bounded.
|
| 242 |
-
|
| 243 |
-
Returns:
|
| 244 |
-
str: Combined content from all .txt files, or "" if none exist or all fail to read.
|
| 245 |
"""
|
| 246 |
context_parts = []
|
| 247 |
-
# Sorted by path so the order is always the same across runs.
|
| 248 |
text_files = sorted(LEARNING_DATA_DIR.glob("*.txt"))
|
| 249 |
for file_path in text_files:
|
| 250 |
try:
|
|
@@ -254,6 +201,4 @@ def load_user_context() -> str:
|
|
| 254 |
context_parts.append(content)
|
| 255 |
except Exception as e:
|
| 256 |
logger.warning("Could not load learning data file %s: %s", file_path, e)
|
| 257 |
-
|
| 258 |
-
# Join all file contents with double newline; empty string if no files or all failed.
|
| 259 |
return "\n\n".join(context_parts) if context_parts else ""
|
|
|
|
| 47 |
# ============================================================================
|
| 48 |
# DATABASE PATHS
|
| 49 |
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# Data directories, all located under BASE_DIR / "database":
# - learning_data: text files with information about the user (personal data, preferences, etc.)
# - chats_data: JSON files containing past conversation history
# - vector_store: FAISS index files for fast similarity search
LEARNING_DATA_DIR = BASE_DIR / "database" / "learning_data"
|
| 51 |
CHATS_DATA_DIR = BASE_DIR / "database" / "chats_data"
|
| 52 |
VECTOR_STORE_DIR = BASE_DIR / "database" / "vector_store"
|
| 53 |
|
|
|
|
|
|
|
| 54 |
# Create the data directories when this module is imported so the app can run without manual setup.
# parents=True creates missing parent folders; exist_ok=True avoids an error if they already exist.
LEARNING_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 55 |
CHATS_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 56 |
VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 58 |
# ============================================================================
|
| 59 |
# GROQ API CONFIGURATION
|
| 60 |
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def _load_groq_api_keys() -> list:
    """
    Load all Groq API keys from the environment.

    Reads GROQ_API_KEY first, then GROQ_API_KEY_2, GROQ_API_KEY_3, ... and stops
    at the first numbered variable that is unset or blank. There is no upper
    limit on how many keys can be configured. Every value is stripped of
    surrounding whitespace before it is stored.

    Returns:
        list: Non-empty key strings in lookup order (may be empty if nothing is set).
    """
    import itertools  # local import so this block does not depend on the file's import header

    keys = []

    # Primary key. A missing GROQ_API_KEY does not stop the numbered lookup below.
    primary = os.getenv("GROQ_API_KEY", "").strip()
    if primary:
        keys.append(primary)

    # Fallback keys: numbering starts at 2 and must be contiguous; the first
    # gap (or whitespace-only value) ends the scan, so later keys are ignored.
    for index in itertools.count(2):
        extra = os.getenv(f"GROQ_API_KEY_{index}", "").strip()
        if not extra:
            break
        keys.append(extra)

    return keys
|
| 74 |
|
| 75 |
# Every Groq key found in the environment, in lookup order (GROQ_API_KEY, then GROQ_API_KEY_2, ...).
GROQ_API_KEYS = _load_groq_api_keys()
|
|
|
|
| 76 |
# Backward compatibility: single-key name; holds the first loaded key, or "" when none is configured.
GROQ_API_KEY = GROQ_API_KEYS[0] if GROQ_API_KEYS else ""
|
|
|
|
| 77 |
# Groq model used for generating responses; override with the GROQ_MODEL environment variable.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 78 |
|
| 79 |
# ============================================================================
|
| 80 |
# TAVILY API CONFIGURATION
|
| 81 |
# ============================================================================
|
|
|
|
|
|
|
|
|
|
| 82 |
# Tavily search API key (obtainable from https://tavily.com); "" when TAVILY_API_KEY is unset.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")
|
| 83 |
|
| 84 |
# ============================================================================
|
| 85 |
# BRAIN MODEL (Query Classification — Jarvis Mode)
|
| 86 |
# ============================================================================
|
|
|
|
|
|
|
| 87 |
# Groq model for the "brain" step, which classifies each query as "general" or "realtime";
# override with the GROQ_BRAIN_MODEL environment variable.
# NOTE(review): the classification itself happens outside this file — confirm against the caller.
GROQ_BRAIN_MODEL = os.getenv("GROQ_BRAIN_MODEL", "llama-3.1-8b-instant")
|
| 88 |
|
| 89 |
# ============================================================================
|
| 90 |
# TTS (TEXT-TO-SPEECH) CONFIGURATION
|
| 91 |
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# edge-tts voice name (run `edge-tts --list-voices` to see the options); override via TTS_VOICE in .env.
TTS_VOICE = os.getenv("TTS_VOICE", "en-GB-RyanNeural")
|
| 93 |
# Speaking-rate adjustment string; override via TTS_RATE.
# NOTE(review): presumably passed to edge-tts as its rate option — confirm against the TTS caller.
TTS_RATE = os.getenv("TTS_RATE", "+22%")
|
| 94 |
|
| 95 |
# ============================================================================
|
| 96 |
# EMBEDDING CONFIGURATION
|
| 97 |
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# HuggingFace sentence-transformers model used to convert text into embedding vectors.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| 99 |
+
# CHUNK_SIZE: how many characters each document chunk contains when documents are split.
CHUNK_SIZE = 1000
|
| 100 |
+
# CHUNK_OVERLAP: how many characters consecutive chunks share (helps maintain context).
CHUNK_OVERLAP = 200
|
|
|
|
|
|
|
|
|
|
| 101 |
# Maximum conversation turns (user+assistant pairs) sent to the LLM per request.
# NOTE(review): the limit is enforced by code outside this view — confirm where it is applied.
MAX_CHAT_HISTORY_TURNS = 20
|
|
|
|
|
|
|
|
|
|
| 102 |
# Maximum length, in characters, of a single user message (~32K chars is roughly ~8K tokens).
MAX_MESSAGE_LENGTH = 32_000
|
| 103 |
|
| 104 |
# ============================================================================
|
|
|
|
| 189 |
"""
|
| 190 |
Load and concatenate the contents of all .txt files in learning_data.
|
| 191 |
Reads every .txt file in database/learning_data/, joins their contents with
|
| 192 |
+
double newlines, and returns one string.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
"""
|
| 194 |
context_parts = []
|
|
|
|
| 195 |
text_files = sorted(LEARNING_DATA_DIR.glob("*.txt"))
|
| 196 |
for file_path in text_files:
|
| 197 |
try:
|
|
|
|
| 201 |
context_parts.append(content)
|
| 202 |
except Exception as e:
|
| 203 |
logger.warning("Could not load learning data file %s: %s", file_path, e)
|
|
|
|
|
|
|
| 204 |
return "\n\n".join(context_parts) if context_parts else ""
|