NitinBot001 commited on
Commit
6faaecb
·
verified ·
1 Parent(s): 57610f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -61
app.py CHANGED
@@ -21,7 +21,7 @@ from langchain_community.vectorstores import FAISS
21
  from langchain.chains import RetrievalQA
22
  from langchain.prompts import PromptTemplate
23
  from langchain.callbacks.base import BaseCallbackHandler
24
- from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
25
  import tiktoken
26
 
27
  # Configure logging
@@ -55,18 +55,30 @@ is_initialized = False
55
 
56
  # Configuration
57
  class Config:
58
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
59
- CHUNK_SIZE = 500 # Reduced chunk size to create fewer embeddings
60
- CHUNK_OVERLAP = 50 # Reduced overlap
61
- MAX_RETRIES = 5 # Increased retries
62
- RATE_LIMIT_DELAY = 2.0 # Increased delay
63
- EMBEDDING_BATCH_SIZE = 5 # Process embeddings in small batches
64
- EMBEDDING_DELAY = 1.5 # Delay between embedding batches
65
- MODEL_NAME = "gemma-3-27b-it"
66
- EMBEDDING_MODEL = "models/embedding-001"
 
 
 
 
 
 
 
 
 
 
67
  TEMPERATURE = 0.5
68
- MAX_OUTPUT_TOKENS = 20000
69
- RETRIEVER_K = 15 # Reduced retrieval count
 
 
70
  INDEX_PATH = "faiss_maize_index"
71
  DATA_PATH = "data/maize_data.txt"
72
 
@@ -74,7 +86,7 @@ config = Config()
74
 
75
  # Request/Response Models
76
  class QueryRequest(BaseModel):
77
- query: str = Field(..., min_length=1, max_length=100000)
78
 
79
  class QueryResponse(BaseModel):
80
  answer: str
@@ -88,12 +100,16 @@ class SystemStatus(BaseModel):
88
  is_initialized: bool
89
  model_name: str
90
  embedding_model: str
 
91
  vector_store_ready: bool
92
  total_chunks: int = 0
93
  api_key_configured: bool
94
 
95
  class InitializeRequest(BaseModel):
96
  api_key: str = Field(..., min_length=1)
 
 
 
97
 
98
  # Token counting utilities
99
  try:
@@ -104,7 +120,10 @@ except:
104
 
105
  def estimate_tokens(text: str) -> int:
106
  """Estimates token count for a given text."""
107
- return len(tokenizer.encode(text))
 
 
 
108
 
109
  # Rate limiting helper functions
110
  async def rate_limited_embedding_creation(chunks, embeddings):
@@ -141,7 +160,12 @@ async def rate_limited_embedding_creation(chunks, embeddings):
141
  retry_count += 1
142
  delay = config.EMBEDDING_DELAY * (2 ** retry_count) + random.uniform(0, 1)
143
  logger.warning(f"Batch {i//batch_size + 1} failed (attempt {retry_count}): {str(e)}")
144
- logger.info(f"Waiting {delay:.2f} seconds before retry...")
 
 
 
 
 
145
  await asyncio.sleep(delay)
146
 
147
  if retry_count >= max_retries:
@@ -149,7 +173,7 @@ async def rate_limited_embedding_creation(chunks, embeddings):
149
 
150
  # Delay between batches to respect rate limits
151
  if i + batch_size < len(chunks):
152
- delay = config.EMBEDDING_DELAY + random.uniform(0.5, 1.0)
153
  logger.info(f"Waiting {delay:.2f} seconds before next batch...")
154
  await asyncio.sleep(delay)
155
 
@@ -164,9 +188,9 @@ async def rate_limited_embedding_creation(chunks, embeddings):
164
  logger.info("Successfully created and merged all embeddings")
165
  return final_vector_store
166
 
167
- # Custom Callback Handler
168
  class TokenUsageCallbackHandler(BaseCallbackHandler):
169
- """Callback handler to track token usage in LLM calls."""
170
 
171
  def __init__(self):
172
  super().__init__()
@@ -179,14 +203,15 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
179
  self.last_call_tokens = {}
180
 
181
  def on_llm_end(self, response, **kwargs):
182
- """Collect token usage from the LLM response."""
183
  self.total_llm_calls += 1
184
  llm_output = response.llm_output
185
 
186
- if llm_output and 'usage_metadata' in llm_output:
187
- usage = llm_output['usage_metadata']
188
- prompt_tokens = usage.get('prompt_token_count', 0)
189
- completion_tokens = usage.get('candidates_token_count', 0)
 
190
 
191
  self.total_prompt_tokens += prompt_tokens
192
  self.total_completion_tokens += completion_tokens
@@ -198,6 +223,9 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
198
  }
199
 
200
  logger.info(f"Token usage - Prompt: {prompt_tokens}, Completion: {completion_tokens}")
 
 
 
201
 
202
  def get_last_call_usage(self):
203
  return self.last_call_tokens
@@ -211,19 +239,32 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
211
  }
212
 
213
  # RAG System Functions
214
- async def initialize_rag_system(api_key: str = None):
215
- """Initialize or reinitialize the RAG system."""
216
  global vector_store, qa_chain, token_callback_handler, is_initialized, config
217
 
218
  try:
219
- # Use provided API key or environment variable
220
  if api_key:
221
- config.GOOGLE_API_KEY = api_key
222
- os.environ["GOOGLE_API_KEY"] = api_key
223
- elif not config.GOOGLE_API_KEY:
224
- raise ValueError("Google API key not provided")
 
 
 
 
 
 
 
 
 
 
225
 
226
- logger.info("Initializing RAG system...")
 
 
 
227
 
228
  # Initialize token callback handler
229
  token_callback_handler = TokenUsageCallbackHandler()
@@ -232,26 +273,39 @@ async def initialize_rag_system(api_key: str = None):
232
  if not os.path.exists(config.DATA_PATH):
233
  raise FileNotFoundError(f"Data file not found: {config.DATA_PATH}")
234
 
235
- loader = TextLoader(config.DATA_PATH)
236
  documents = loader.load()
237
 
238
  text_splitter = RecursiveCharacterTextSplitter(
239
  chunk_size=config.CHUNK_SIZE,
240
- chunk_overlap=config.CHUNK_OVERLAP
 
241
  )
242
  chunks = text_splitter.split_documents(documents)
243
  logger.info(f"Document split into {len(chunks)} chunks")
244
 
245
  # Check if we have too many chunks that might cause rate limiting
246
- if len(chunks) > 100:
247
- logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size or reducing document size to avoid rate limits.")
248
 
249
- # Initialize embeddings with retry logic
250
- embeddings = GoogleGenerativeAIEmbeddings(
251
  model=config.EMBEDDING_MODEL,
252
- google_api_key=config.GOOGLE_API_KEY
 
 
253
  )
254
 
 
 
 
 
 
 
 
 
 
 
255
  # Create or load FAISS index with rate limiting
256
  if os.path.exists(config.INDEX_PATH):
257
  try:
@@ -272,22 +326,33 @@ async def initialize_rag_system(api_key: str = None):
272
  vector_store.save_local(config.INDEX_PATH)
273
  logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
274
 
275
- # Initialize LLM with retry and rate limiting
276
- llm = ChatGoogleGenerativeAI(
277
- model=config.MODEL_NAME,
278
- google_api_key=config.GOOGLE_API_KEY,
 
279
  temperature=config.TEMPERATURE,
280
  max_tokens=config.MAX_OUTPUT_TOKENS,
281
- callbacks=[token_callback_handler]
 
282
  )
283
 
 
 
 
 
 
 
 
 
284
  # Create prompt template
285
  prompt_template = PromptTemplate(
286
  input_variables=["context", "question"],
287
- template="""
288
- You are an expert in maize agriculture. Use the following context ONLY to answer the question accurately and helpfully.
289
- If there have any query about getting personal information of a person then don't get it and reply full answer accordingly context.
290
- Answer should be concise clear and with easy language.
 
291
 
292
  Context:
293
  {context}
@@ -301,7 +366,10 @@ Answer:"""
301
  qa_chain = RetrievalQA.from_chain_type(
302
  llm=llm,
303
  chain_type="stuff",
304
- retriever=vector_store.as_retriever(search_kwargs={"k": config.RETRIEVER_K}),
 
 
 
305
  chain_type_kwargs={"prompt": prompt_template},
306
  callbacks=[token_callback_handler],
307
  return_source_documents=True
@@ -320,7 +388,7 @@ Answer:"""
320
  @app.on_event("startup")
321
  async def startup_event():
322
  """Initialize the system on startup if API key is available."""
323
- if config.GOOGLE_API_KEY:
324
  try:
325
  await initialize_rag_system()
326
  except Exception as e:
@@ -333,7 +401,20 @@ async def root():
333
  with open("static/index.html", "r") as f:
334
  return f.read()
335
  except FileNotFoundError:
336
- return "<h1>Static files not found. Please ensure static/index.html exists.</h1>"
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
  @app.get("/api/status", response_model=SystemStatus)
339
  async def get_status():
@@ -341,21 +422,32 @@ async def get_status():
341
  return SystemStatus(
342
  status="ready" if is_initialized else "not_initialized",
343
  is_initialized=is_initialized,
344
- model_name=config.MODEL_NAME,
345
  embedding_model=config.EMBEDDING_MODEL,
 
346
  vector_store_ready=vector_store is not None,
347
  total_chunks=len(vector_store.docstore._dict) if vector_store else 0,
348
- api_key_configured=bool(config.GOOGLE_API_KEY)
349
  )
350
 
351
  @app.post("/api/initialize", response_model=Dict[str, Any])
352
  async def initialize_system(request: InitializeRequest):
353
- """Initialize the RAG system with provided API key."""
354
  try:
355
- await initialize_rag_system(request.api_key)
 
 
 
 
 
356
  return {
357
  "success": True,
358
- "message": "System initialized successfully"
 
 
 
 
 
359
  }
360
  except Exception as e:
361
  raise HTTPException(status_code=500, detail=str(e))
@@ -366,7 +458,7 @@ async def process_query(request: QueryRequest):
366
  if not is_initialized:
367
  raise HTTPException(
368
  status_code=503,
369
- detail="System not initialized. Please provide API key."
370
  )
371
 
372
  try:
@@ -387,7 +479,12 @@ async def process_query(request: QueryRequest):
387
 
388
  delay = config.RATE_LIMIT_DELAY * (2 ** attempt) + random.uniform(0, 1)
389
  logger.warning(f"Query attempt {attempt + 1} failed: {str(e)}")
390
- logger.info(f"Retrying in {delay:.2f} seconds...")
 
 
 
 
 
391
  await asyncio.sleep(delay)
392
 
393
  processing_time = time.time() - start_time
@@ -428,8 +525,11 @@ async def upload_document(file: UploadFile = File(...)):
428
  """Upload a new document to replace the existing one."""
429
  try:
430
  # Validate file
431
- if not file.filename.endswith('.txt'):
432
- raise HTTPException(status_code=400, detail="Only .txt files are supported")
 
 
 
433
 
434
  # Save uploaded file
435
  content = await file.read()
@@ -439,7 +539,7 @@ async def upload_document(file: UploadFile = File(...)):
439
  logger.info(f"Uploaded new document: {file.filename}")
440
 
441
  # Reinitialize the system with new data
442
- if config.GOOGLE_API_KEY:
443
  # Remove old index to force recreation
444
  if os.path.exists(config.INDEX_PATH):
445
  import shutil
@@ -462,7 +562,21 @@ async def health_check():
462
  return {
463
  "status": "healthy",
464
  "timestamp": datetime.now().isoformat(),
465
- "system_initialized": is_initialized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  }
467
 
468
  # Mount static files
 
21
  from langchain.chains import RetrievalQA
22
  from langchain.prompts import PromptTemplate
23
  from langchain.callbacks.base import BaseCallbackHandler
24
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
25
  import tiktoken
26
 
27
  # Configure logging
 
55
 
56
  # Configuration
57
  class Config:
58
+ # OpenAI Compatible API Configuration
59
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
60
+ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") # Can be changed to compatible APIs
61
+
62
+ # Model Configuration
63
+ LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo") # Can be changed to any compatible model
64
+ EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002") # Can be changed to compatible embedding model
65
+
66
+ # Document Processing
67
+ CHUNK_SIZE = 500
68
+ CHUNK_OVERLAP = 50
69
+
70
+ # Rate Limiting
71
+ MAX_RETRIES = 5
72
+ RATE_LIMIT_DELAY = 2.0
73
+ EMBEDDING_BATCH_SIZE = 10 # OpenAI allows more requests
74
+ EMBEDDING_DELAY = 1.0 # Lower delay for OpenAI
75
+
76
+ # Model Parameters
77
  TEMPERATURE = 0.5
78
+ MAX_OUTPUT_TOKENS = 2000
79
+ RETRIEVER_K = 10
80
+
81
+ # Paths
82
  INDEX_PATH = "faiss_maize_index"
83
  DATA_PATH = "data/maize_data.txt"
84
 
 
86
 
87
  # Request/Response Models
88
  class QueryRequest(BaseModel):
89
+ query: str = Field(..., min_length=1, max_length=10000)
90
 
91
  class QueryResponse(BaseModel):
92
  answer: str
 
100
  is_initialized: bool
101
  model_name: str
102
  embedding_model: str
103
+ base_url: str
104
  vector_store_ready: bool
105
  total_chunks: int = 0
106
  api_key_configured: bool
107
 
108
  class InitializeRequest(BaseModel):
109
  api_key: str = Field(..., min_length=1)
110
+ base_url: Optional[str] = Field(default=None, description="OpenAI compatible API base URL")
111
+ llm_model: Optional[str] = Field(default=None, description="LLM model name")
112
+ embedding_model: Optional[str] = Field(default=None, description="Embedding model name")
113
 
114
  # Token counting utilities
115
  try:
 
120
 
121
  def estimate_tokens(text: str) -> int:
122
  """Estimates token count for a given text."""
123
+ try:
124
+ return len(tokenizer.encode(text))
125
+ except:
126
+ return len(text.split()) * 1.3 # Rough estimate
127
 
128
  # Rate limiting helper functions
129
  async def rate_limited_embedding_creation(chunks, embeddings):
 
160
  retry_count += 1
161
  delay = config.EMBEDDING_DELAY * (2 ** retry_count) + random.uniform(0, 1)
162
  logger.warning(f"Batch {i//batch_size + 1} failed (attempt {retry_count}): {str(e)}")
163
+
164
+ if "rate limit" in str(e).lower() or "429" in str(e):
165
+ logger.info(f"Rate limit detected. Waiting {delay:.2f} seconds before retry...")
166
+ else:
167
+ logger.info(f"API error detected. Waiting {delay:.2f} seconds before retry...")
168
+
169
  await asyncio.sleep(delay)
170
 
171
  if retry_count >= max_retries:
 
173
 
174
  # Delay between batches to respect rate limits
175
  if i + batch_size < len(chunks):
176
+ delay = config.EMBEDDING_DELAY + random.uniform(0.2, 0.5)
177
  logger.info(f"Waiting {delay:.2f} seconds before next batch...")
178
  await asyncio.sleep(delay)
179
 
 
188
  logger.info("Successfully created and merged all embeddings")
189
  return final_vector_store
190
 
191
+ # Custom Callback Handler for OpenAI
192
  class TokenUsageCallbackHandler(BaseCallbackHandler):
193
+ """Callback handler to track token usage in OpenAI calls."""
194
 
195
  def __init__(self):
196
  super().__init__()
 
203
  self.last_call_tokens = {}
204
 
205
  def on_llm_end(self, response, **kwargs):
206
+ """Collect token usage from the OpenAI response."""
207
  self.total_llm_calls += 1
208
  llm_output = response.llm_output
209
 
210
+ # OpenAI token usage structure
211
+ if llm_output and 'token_usage' in llm_output:
212
+ usage = llm_output['token_usage']
213
+ prompt_tokens = usage.get('prompt_tokens', 0)
214
+ completion_tokens = usage.get('completion_tokens', 0)
215
 
216
  self.total_prompt_tokens += prompt_tokens
217
  self.total_completion_tokens += completion_tokens
 
223
  }
224
 
225
  logger.info(f"Token usage - Prompt: {prompt_tokens}, Completion: {completion_tokens}")
226
+ else:
227
+ # Fallback token estimation if usage not available
228
+ logger.info("Token usage not available from API response")
229
 
230
  def get_last_call_usage(self):
231
  return self.last_call_tokens
 
239
  }
240
 
241
  # RAG System Functions
242
+ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_model: str = None, embedding_model: str = None):
243
+ """Initialize or reinitialize the RAG system with OpenAI compatible API."""
244
  global vector_store, qa_chain, token_callback_handler, is_initialized, config
245
 
246
  try:
247
+ # Update configuration
248
  if api_key:
249
+ config.OPENAI_API_KEY = api_key
250
+ os.environ["OPENAI_API_KEY"] = api_key
251
+ elif not config.OPENAI_API_KEY:
252
+ raise ValueError("OpenAI API key not provided")
253
+
254
+ if base_url:
255
+ config.OPENAI_BASE_URL = base_url
256
+ os.environ["OPENAI_BASE_URL"] = base_url
257
+
258
+ if llm_model:
259
+ config.LLM_MODEL = llm_model
260
+
261
+ if embedding_model:
262
+ config.EMBEDDING_MODEL = embedding_model
263
 
264
+ logger.info(f"Initializing RAG system with:")
265
+ logger.info(f" - Base URL: {config.OPENAI_BASE_URL}")
266
+ logger.info(f" - LLM Model: {config.LLM_MODEL}")
267
+ logger.info(f" - Embedding Model: {config.EMBEDDING_MODEL}")
268
 
269
  # Initialize token callback handler
270
  token_callback_handler = TokenUsageCallbackHandler()
 
273
  if not os.path.exists(config.DATA_PATH):
274
  raise FileNotFoundError(f"Data file not found: {config.DATA_PATH}")
275
 
276
+ loader = TextLoader(config.DATA_PATH, encoding='utf-8')
277
  documents = loader.load()
278
 
279
  text_splitter = RecursiveCharacterTextSplitter(
280
  chunk_size=config.CHUNK_SIZE,
281
+ chunk_overlap=config.CHUNK_OVERLAP,
282
+ separators=["\n\n", "\n", " ", ""]
283
  )
284
  chunks = text_splitter.split_documents(documents)
285
  logger.info(f"Document split into {len(chunks)} chunks")
286
 
287
  # Check if we have too many chunks that might cause rate limiting
288
+ if len(chunks) > 200:
289
+ logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size to reduce API calls.")
290
 
291
+ # Initialize OpenAI embeddings
292
+ embeddings = OpenAIEmbeddings(
293
  model=config.EMBEDDING_MODEL,
294
+ openai_api_key=config.OPENAI_API_KEY,
295
+ openai_api_base=config.OPENAI_BASE_URL,
296
+ chunk_size=1000 # Embedding batch size
297
  )
298
 
299
+ # Test embedding connection
300
+ try:
301
+ test_embedding = await asyncio.get_event_loop().run_in_executor(
302
+ None, embeddings.embed_query, "test connection"
303
+ )
304
+ logger.info("Successfully connected to embedding API")
305
+ except Exception as e:
306
+ logger.error(f"Failed to connect to embedding API: {str(e)}")
307
+ raise
308
+
309
  # Create or load FAISS index with rate limiting
310
  if os.path.exists(config.INDEX_PATH):
311
  try:
 
326
  vector_store.save_local(config.INDEX_PATH)
327
  logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
328
 
329
+ # Initialize OpenAI LLM
330
+ llm = ChatOpenAI(
331
+ model_name=config.LLM_MODEL,
332
+ openai_api_key=config.OPENAI_API_KEY,
333
+ openai_api_base=config.OPENAI_BASE_URL,
334
  temperature=config.TEMPERATURE,
335
  max_tokens=config.MAX_OUTPUT_TOKENS,
336
+ callbacks=[token_callback_handler],
337
+ request_timeout=30
338
  )
339
 
340
+ # Test LLM connection
341
+ try:
342
+ test_response = llm.invoke("Test connection")
343
+ logger.info("Successfully connected to LLM API")
344
+ except Exception as e:
345
+ logger.error(f"Failed to connect to LLM API: {str(e)}")
346
+ raise
347
+
348
  # Create prompt template
349
  prompt_template = PromptTemplate(
350
  input_variables=["context", "question"],
351
+ template="""You are an expert in maize agriculture. Use the following context ONLY to answer the question accurately and helpfully.
352
+
353
+ If the query asks for personal information of any person, do not provide it and instead explain that you cannot share personal information.
354
+
355
+ Provide clear, concise answers in easy-to-understand language. If the context doesn't contain enough information to answer the question completely, say so.
356
 
357
  Context:
358
  {context}
 
366
  qa_chain = RetrievalQA.from_chain_type(
367
  llm=llm,
368
  chain_type="stuff",
369
+ retriever=vector_store.as_retriever(
370
+ search_type="similarity",
371
+ search_kwargs={"k": config.RETRIEVER_K}
372
+ ),
373
  chain_type_kwargs={"prompt": prompt_template},
374
  callbacks=[token_callback_handler],
375
  return_source_documents=True
 
388
  @app.on_event("startup")
389
  async def startup_event():
390
  """Initialize the system on startup if API key is available."""
391
+ if config.OPENAI_API_KEY:
392
  try:
393
  await initialize_rag_system()
394
  except Exception as e:
 
401
  with open("static/index.html", "r") as f:
402
  return f.read()
403
  except FileNotFoundError:
404
+ return """
405
+ <html>
406
+ <head><title>Maize RAG System</title></head>
407
+ <body>
408
+ <h1>Maize Crop RAG System</h1>
409
+ <p>API is running. Please use the API endpoints or add static/index.html for web interface.</p>
410
+ <h2>Available Endpoints:</h2>
411
+ <ul>
412
+ <li><a href="/docs">API Documentation</a></li>
413
+ <li><a href="/api/status">System Status</a></li>
414
+ </ul>
415
+ </body>
416
+ </html>
417
+ """
418
 
419
  @app.get("/api/status", response_model=SystemStatus)
420
  async def get_status():
 
422
  return SystemStatus(
423
  status="ready" if is_initialized else "not_initialized",
424
  is_initialized=is_initialized,
425
+ model_name=config.LLM_MODEL,
426
  embedding_model=config.EMBEDDING_MODEL,
427
+ base_url=config.OPENAI_BASE_URL,
428
  vector_store_ready=vector_store is not None,
429
  total_chunks=len(vector_store.docstore._dict) if vector_store else 0,
430
+ api_key_configured=bool(config.OPENAI_API_KEY)
431
  )
432
 
433
  @app.post("/api/initialize", response_model=Dict[str, Any])
434
  async def initialize_system(request: InitializeRequest):
435
+ """Initialize the RAG system with provided API key and configuration."""
436
  try:
437
+ await initialize_rag_system(
438
+ api_key=request.api_key,
439
+ base_url=request.base_url,
440
+ llm_model=request.llm_model,
441
+ embedding_model=request.embedding_model
442
+ )
443
  return {
444
  "success": True,
445
+ "message": "System initialized successfully",
446
+ "config": {
447
+ "base_url": config.OPENAI_BASE_URL,
448
+ "llm_model": config.LLM_MODEL,
449
+ "embedding_model": config.EMBEDDING_MODEL
450
+ }
451
  }
452
  except Exception as e:
453
  raise HTTPException(status_code=500, detail=str(e))
 
458
  if not is_initialized:
459
  raise HTTPException(
460
  status_code=503,
461
+ detail="System not initialized. Please provide API key and configuration."
462
  )
463
 
464
  try:
 
479
 
480
  delay = config.RATE_LIMIT_DELAY * (2 ** attempt) + random.uniform(0, 1)
481
  logger.warning(f"Query attempt {attempt + 1} failed: {str(e)}")
482
+
483
+ if "rate limit" in str(e).lower() or "429" in str(e):
484
+ logger.info(f"Rate limit detected. Retrying in {delay:.2f} seconds...")
485
+ else:
486
+ logger.info(f"API error detected. Retrying in {delay:.2f} seconds...")
487
+
488
  await asyncio.sleep(delay)
489
 
490
  processing_time = time.time() - start_time
 
525
  """Upload a new document to replace the existing one."""
526
  try:
527
  # Validate file
528
+ if not file.filename.endswith(('.txt', '.md')):
529
+ raise HTTPException(status_code=400, detail="Only .txt and .md files are supported")
530
+
531
+ # Ensure data directory exists
532
+ os.makedirs(os.path.dirname(config.DATA_PATH), exist_ok=True)
533
 
534
  # Save uploaded file
535
  content = await file.read()
 
539
  logger.info(f"Uploaded new document: {file.filename}")
540
 
541
  # Reinitialize the system with new data
542
+ if config.OPENAI_API_KEY:
543
  # Remove old index to force recreation
544
  if os.path.exists(config.INDEX_PATH):
545
  import shutil
 
562
  return {
563
  "status": "healthy",
564
  "timestamp": datetime.now().isoformat(),
565
+ "system_initialized": is_initialized,
566
+ "api_configured": bool(config.OPENAI_API_KEY)
567
+ }
568
+
569
+ # Configuration endpoint
570
+ @app.get("/api/config")
571
+ async def get_config():
572
+ """Get current configuration."""
573
+ return {
574
+ "base_url": config.OPENAI_BASE_URL,
575
+ "llm_model": config.LLM_MODEL,
576
+ "embedding_model": config.EMBEDDING_MODEL,
577
+ "chunk_size": config.CHUNK_SIZE,
578
+ "retriever_k": config.RETRIEVER_K,
579
+ "api_key_configured": bool(config.OPENAI_API_KEY)
580
  }
581
 
582
  # Mount static files