NitinBot001 committed on
Commit
8a3e525
·
verified ·
1 Parent(s): e96f6b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -27
app.py CHANGED
@@ -57,11 +57,15 @@ is_initialized = False
57
  class Config:
58
  # OpenAI Compatible API Configuration
59
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
60
- OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") # Can be changed to compatible APIs
 
 
 
 
61
 
62
  # Model Configuration
63
- LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo") # Can be changed to any compatible model
64
- EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002") # Can be changed to compatible embedding model
65
 
66
  # Document Processing
67
  CHUNK_SIZE = 500
@@ -70,12 +74,12 @@ class Config:
70
  # Rate Limiting
71
  MAX_RETRIES = 5
72
  RATE_LIMIT_DELAY = 2.0
73
- EMBEDDING_BATCH_SIZE = 10 # OpenAI allows more requests
74
- EMBEDDING_DELAY = 1.0 # Lower delay for OpenAI
75
 
76
  # Model Parameters
77
  TEMPERATURE = 0.5
78
- MAX_OUTPUT_TOKENS = 120000
79
  RETRIEVER_K = 10
80
 
81
  # Paths
@@ -86,7 +90,7 @@ config = Config()
86
 
87
  # Request/Response Models
88
  class QueryRequest(BaseModel):
89
- query: str = Field(..., min_length=1, max_length=120000)
90
 
91
  class QueryResponse(BaseModel):
92
  answer: str
@@ -100,14 +104,18 @@ class SystemStatus(BaseModel):
100
  is_initialized: bool
101
  model_name: str
102
  embedding_model: str
103
- base_url: str
 
 
104
  vector_store_ready: bool
105
  total_chunks: int = 0
106
  api_key_configured: bool
107
 
108
  class InitializeRequest(BaseModel):
109
  api_key: str = Field(..., min_length=1)
110
- base_url: Optional[str] = Field(default=None, description="OpenAI compatible API base URL")
 
 
111
  llm_model: Optional[str] = Field(default=None, description="LLM model name")
112
  embedding_model: Optional[str] = Field(default=None, description="Embedding model name")
113
 
@@ -125,7 +133,7 @@ def estimate_tokens(text: str) -> int:
125
  except:
126
  return len(text.split()) * 1.3 # Rough estimate
127
 
128
- # Rate limiting helper functions
129
  async def rate_limited_embedding_creation(chunks, embeddings):
130
  """Create embeddings with rate limiting to avoid API limits."""
131
  logger.info(f"Creating embeddings for {len(chunks)} chunks with rate limiting...")
@@ -188,7 +196,7 @@ async def rate_limited_embedding_creation(chunks, embeddings):
188
  logger.info("Successfully created and merged all embeddings")
189
  return final_vector_store
190
 
191
- # Custom Callback Handler for OpenAI
192
  class TokenUsageCallbackHandler(BaseCallbackHandler):
193
  """Callback handler to track token usage in OpenAI calls."""
194
 
@@ -239,7 +247,14 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
239
  }
240
 
241
  # RAG System Functions
242
- async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_model: str = None, embedding_model: str = None):
 
 
 
 
 
 
 
243
  """Initialize or reinitialize the RAG system with OpenAI compatible API."""
244
  global vector_store, qa_chain, token_callback_handler, is_initialized, config
245
 
@@ -247,13 +262,14 @@ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_m
247
  # Update configuration
248
  if api_key:
249
  config.OPENAI_API_KEY = api_key
250
- os.environ["OPENAI_API_KEY"] = api_key
251
  elif not config.OPENAI_API_KEY:
252
  raise ValueError("OpenAI API key not provided")
253
 
254
- if base_url:
255
- config.OPENAI_BASE_URL = base_url
256
- os.environ["OPENAI_BASE_URL"] = base_url
 
 
257
 
258
  if llm_model:
259
  config.LLM_MODEL = llm_model
@@ -261,8 +277,10 @@ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_m
261
  if embedding_model:
262
  config.EMBEDDING_MODEL = embedding_model
263
 
 
264
  logger.info(f"Initializing RAG system with:")
265
- logger.info(f" - Base URL: {config.OPENAI_BASE_URL}")
 
266
  logger.info(f" - LLM Model: {config.LLM_MODEL}")
267
  logger.info(f" - Embedding Model: {config.EMBEDDING_MODEL}")
268
 
@@ -284,16 +302,15 @@ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_m
284
  chunks = text_splitter.split_documents(documents)
285
  logger.info(f"Document split into {len(chunks)} chunks")
286
 
287
- # Check if we have too many chunks that might cause rate limiting
288
  if len(chunks) > 200:
289
  logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size to reduce API calls.")
290
 
291
- # Initialize OpenAI embeddings
292
  embeddings = OpenAIEmbeddings(
293
  model=config.EMBEDDING_MODEL,
294
  openai_api_key=config.OPENAI_API_KEY,
295
- openai_api_base=config.OPENAI_BASE_URL,
296
- chunk_size=1000 # Embedding batch size
297
  )
298
 
299
  # Test embedding connection
@@ -326,11 +343,11 @@ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_m
326
  vector_store.save_local(config.INDEX_PATH)
327
  logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
328
 
329
- # Initialize OpenAI LLM
330
  llm = ChatOpenAI(
331
  model_name=config.LLM_MODEL,
332
  openai_api_key=config.OPENAI_API_KEY,
333
- openai_api_base=config.OPENAI_BASE_URL,
334
  temperature=config.TEMPERATURE,
335
  max_tokens=config.MAX_OUTPUT_TOKENS,
336
  callbacks=[token_callback_handler],
@@ -339,6 +356,9 @@ async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_m
339
 
340
  # Test LLM connection
341
  try:
 
 
 
342
  test_response = llm.invoke("Test connection")
343
  logger.info("Successfully connected to LLM API")
344
  except Exception as e:
@@ -390,6 +410,7 @@ async def startup_event():
390
  """Initialize the system on startup if API key is available."""
391
  if config.OPENAI_API_KEY:
392
  try:
 
393
  await initialize_rag_system()
394
  except Exception as e:
395
  logger.warning(f"Could not initialize on startup: {str(e)}")
@@ -424,7 +445,9 @@ async def get_status():
424
  is_initialized=is_initialized,
425
  model_name=config.LLM_MODEL,
426
  embedding_model=config.EMBEDDING_MODEL,
427
- base_url=config.OPENAI_BASE_URL,
 
 
428
  vector_store_ready=vector_store is not None,
429
  total_chunks=len(vector_store.docstore._dict) if vector_store else 0,
430
  api_key_configured=bool(config.OPENAI_API_KEY)
@@ -434,17 +457,21 @@ async def get_status():
434
  async def initialize_system(request: InitializeRequest):
435
  """Initialize the RAG system with provided API key and configuration."""
436
  try:
 
437
  await initialize_rag_system(
438
  api_key=request.api_key,
439
- base_url=request.base_url,
 
440
  llm_model=request.llm_model,
441
  embedding_model=request.embedding_model
442
  )
 
443
  return {
444
  "success": True,
445
  "message": "System initialized successfully",
446
  "config": {
447
- "base_url": config.OPENAI_BASE_URL,
 
448
  "llm_model": config.LLM_MODEL,
449
  "embedding_model": config.EMBEDDING_MODEL
450
  }
@@ -512,6 +539,8 @@ async def process_query(request: QueryRequest):
512
  logger.error(f"Error processing query: {str(e)}")
513
  raise HTTPException(status_code=500, detail=str(e))
514
 
 
 
515
  @app.get("/api/token-stats", response_model=Dict[str, Any])
516
  async def get_token_stats():
517
  """Get token usage statistics."""
@@ -570,8 +599,10 @@ async def health_check():
570
  @app.get("/api/config")
571
  async def get_config():
572
  """Get current configuration."""
 
573
  return {
574
- "base_url": config.OPENAI_BASE_URL,
 
575
  "llm_model": config.LLM_MODEL,
576
  "embedding_model": config.EMBEDDING_MODEL,
577
  "chunk_size": config.CHUNK_SIZE,
 
57
  class Config:
58
  # OpenAI Compatible API Configuration
59
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
60
+ # REMOVED: OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
61
+
62
+ # ADDED: Separate base URLs for LLM and Embeddings
63
+ LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
64
+ EMBEDDING_BASE_URL = os.getenv("EMBEDDING_BASE_URL", "https://api.openai.com/v1")
65
 
66
  # Model Configuration
67
+ LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo")
68
+ EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
69
 
70
  # Document Processing
71
  CHUNK_SIZE = 500
 
74
  # Rate Limiting
75
  MAX_RETRIES = 5
76
  RATE_LIMIT_DELAY = 2.0
77
+ EMBEDDING_BATCH_SIZE = 10
78
+ EMBEDDING_DELAY = 1.0
79
 
80
  # Model Parameters
81
  TEMPERATURE = 0.5
82
+ MAX_OUTPUT_TOKENS = 2000
83
  RETRIEVER_K = 10
84
 
85
  # Paths
 
90
 
91
  # Request/Response Models
92
  class QueryRequest(BaseModel):
93
+ query: str = Field(..., min_length=1, max_length=10000)
94
 
95
  class QueryResponse(BaseModel):
96
  answer: str
 
104
  is_initialized: bool
105
  model_name: str
106
  embedding_model: str
107
+ # CHANGED: Use separate URLs
108
+ llm_base_url: str
109
+ embedding_base_url: str
110
  vector_store_ready: bool
111
  total_chunks: int = 0
112
  api_key_configured: bool
113
 
114
  class InitializeRequest(BaseModel):
115
  api_key: str = Field(..., min_length=1)
116
+ # CHANGED: Accept separate URLs
117
+ llm_base_url: Optional[str] = Field(default=None, description="LLM (text generation) API base URL")
118
+ embedding_base_url: Optional[str] = Field(default=None, description="Embedding model API base URL")
119
  llm_model: Optional[str] = Field(default=None, description="LLM model name")
120
  embedding_model: Optional[str] = Field(default=None, description="Embedding model name")
121
 
 
133
  except:
134
  return len(text.split()) * 1.3 # Rough estimate
135
 
136
+ # Rate limiting helper functions (No changes needed here)
137
  async def rate_limited_embedding_creation(chunks, embeddings):
138
  """Create embeddings with rate limiting to avoid API limits."""
139
  logger.info(f"Creating embeddings for {len(chunks)} chunks with rate limiting...")
 
196
  logger.info("Successfully created and merged all embeddings")
197
  return final_vector_store
198
 
199
+ # Custom Callback Handler for OpenAI (No changes needed here)
200
  class TokenUsageCallbackHandler(BaseCallbackHandler):
201
  """Callback handler to track token usage in OpenAI calls."""
202
 
 
247
  }
248
 
249
  # RAG System Functions
250
+ # CHANGED: Function signature to accept separate URLs
251
+ async def initialize_rag_system(
252
+ api_key: str = None,
253
+ llm_base_url: str = None,
254
+ embedding_base_url: str = None,
255
+ llm_model: str = None,
256
+ embedding_model: str = None
257
+ ):
258
  """Initialize or reinitialize the RAG system with OpenAI compatible API."""
259
  global vector_store, qa_chain, token_callback_handler, is_initialized, config
260
 
 
262
  # Update configuration
263
  if api_key:
264
  config.OPENAI_API_KEY = api_key
 
265
  elif not config.OPENAI_API_KEY:
266
  raise ValueError("OpenAI API key not provided")
267
 
268
+ # CHANGED: Update separate base URLs
269
+ if llm_base_url:
270
+ config.LLM_BASE_URL = llm_base_url
271
+ if embedding_base_url:
272
+ config.EMBEDDING_BASE_URL = embedding_base_url
273
 
274
  if llm_model:
275
  config.LLM_MODEL = llm_model
 
277
  if embedding_model:
278
  config.EMBEDDING_MODEL = embedding_model
279
 
280
+ # CHANGED: Update logging
281
  logger.info(f"Initializing RAG system with:")
282
+ logger.info(f" - LLM Base URL: {config.LLM_BASE_URL}")
283
+ logger.info(f" - Embedding Base URL: {config.EMBEDDING_BASE_URL}")
284
  logger.info(f" - LLM Model: {config.LLM_MODEL}")
285
  logger.info(f" - Embedding Model: {config.EMBEDDING_MODEL}")
286
 
 
302
  chunks = text_splitter.split_documents(documents)
303
  logger.info(f"Document split into {len(chunks)} chunks")
304
 
 
305
  if len(chunks) > 200:
306
  logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size to reduce API calls.")
307
 
308
+ # CHANGED: Initialize OpenAI embeddings with its specific base URL
309
  embeddings = OpenAIEmbeddings(
310
  model=config.EMBEDDING_MODEL,
311
  openai_api_key=config.OPENAI_API_KEY,
312
+ openai_api_base=config.EMBEDDING_BASE_URL,
313
+ chunk_size=1000
314
  )
315
 
316
  # Test embedding connection
 
343
  vector_store.save_local(config.INDEX_PATH)
344
  logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
345
 
346
+ # CHANGED: Initialize OpenAI LLM with its specific base URL
347
  llm = ChatOpenAI(
348
  model_name=config.LLM_MODEL,
349
  openai_api_key=config.OPENAI_API_KEY,
350
+ openai_api_base=config.LLM_BASE_URL,
351
  temperature=config.TEMPERATURE,
352
  max_tokens=config.MAX_OUTPUT_TOKENS,
353
  callbacks=[token_callback_handler],
 
356
 
357
  # Test LLM connection
358
  try:
359
+ # Note: the API key and base URLs are passed directly to the client constructors,
360
+ # so this function no longer mirrors them into os.environ. Other code that reads
361
+ # those environment variables will not see values supplied at runtime.
362
  test_response = llm.invoke("Test connection")
363
  logger.info("Successfully connected to LLM API")
364
  except Exception as e:
 
410
  """Initialize the system on startup if API key is available."""
411
  if config.OPENAI_API_KEY:
412
  try:
413
+ # This will use the URLs from environment variables by default
414
  await initialize_rag_system()
415
  except Exception as e:
416
  logger.warning(f"Could not initialize on startup: {str(e)}")
 
445
  is_initialized=is_initialized,
446
  model_name=config.LLM_MODEL,
447
  embedding_model=config.EMBEDDING_MODEL,
448
+ # CHANGED: Return separate URLs
449
+ llm_base_url=config.LLM_BASE_URL,
450
+ embedding_base_url=config.EMBEDDING_BASE_URL,
451
  vector_store_ready=vector_store is not None,
452
  total_chunks=len(vector_store.docstore._dict) if vector_store else 0,
453
  api_key_configured=bool(config.OPENAI_API_KEY)
 
457
  async def initialize_system(request: InitializeRequest):
458
  """Initialize the RAG system with provided API key and configuration."""
459
  try:
460
+ # CHANGED: Pass separate URLs to the initialization function
461
  await initialize_rag_system(
462
  api_key=request.api_key,
463
+ llm_base_url=request.llm_base_url,
464
+ embedding_base_url=request.embedding_base_url,
465
  llm_model=request.llm_model,
466
  embedding_model=request.embedding_model
467
  )
468
+ # CHANGED: Return separate URLs in the response
469
  return {
470
  "success": True,
471
  "message": "System initialized successfully",
472
  "config": {
473
+ "llm_base_url": config.LLM_BASE_URL,
474
+ "embedding_base_url": config.EMBEDDING_BASE_URL,
475
  "llm_model": config.LLM_MODEL,
476
  "embedding_model": config.EMBEDDING_MODEL
477
  }
 
539
  logger.error(f"Error processing query: {str(e)}")
540
  raise HTTPException(status_code=500, detail=str(e))
541
 
542
+ # (No changes needed in the remaining endpoints)
543
+
544
  @app.get("/api/token-stats", response_model=Dict[str, Any])
545
  async def get_token_stats():
546
  """Get token usage statistics."""
 
599
  @app.get("/api/config")
600
  async def get_config():
601
  """Get current configuration."""
602
+ # CHANGED: Return separate URLs
603
  return {
604
+ "llm_base_url": config.LLM_BASE_URL,
605
+ "embedding_base_url": config.EMBEDDING_BASE_URL,
606
  "llm_model": config.LLM_MODEL,
607
  "embedding_model": config.EMBEDDING_MODEL,
608
  "chunk_size": config.CHUNK_SIZE,