Update app.py

app.py CHANGED
@@ -6,6 +6,7 @@ from datetime import datetime
 import json
 import time
 from pathlib import Path
+import random
 
 from fastapi import FastAPI, HTTPException, File, UploadFile, BackgroundTasks
 from fastapi.middleware.cors import CORSMiddleware
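A note on this hunk: it adds only `random`, but the code introduced further down also awaits `asyncio.sleep` and mounts `StaticFiles`. Neither name is imported anywhere in the diff, so both must already be present in the unchanged top of app.py (lines 1-5 are outside the hunk), or the change fails at import time. For reference, the imports the new code paths rely on:

    import asyncio  # used by the new backoff sleeps
    import random   # added in this hunk, supplies the jitter
    from fastapi.staticfiles import StaticFiles  # used by the new static mount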
@@ -55,15 +56,17 @@ is_initialized = False
 # Configuration
 class Config:
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
-    CHUNK_SIZE =
-    CHUNK_OVERLAP =
-    MAX_RETRIES =
-    RATE_LIMIT_DELAY =
+    CHUNK_SIZE = 500  # Reduced chunk size to create fewer embeddings
+    CHUNK_OVERLAP = 50  # Reduced overlap
+    MAX_RETRIES = 5  # Increased retries
+    RATE_LIMIT_DELAY = 2.0  # Increased delay
+    EMBEDDING_BATCH_SIZE = 5  # Process embeddings in small batches
+    EMBEDDING_DELAY = 1.5  # Delay between embedding batches
     MODEL_NAME = "gemma-3-27b-it"
     EMBEDDING_MODEL = "models/embedding-001"
     TEMPERATURE = 0.5
-    MAX_OUTPUT_TOKENS =
-    RETRIEVER_K = 15
+    MAX_OUTPUT_TOKENS = 20000
+    RETRIEVER_K = 15  # Reduced retrieval count
     INDEX_PATH = "faiss_maize_index"
     DATA_PATH = "data/maize_data.txt"
 
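Back-of-envelope for what these knobs imply, using only numbers from this diff (the 200k-character document is a made-up example, and the real chunk count depends on the splitter's separators):

    # rough chunk/batch count for a hypothetical 200k-character document
    chunk_size, overlap = 500, 50
    doc_chars = 200_000
    chunks = doc_chars // (chunk_size - overlap)  # ~444 chunks (splitter-dependent)
    batches = -(-chunks // 5)                     # ceil; EMBEDDING_BATCH_SIZE = 5
    min_sleep = (batches - 1) * 1.5               # EMBEDDING_DELAY between batches
    print(chunks, batches, f"{min_sleep:.0f}s")   # 444 89 132s

So even with the reduced settings, indexing a document that size spends roughly two minutes in deliberate sleeps before any retry backoff is added.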
@@ -71,7 +74,7 @@ config = Config()
 
 # Request/Response Models
 class QueryRequest(BaseModel):
-    query: str = Field(..., min_length=1, max_length=
+    query: str = Field(..., min_length=1, max_length=100000)
 
 class QueryResponse(BaseModel):
     answer: str
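Since QueryRequest is the request-body model, this Field constraint rejects out-of-range queries before the endpoint runs, and FastAPI turns the validation error into a 422 response automatically. A minimal sketch of that behavior (the error-type string assumes pydantic v2; the app's pydantic version isn't visible in the diff):

    from pydantic import BaseModel, Field, ValidationError

    class QueryRequest(BaseModel):
        query: str = Field(..., min_length=1, max_length=100000)

    try:
        QueryRequest(query="")
    except ValidationError as e:
        print(e.errors()[0]["type"])  # 'string_too_short' under pydantic v2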
@@ -103,6 +106,64 @@ def estimate_tokens(text: str) -> int:
     """Estimates token count for a given text."""
     return len(tokenizer.encode(text))
 
+# Rate limiting helper functions
+async def rate_limited_embedding_creation(chunks, embeddings):
+    """Create embeddings with rate limiting to avoid API limits."""
+    logger.info(f"Creating embeddings for {len(chunks)} chunks with rate limiting...")
+
+    # Process chunks in smaller batches
+    batch_size = config.EMBEDDING_BATCH_SIZE
+    all_embeddings = []
+
+    for i in range(0, len(chunks), batch_size):
+        batch = chunks[i:i + batch_size]
+        logger.info(f"Processing batch {i//batch_size + 1}/{(len(chunks) + batch_size - 1)//batch_size} ({len(batch)} chunks)")
+
+        retry_count = 0
+        max_retries = 5
+
+        while retry_count < max_retries:
+            try:
+                # Create vector store for this batch
+                if i == 0:
+                    # First batch - create new vector store
+                    vector_store_batch = FAISS.from_documents(batch, embeddings)
+                    all_embeddings.append(vector_store_batch)
+                else:
+                    # Subsequent batches - merge with existing
+                    vector_store_batch = FAISS.from_documents(batch, embeddings)
+                    all_embeddings.append(vector_store_batch)
+
+                logger.info(f"Successfully processed batch {i//batch_size + 1}")
+                break
+
+            except Exception as e:
+                retry_count += 1
+                delay = config.EMBEDDING_DELAY * (2 ** retry_count) + random.uniform(0, 1)
+                logger.warning(f"Batch {i//batch_size + 1} failed (attempt {retry_count}): {str(e)}")
+                logger.info(f"Waiting {delay:.2f} seconds before retry...")
+                await asyncio.sleep(delay)
+
+                if retry_count >= max_retries:
+                    raise Exception(f"Failed to process batch after {max_retries} attempts: {str(e)}")
+
+        # Delay between batches to respect rate limits
+        if i + batch_size < len(chunks):
+            delay = config.EMBEDDING_DELAY + random.uniform(0.5, 1.0)
+            logger.info(f"Waiting {delay:.2f} seconds before next batch...")
+            await asyncio.sleep(delay)
+
+    # Merge all vector stores
+    logger.info("Merging all vector store batches...")
+    final_vector_store = all_embeddings[0]
+
+    for i in range(1, len(all_embeddings)):
+        final_vector_store.merge_from(all_embeddings[i])
+        logger.info(f"Merged batch {i + 1}/{len(all_embeddings)}")
+
+    logger.info("Successfully created and merged all embeddings")
+    return final_vector_store
+
 # Custom Callback Handler
 class TokenUsageCallbackHandler(BaseCallbackHandler):
     """Callback handler to track token usage in LLM calls."""
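The new function builds one small FAISS store per batch and folds them together at the end. A minimal offline sketch of that batch-and-merge pattern (FakeEmbeddings and the import paths below are the current langchain_community ones; the app's own FAISS import isn't shown in this diff):

    from langchain_community.embeddings import FakeEmbeddings
    from langchain_community.vectorstores import FAISS

    texts = ["a", "b", "c", "d", "e"]
    emb = FakeEmbeddings(size=8)
    # one store per batch of 2, then fold the rest into the first
    stores = [FAISS.from_texts(texts[i:i + 2], emb) for i in range(0, len(texts), 2)]
    merged = stores[0]
    for s in stores[1:]:
        merged.merge_from(s)
    print(merged.index.ntotal)  # 5

Worth noting: the `if i == 0` and `else` branches in the committed function do exactly the same thing, and the actual merging only happens in the final loop, so the branch could be collapsed without changing behavior.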
@@ -181,26 +242,37 @@ async def initialize_rag_system(api_key: str = None):
     chunks = text_splitter.split_documents(documents)
     logger.info(f"Document split into {len(chunks)} chunks")
 
-    #
+    # Check if we have too many chunks that might cause rate limiting
+    if len(chunks) > 100:
+        logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size or reducing document size to avoid rate limits.")
+
+    # Initialize embeddings with retry logic
     embeddings = GoogleGenerativeAIEmbeddings(
         model=config.EMBEDDING_MODEL,
         google_api_key=config.GOOGLE_API_KEY
     )
 
-    # Create or load FAISS index
+    # Create or load FAISS index with rate limiting
     if os.path.exists(config.INDEX_PATH):
-
-
-
-
-
-
+        try:
+            vector_store = FAISS.load_local(
+                config.INDEX_PATH,
+                embeddings,
+                allow_dangerous_deserialization=True
+            )
+            logger.info(f"Loaded existing FAISS index from '{config.INDEX_PATH}'")
+        except Exception as e:
+            logger.warning(f"Failed to load existing index: {str(e)}")
+            logger.info("Creating new index...")
+            vector_store = await rate_limited_embedding_creation(chunks, embeddings)
+            vector_store.save_local(config.INDEX_PATH)
+            logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
     else:
-        vector_store =
+        vector_store = await rate_limited_embedding_creation(chunks, embeddings)
         vector_store.save_local(config.INDEX_PATH)
         logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")
 
-    # Initialize LLM
+    # Initialize LLM with retry and rate limiting
     llm = ChatGoogleGenerativeAI(
         model=config.MODEL_NAME,
         google_api_key=config.GOOGLE_API_KEY,
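One detail worth flagging: recent langchain versions refuse to unpickle a saved FAISS docstore unless `allow_dangerous_deserialization=True` is passed, which is reasonable here since the app only reloads an index it wrote itself. A minimal save/load round trip under that assumption (FakeEmbeddings again, so it runs offline):

    from langchain_community.embeddings import FakeEmbeddings
    from langchain_community.vectorstores import FAISS

    emb = FakeEmbeddings(size=8)
    FAISS.from_texts(["maize"], emb).save_local("demo_index")
    store = FAISS.load_local("demo_index", emb,
                             allow_dangerous_deserialization=True)
    print(store.index.ntotal)  # 1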
@@ -217,7 +289,6 @@ You are an expert in maize agriculture. Use the following context ONLY to answer
 If there have any query about getting personal information of a person then don't get it and reply full answer accordingly context.
 Answer should be concise clear and with easy language.
 
-
 Context:
 {context}
 
@@ -258,8 +329,11 @@ async def startup_event():
 @app.get("/", response_class=HTMLResponse)
 async def root():
     """Serve the main HTML page."""
-
-
+    try:
+        with open("static/index.html", "r") as f:
+            return f.read()
+    except FileNotFoundError:
+        return "<h1>Static files not found. Please ensure static/index.html exists.</h1>"
 
 @app.get("/api/status", response_model=SystemStatus)
 async def get_status():
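Reading the file by hand works; an alternative (not what this commit does, just a common FastAPI idiom) is FileResponse, which streams the file and sets the Content-Type header itself:

    from fastapi.responses import FileResponse

    @app.get("/")
    async def root():
        # raises at response time if the file is missing, so the commit's
        # try/except version is safer when static/ may not exist
        return FileResponse("static/index.html")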
@@ -302,7 +376,7 @@ async def process_query(request: QueryRequest):
     if token_callback_handler:
         token_callback_handler.last_call_tokens = {}
 
-    # Process query with retry logic
+    # Process query with retry logic and exponential backoff
     for attempt in range(config.MAX_RETRIES):
         try:
             result = qa_chain({"query": request.query})
@@ -310,7 +384,11 @@ async def process_query(request: QueryRequest):
         except Exception as e:
             if attempt == config.MAX_RETRIES - 1:
                 raise
-
+
+            delay = config.RATE_LIMIT_DELAY * (2 ** attempt) + random.uniform(0, 1)
+            logger.warning(f"Query attempt {attempt + 1} failed: {str(e)}")
+            logger.info(f"Retrying in {delay:.2f} seconds...")
+            await asyncio.sleep(delay)
 
     processing_time = time.time() - start_time
 
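With RATE_LIMIT_DELAY = 2.0 and MAX_RETRIES = 5, the schedule this loop produces is 2s, 4s, 8s, 16s (plus up to 1s of jitter each) after the first four failures; the fifth failure re-raises without sleeping. Spelled out:

    import random

    RATE_LIMIT_DELAY, MAX_RETRIES = 2.0, 5
    for attempt in range(MAX_RETRIES - 1):  # the last attempt re-raises instead
        delay = RATE_LIMIT_DELAY * (2 ** attempt) + random.uniform(0, 1)
        print(f"attempt {attempt + 1} failed -> sleep {delay:.2f}s")

So a query that fails every attempt spends roughly 30 seconds retrying before the final exception propagates.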
@@ -349,17 +427,24 @@ async def get_token_stats():
 async def upload_document(file: UploadFile = File(...)):
     """Upload a new document to replace the existing one."""
     try:
+        # Validate file
+        if not file.filename.endswith('.txt'):
+            raise HTTPException(status_code=400, detail="Only .txt files are supported")
+
         # Save uploaded file
         content = await file.read()
         with open(config.DATA_PATH, "wb") as f:
             f.write(content)
 
+        logger.info(f"Uploaded new document: {file.filename}")
+
         # Reinitialize the system with new data
         if config.GOOGLE_API_KEY:
             # Remove old index to force recreation
             if os.path.exists(config.INDEX_PATH):
                 import shutil
                 shutil.rmtree(config.INDEX_PATH)
+                logger.info("Removed old FAISS index")
 
             await initialize_rag_system()
             return {"success": True, "message": "Document uploaded and system reinitialized"}
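One edge the new validation misses: Starlette types UploadFile.filename as optional, so a client that omits the filename hits an AttributeError inside the try block and gets a 500 instead of the intended 400. A slightly more defensive variant of the same check:

    # guard against a missing filename and uppercase extensions
    if not (file.filename and file.filename.lower().endswith(".txt")):
        raise HTTPException(status_code=400, detail="Only .txt files are supported")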
@@ -367,10 +452,22 @@ async def upload_document(file: UploadFile = File(...)):
         return {"success": True, "message": "Document uploaded. Please initialize the system."}
 
     except Exception as e:
+        logger.error(f"Error uploading document: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return {
+        "status": "healthy",
+        "timestamp": datetime.now().isoformat(),
+        "system_initialized": is_initialized
+    }
+
 # Mount static files
-
+if os.path.exists("static"):
+    app.mount("/static", StaticFiles(directory="static"), name="static")
 
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)
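A quick way to exercise the new endpoint without deploying (a sketch; it assumes the module is importable as `app` and that httpx, which FastAPI's TestClient needs, is installed):

    from fastapi.testclient import TestClient

    from app import app  # the FastAPI instance defined in app.py

    client = TestClient(app)
    print(client.get("/health").json())
    # e.g. {'status': 'healthy', 'timestamp': '...', 'system_initialized': False}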