Update app.py
app.py
CHANGED
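Summary: this commit rewires app.py to talk to any OpenAI-compatible API. The base URL and model names become environment-driven Config values, the LLM and embeddings move to langchain_openai's ChatOpenAI / OpenAIEmbeddings, connection tests and rate-limit-aware retries are added, and a new /api/config endpoint exposes the active settings.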
@@ -21,7 +21,7 @@ from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain.callbacks.base import BaseCallbackHandler
-from …
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 import tiktoken

 # Configure logging
@@ -55,18 +55,30 @@ is_initialized = False

 # Configuration
 class Config:
-    … (9 lines truncated in the diff view)
+    # OpenAI Compatible API Configuration
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+    OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")  # Can be changed to compatible APIs
+
+    # Model Configuration
+    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo")  # Can be changed to any compatible model
+    EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")  # Can be changed to compatible embedding model
+
+    # Document Processing
+    CHUNK_SIZE = 500
+    CHUNK_OVERLAP = 50
+
+    # Rate Limiting
+    MAX_RETRIES = 5
+    RATE_LIMIT_DELAY = 2.0
+    EMBEDDING_BATCH_SIZE = 10  # OpenAI allows more requests
+    EMBEDDING_DELAY = 1.0  # Lower delay for OpenAI
+
+    # Model Parameters
     TEMPERATURE = 0.5
-    MAX_OUTPUT_TOKENS = …
-    RETRIEVER_K = …
+    MAX_OUTPUT_TOKENS = 2000
+    RETRIEVER_K = 10
+
+    # Paths
     INDEX_PATH = "faiss_maize_index"
     DATA_PATH = "data/maize_data.txt"

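Since every Config value above reads from os.getenv, the app can be pointed at any OpenAI-compatible server without code changes. A minimal sketch (the endpoint URL and model names below are illustrative, not from the diff):

    import os

    # Set these before the app module is imported, so Config picks them up.
    os.environ["OPENAI_BASE_URL"] = "http://localhost:11434/v1"   # e.g. a local OpenAI-compatible gateway (illustrative)
    os.environ["LLM_MODEL"] = "some-local-model"                  # illustrative
    os.environ["EMBEDDING_MODEL"] = "some-embedding-model"        # illustrative
    os.environ["OPENAI_API_KEY"] = "sk-..."                       # placeholder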
@@ -74,7 +86,7 @@ config = Config()

 # Request/Response Models
 class QueryRequest(BaseModel):
-    query: str = Field(..., min_length=1, max_length=…
+    query: str = Field(..., min_length=1, max_length=10000)

 class QueryResponse(BaseModel):
     answer: str
@@ -88,12 +100,16 @@ class SystemStatus(BaseModel):
     is_initialized: bool
     model_name: str
     embedding_model: str
+    base_url: str
     vector_store_ready: bool
     total_chunks: int = 0
     api_key_configured: bool

 class InitializeRequest(BaseModel):
     api_key: str = Field(..., min_length=1)
+    base_url: Optional[str] = Field(default=None, description="OpenAI compatible API base URL")
+    llm_model: Optional[str] = Field(default=None, description="LLM model name")
+    embedding_model: Optional[str] = Field(default=None, description="Embedding model name")

 # Token counting utilities
 try:
@@ -104,7 +120,10 @@ except:

 def estimate_tokens(text: str) -> int:
     """Estimates token count for a given text."""
-    …
+    try:
+        return len(tokenizer.encode(text))
+    except:
+        return int(len(text.split()) * 1.3)  # Rough estimate; cast keeps the declared int return type

 # Rate limiting helper functions
 async def rate_limited_embedding_creation(chunks, embeddings):
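For reference, this is how the `tokenizer` used above behaves, assuming the truncated try/except near the top of the file creates it with tiktoken's `cl100k_base` encoding (an assumption; that block is cut off in this view):

    import tiktoken

    tokenizer = tiktoken.get_encoding("cl100k_base")  # assumed encoding
    print(len(tokenizer.encode("How deep should maize seed be planted?")))  # -> token count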
@@ -141,7 +160,12 @@ async def rate_limited_embedding_creation(chunks, embeddings):
                 retry_count += 1
                 delay = config.EMBEDDING_DELAY * (2 ** retry_count) + random.uniform(0, 1)
                 logger.warning(f"Batch {i//batch_size + 1} failed (attempt {retry_count}): {str(e)}")
-                …
+
+                if "rate limit" in str(e).lower() or "429" in str(e):
+                    logger.info(f"Rate limit detected. Waiting {delay:.2f} seconds before retry...")
+                else:
+                    logger.info(f"API error detected. Waiting {delay:.2f} seconds before retry...")
+
                 await asyncio.sleep(delay)

             if retry_count >= max_retries:
@@ -149,7 +173,7 @@ async def rate_limited_embedding_creation(chunks, embeddings):

         # Delay between batches to respect rate limits
         if i + batch_size < len(chunks):
-            delay = config.EMBEDDING_DELAY + random.uniform(0.…
+            delay = config.EMBEDDING_DELAY + random.uniform(0.2, 0.5)
             logger.info(f"Waiting {delay:.2f} seconds before next batch...")
             await asyncio.sleep(delay)

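Both retry loops in this file follow the same exponential-backoff-with-jitter pattern; a standalone sketch of the delay calculation:

    import random

    def backoff_delay(base: float, attempt: int) -> float:
        # base * 2^attempt, plus up to 1s of jitter so parallel clients desynchronize
        return base * (2 ** attempt) + random.uniform(0, 1)

    # e.g. backoff_delay(1.0, 3) -> somewhere between 8.0 and 9.0 seconds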
@@ -164,9 +188,9 @@ async def rate_limited_embedding_creation(chunks, embeddings):
     logger.info("Successfully created and merged all embeddings")
     return final_vector_store

-# Custom Callback Handler
+# Custom Callback Handler for OpenAI
 class TokenUsageCallbackHandler(BaseCallbackHandler):
-    """Callback handler to track token usage in …
+    """Callback handler to track token usage in OpenAI calls."""

     def __init__(self):
         super().__init__()
@@ -179,14 +203,15 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
         self.last_call_tokens = {}

     def on_llm_end(self, response, **kwargs):
-        """Collect token usage from the …
+        """Collect token usage from the OpenAI response."""
         self.total_llm_calls += 1
         llm_output = response.llm_output

-        … (4 lines truncated in the diff view)
+        # OpenAI token usage structure
+        if llm_output and 'token_usage' in llm_output:
+            usage = llm_output['token_usage']
+            prompt_tokens = usage.get('prompt_tokens', 0)
+            completion_tokens = usage.get('completion_tokens', 0)

             self.total_prompt_tokens += prompt_tokens
             self.total_completion_tokens += completion_tokens
@@ -198,6 +223,9 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
             }

             logger.info(f"Token usage - Prompt: {prompt_tokens}, Completion: {completion_tokens}")
+        else:
+            # Fallback token estimation if usage not available
+            logger.info("Token usage not available from API response")

     def get_last_call_usage(self):
         return self.last_call_tokens
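The handler above expects the OpenAI-style `llm_output` payload; an illustrative shape (all field values made up):

    llm_output = {
        "token_usage": {
            "prompt_tokens": 512,      # illustrative
            "completion_tokens": 128,  # illustrative
            "total_tokens": 640,
        },
        "model_name": "gpt-3.5-turbo",
    }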
@@ -211,19 +239,32 @@ class TokenUsageCallbackHandler(BaseCallbackHandler):
         }

 # RAG System Functions
-async def initialize_rag_system(api_key: str = None):
-    """Initialize or reinitialize the RAG system."""
+async def initialize_rag_system(api_key: str = None, base_url: str = None, llm_model: str = None, embedding_model: str = None):
+    """Initialize or reinitialize the RAG system with OpenAI compatible API."""
     global vector_store, qa_chain, token_callback_handler, is_initialized, config

     try:
-        # …
+        # Update configuration
         if api_key:
-            config.…
-            os.environ["…
-        elif not config.…
-            raise ValueError("…
+            config.OPENAI_API_KEY = api_key
+            os.environ["OPENAI_API_KEY"] = api_key
+        elif not config.OPENAI_API_KEY:
+            raise ValueError("OpenAI API key not provided")
+
+        if base_url:
+            config.OPENAI_BASE_URL = base_url
+            os.environ["OPENAI_BASE_URL"] = base_url
+
+        if llm_model:
+            config.LLM_MODEL = llm_model
+
+        if embedding_model:
+            config.EMBEDDING_MODEL = embedding_model

-        logger.info("Initializing RAG system …
+        logger.info(f"Initializing RAG system with:")
+        logger.info(f"  - Base URL: {config.OPENAI_BASE_URL}")
+        logger.info(f"  - LLM Model: {config.LLM_MODEL}")
+        logger.info(f"  - Embedding Model: {config.EMBEDDING_MODEL}")

         # Initialize token callback handler
         token_callback_handler = TokenUsageCallbackHandler()
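With the new keyword arguments, re-initialization against a different backend might look like this (URL and key are placeholders):

    # inside any async context:
    await initialize_rag_system(
        api_key="sk-...",                        # placeholder
        base_url="https://example-llm-host/v1",  # any OpenAI-compatible endpoint (illustrative)
        llm_model="gpt-3.5-turbo",
        embedding_model="text-embedding-ada-002",
    )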
@@ -232,26 +273,39 @@ async def initialize_rag_system(api_key: str = None):
         if not os.path.exists(config.DATA_PATH):
             raise FileNotFoundError(f"Data file not found: {config.DATA_PATH}")

-        loader = TextLoader(config.DATA_PATH)
+        loader = TextLoader(config.DATA_PATH, encoding='utf-8')
         documents = loader.load()

         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=config.CHUNK_SIZE,
-            chunk_overlap=config.CHUNK_OVERLAP
+            chunk_overlap=config.CHUNK_OVERLAP,
+            separators=["\n\n", "\n", " ", ""]
         )
         chunks = text_splitter.split_documents(documents)
         logger.info(f"Document split into {len(chunks)} chunks")

         # Check if we have too many chunks that might cause rate limiting
-        if len(chunks) > …
-            logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size …
+        if len(chunks) > 200:
+            logger.warning(f"Large number of chunks ({len(chunks)}). Consider increasing chunk_size to reduce API calls.")

-        # Initialize embeddings
-        embeddings = …
+        # Initialize OpenAI embeddings
+        embeddings = OpenAIEmbeddings(
             model=config.EMBEDDING_MODEL,
-            …
+            openai_api_key=config.OPENAI_API_KEY,
+            openai_api_base=config.OPENAI_BASE_URL,
+            chunk_size=1000  # Embedding batch size
         )

+        # Test embedding connection
+        try:
+            test_embedding = await asyncio.get_event_loop().run_in_executor(
+                None, embeddings.embed_query, "test connection"
+            )
+            logger.info("Successfully connected to embedding API")
+        except Exception as e:
+            logger.error(f"Failed to connect to embedding API: {str(e)}")
+            raise
+
         # Create or load FAISS index with rate limiting
         if os.path.exists(config.INDEX_PATH):
             try:
@@ -272,22 +326,33 @@ async def initialize_rag_system(api_key: str = None):
             vector_store.save_local(config.INDEX_PATH)
             logger.info(f"Created new FAISS index at '{config.INDEX_PATH}'")

-        # Initialize LLM
-        llm = …
-            … (2 lines truncated in the diff view)
+        # Initialize OpenAI LLM
+        llm = ChatOpenAI(
+            model_name=config.LLM_MODEL,
+            openai_api_key=config.OPENAI_API_KEY,
+            openai_api_base=config.OPENAI_BASE_URL,
             temperature=config.TEMPERATURE,
             max_tokens=config.MAX_OUTPUT_TOKENS,
-            callbacks=[token_callback_handler]
+            callbacks=[token_callback_handler],
+            request_timeout=30
         )

+        # Test LLM connection
+        try:
+            test_response = llm.invoke("Test connection")
+            logger.info("Successfully connected to LLM API")
+        except Exception as e:
+            logger.error(f"Failed to connect to LLM API: {str(e)}")
+            raise
+
         # Create prompt template
         prompt_template = PromptTemplate(
             input_variables=["context", "question"],
-            template="""…
-…
-If …
-…
+            template="""You are an expert in maize agriculture. Use the following context ONLY to answer the question accurately and helpfully.
+
+If the query asks for personal information of any person, do not provide it and instead explain that you cannot share personal information.
+
+Provide clear, concise answers in easy-to-understand language. If the context doesn't contain enough information to answer the question completely, say so.

 Context:
 {context}
@@ -301,7 +366,10 @@ Answer:"""
         qa_chain = RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
-            retriever=vector_store.as_retriever(…
+            retriever=vector_store.as_retriever(
+                search_type="similarity",
+                search_kwargs={"k": config.RETRIEVER_K}
+            ),
             chain_type_kwargs={"prompt": prompt_template},
             callbacks=[token_callback_handler],
             return_source_documents=True
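The retriever wired into the chain can also be exercised on its own, which is handy when tuning RETRIEVER_K; a sketch assuming `vector_store` is the FAISS index loaded above:

    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},  # smaller k for a quick spot check
    )
    for doc in retriever.get_relevant_documents("maize leaf blight symptoms"):
        print(doc.page_content[:80])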
@@ -320,7 +388,7 @@ Answer:"""
 @app.on_event("startup")
 async def startup_event():
     """Initialize the system on startup if API key is available."""
-    if config.…
+    if config.OPENAI_API_KEY:
         try:
             await initialize_rag_system()
         except Exception as e:
@@ -333,7 +401,20 @@ async def root():
         with open("static/index.html", "r") as f:
             return f.read()
     except FileNotFoundError:
-        return "…
+        return """
+        <html>
+            <head><title>Maize RAG System</title></head>
+            <body>
+                <h1>Maize Crop RAG System</h1>
+                <p>API is running. Please use the API endpoints or add static/index.html for web interface.</p>
+                <h2>Available Endpoints:</h2>
+                <ul>
+                    <li><a href="/docs">API Documentation</a></li>
+                    <li><a href="/api/status">System Status</a></li>
+                </ul>
+            </body>
+        </html>
+        """

 @app.get("/api/status", response_model=SystemStatus)
 async def get_status():
@@ -341,21 +422,32 @@ async def get_status():
     return SystemStatus(
         status="ready" if is_initialized else "not_initialized",
         is_initialized=is_initialized,
-        model_name=config.…
+        model_name=config.LLM_MODEL,
         embedding_model=config.EMBEDDING_MODEL,
+        base_url=config.OPENAI_BASE_URL,
         vector_store_ready=vector_store is not None,
         total_chunks=len(vector_store.docstore._dict) if vector_store else 0,
-        api_key_configured=bool(config.…
+        api_key_configured=bool(config.OPENAI_API_KEY)
     )

 @app.post("/api/initialize", response_model=Dict[str, Any])
 async def initialize_system(request: InitializeRequest):
-    """Initialize the RAG system with provided API key."""
+    """Initialize the RAG system with provided API key and configuration."""
     try:
-        await initialize_rag_system(…
+        await initialize_rag_system(
+            api_key=request.api_key,
+            base_url=request.base_url,
+            llm_model=request.llm_model,
+            embedding_model=request.embedding_model
+        )
         return {
             "success": True,
-            "message": "System initialized successfully"
+            "message": "System initialized successfully",
+            "config": {
+                "base_url": config.OPENAI_BASE_URL,
+                "llm_model": config.LLM_MODEL,
+                "embedding_model": config.EMBEDDING_MODEL
+            }
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
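A client-side sketch of the extended initialize call (host, port, and key are placeholders; Spaces typically serve on 7860):

    import requests

    resp = requests.post(
        "http://localhost:7860/api/initialize",
        json={
            "api_key": "sk-...",                  # placeholder
            "base_url": "https://api.openai.com/v1",
            "llm_model": "gpt-3.5-turbo",
            "embedding_model": "text-embedding-ada-002",
        },
    )
    print(resp.json())  # includes the echoed "config" block on success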
@@ -366,7 +458,7 @@ async def process_query(request: QueryRequest):
     if not is_initialized:
         raise HTTPException(
             status_code=503,
-            detail="System not initialized. Please provide API key."
+            detail="System not initialized. Please provide API key and configuration."
         )

     try:
@@ -387,7 +479,12 @@ async def process_query(request: QueryRequest):

                 delay = config.RATE_LIMIT_DELAY * (2 ** attempt) + random.uniform(0, 1)
                 logger.warning(f"Query attempt {attempt + 1} failed: {str(e)}")
-                …
+
+                if "rate limit" in str(e).lower() or "429" in str(e):
+                    logger.info(f"Rate limit detected. Retrying in {delay:.2f} seconds...")
+                else:
+                    logger.info(f"API error detected. Retrying in {delay:.2f} seconds...")
+
                 await asyncio.sleep(delay)

         processing_time = time.time() - start_time
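And the corresponding query call, assuming the route is POST /api/query (its decorator sits outside this diff):

    import requests

    r = requests.post(
        "http://localhost:7860/api/query",  # route assumed
        json={"query": "What plant spacing is recommended for maize?"},
    )
    print(r.json()["answer"])  # QueryResponse.answer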
@@ -428,8 +525,11 @@ async def upload_document(file: UploadFile = File(...)):
     """Upload a new document to replace the existing one."""
     try:
         # Validate file
-        if not file.filename.endswith('.txt'):
-            raise HTTPException(status_code=400, detail="Only .txt files are supported")
+        if not file.filename.endswith(('.txt', '.md')):
+            raise HTTPException(status_code=400, detail="Only .txt and .md files are supported")
+
+        # Ensure data directory exists
+        os.makedirs(os.path.dirname(config.DATA_PATH), exist_ok=True)

         # Save uploaded file
         content = await file.read()
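Uploading a replacement document from a client, assuming the route is POST /api/upload (the decorator sits outside this diff):

    import requests

    with open("maize_notes.md", "rb") as f:
        r = requests.post(
            "http://localhost:7860/api/upload",  # route assumed
            files={"file": ("maize_notes.md", f, "text/markdown")},
        )
    print(r.status_code)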
@@ -439,7 +539,7 @@ async def upload_document(file: UploadFile = File(...)):
         logger.info(f"Uploaded new document: {file.filename}")

         # Reinitialize the system with new data
-        if config.…
+        if config.OPENAI_API_KEY:
             # Remove old index to force recreation
             if os.path.exists(config.INDEX_PATH):
                 import shutil
@@ -462,7 +562,21 @@ async def health_check():
     return {
         "status": "healthy",
         "timestamp": datetime.now().isoformat(),
-        "system_initialized": is_initialized
+        "system_initialized": is_initialized,
+        "api_configured": bool(config.OPENAI_API_KEY)
+    }
+
+# Configuration endpoint
+@app.get("/api/config")
+async def get_config():
+    """Get current configuration."""
+    return {
+        "base_url": config.OPENAI_BASE_URL,
+        "llm_model": config.LLM_MODEL,
+        "embedding_model": config.EMBEDDING_MODEL,
+        "chunk_size": config.CHUNK_SIZE,
+        "retriever_k": config.RETRIEVER_K,
+        "api_key_configured": bool(config.OPENAI_API_KEY)
     }

 # Mount static files
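Quick smoke test for the two read-only endpoints, assuming the health route is GET /api/health (only /api/config is confirmed by this diff):

    import requests

    print(requests.get("http://localhost:7860/api/health").json())  # route assumed
    print(requests.get("http://localhost:7860/api/config").json())  # endpoint added in this commit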