Spaces:

NitinBot001
/

CROP-RAG-API

Sleeping

NitinBot001 committed on Sep 14, 2025

Commit

fcc60a6

verified ·

1 Parent(s): 6faaecb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,19 +64,19 @@ class Config:
     EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")  # Can be changed to compatible embedding model
     # Document Processing
-    CHUNK_SIZE = 500
-    CHUNK_OVERLAP = 50
     # Rate Limiting
     MAX_RETRIES = 5
     RATE_LIMIT_DELAY = 2.0
-    EMBEDDING_BATCH_SIZE = 10  # OpenAI allows more requests
     EMBEDDING_DELAY = 1.0  # Lower delay for OpenAI
     # Model Parameters
     TEMPERATURE = 0.5
-    MAX_OUTPUT_TOKENS = 2000
-    RETRIEVER_K = 10
     # Paths
     INDEX_PATH = "faiss_maize_index"
@@ -86,7 +86,7 @@ config = Config()
 # Request/Response Models
 class QueryRequest(BaseModel):
-    query: str = Field(..., min_length=1, max_length=10000)
 class QueryResponse(BaseModel):
     answer: str
@@ -492,8 +492,8 @@ async def process_query(request: QueryRequest):
         # Extract sources
         sources = []
         if 'source_documents' in result:
-            sources = [doc.page_content[:200] + "..."
-                      for doc in result['source_documents'][:3]]
         # Get token usage
         token_usage = {}

     EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")  # Can be changed to compatible embedding model
     # Document Processing
+    CHUNK_SIZE = 8000
+    CHUNK_OVERLAP = 100
     # Rate Limiting
     MAX_RETRIES = 5
     RATE_LIMIT_DELAY = 2.0
+    EMBEDDING_BATCH_SIZE = 20  # OpenAI allows more requests
     EMBEDDING_DELAY = 1.0  # Lower delay for OpenAI
     # Model Parameters
     TEMPERATURE = 0.5
+    MAX_OUTPUT_TOKENS = 200000
+    RETRIEVER_K = 20
     # Paths
     INDEX_PATH = "faiss_maize_index"
 # Request/Response Models
 class QueryRequest(BaseModel):
+    query: str = Field(..., min_length=1, max_length=100000)
 class QueryResponse(BaseModel):
     answer: str
         # Extract sources
         sources = []
         if 'source_documents' in result:
+            sources = [doc.page_content[:2000] + "..."
+                      for doc in result['source_documents']]
         # Get token usage
         token_usage = {}